Repository: getmaxun/maxun Branch: develop Commit: c7bf0e97a94e Files: 226 Total size: 2.3 MB Directory structure: gitextract_d__9ladc/ ├── .dockerignore ├── .github/ │ ├── CODE_OF_CONDUCT.md │ ├── COMMIT_CONVENTION.md │ └── ISSUE_TEMPLATE/ │ └── bug_report.yml ├── .gitignore ├── .sequelizerc ├── CONTRIBUTING.md ├── Dockerfile.backend ├── Dockerfile.frontend ├── ENVEXAMPLE ├── LICENSE ├── README.md ├── SETUP.md ├── browser/ │ ├── .dockerignore │ ├── Dockerfile │ ├── package.json │ ├── server.ts │ └── tsconfig.json ├── docker-compose.yml ├── docker-entrypoint.sh ├── docs/ │ ├── nginx.conf │ └── self-hosting-docker.md ├── index.html ├── legacy/ │ ├── server/ │ │ └── worker.ts │ └── src/ │ ├── AddWhatCondModal.tsx │ ├── AddWhereCondModal.tsx │ ├── Canvas.tsx │ ├── DisplayWhereConditionSettings.tsx │ ├── Highlighter.tsx │ ├── LeftSidePanel.tsx │ ├── LeftSidePanelContent.tsx │ ├── LeftSidePanelSettings.tsx │ ├── Pair.tsx │ ├── PairDetail.tsx │ ├── PairDisplayDiv.tsx │ ├── PairEditForm.tsx │ ├── Renderer.tsx │ ├── RobotEdit.tsx │ ├── RobotSettings.tsx │ ├── ScheduleSettings.tsx │ ├── coordinateMapper.ts │ └── inputHelpers.ts ├── maxun-core/ │ ├── .gitignore │ ├── README.md │ ├── package.json │ ├── src/ │ │ ├── browserSide/ │ │ │ └── scraper.js │ │ ├── index.ts │ │ ├── interpret.ts │ │ ├── preprocessor.ts │ │ ├── types/ │ │ │ ├── logic.ts │ │ │ └── workflow.ts │ │ └── utils/ │ │ ├── concurrency.ts │ │ ├── logger.ts │ │ └── utils.ts │ └── tsconfig.json ├── nginx.conf ├── package.json ├── public/ │ └── locales/ │ ├── de.json │ ├── en.json │ ├── es.json │ ├── ja.json │ ├── tr.json │ └── zh.json ├── server/ │ ├── .gitignore │ ├── config/ │ │ └── config.json │ ├── docker-entrypoint.sh │ ├── src/ │ │ ├── api/ │ │ │ ├── record.ts │ │ │ └── sdk.ts │ │ ├── browser-management/ │ │ │ ├── browserConnection.ts │ │ │ ├── classes/ │ │ │ │ ├── BrowserPool.ts │ │ │ │ └── RemoteBrowser.ts │ │ │ ├── controller.ts │ │ │ └── inputHandlers.ts │ │ ├── constants/ │ │ │ └── config.ts │ │ ├── db/ 
│ │ │ ├── config/ │ │ │ │ └── database.js │ │ │ ├── migrate.js │ │ │ ├── migrations/ │ │ │ │ ├── 20250327111003-add-airtable-columns.js │ │ │ │ └── 20250527105655-add-webhooks.js │ │ │ └── models/ │ │ │ └── index.js │ │ ├── index.ts │ │ ├── logger.ts │ │ ├── markdownify/ │ │ │ ├── markdown.ts │ │ │ └── scrape.ts │ │ ├── mcp-worker.ts │ │ ├── middlewares/ │ │ │ ├── api.ts │ │ │ └── auth.ts │ │ ├── models/ │ │ │ ├── Robot.ts │ │ │ ├── Run.ts │ │ │ ├── User.ts │ │ │ └── associations.ts │ │ ├── pgboss-worker.ts │ │ ├── routes/ │ │ │ ├── auth.ts │ │ │ ├── index.ts │ │ │ ├── proxy.ts │ │ │ ├── record.ts │ │ │ ├── storage.ts │ │ │ ├── webhook.ts │ │ │ └── workflow.ts │ │ ├── schedule-worker.ts │ │ ├── sdk/ │ │ │ ├── browserSide/ │ │ │ │ └── pageAnalyzer.js │ │ │ ├── selectorValidator.ts │ │ │ └── workflowEnricher.ts │ │ ├── server.ts │ │ ├── socket-connection/ │ │ │ └── connection.ts │ │ ├── storage/ │ │ │ ├── db.ts │ │ │ ├── mino.ts │ │ │ ├── pgboss.ts │ │ │ └── schedule.ts │ │ ├── swagger/ │ │ │ └── config.ts │ │ ├── types/ │ │ │ └── index.ts │ │ ├── utils/ │ │ │ ├── analytics.ts │ │ │ ├── api.ts │ │ │ ├── auth.ts │ │ │ ├── env.ts │ │ │ └── schedule.ts │ │ └── workflow-management/ │ │ ├── classes/ │ │ │ ├── Generator.ts │ │ │ └── Interpreter.ts │ │ ├── integrations/ │ │ │ ├── airtable.ts │ │ │ └── gsheet.ts │ │ ├── scheduler/ │ │ │ └── index.ts │ │ ├── selector.ts │ │ ├── storage.ts │ │ └── utils.ts │ ├── start.sh │ ├── tsconfig.json │ └── tsconfig.mcp.json ├── src/ │ ├── App.tsx │ ├── api/ │ │ ├── auth.ts │ │ ├── integration.ts │ │ ├── proxy.ts │ │ ├── recording.ts │ │ ├── storage.ts │ │ ├── webhook.ts │ │ └── workflow.ts │ ├── apiConfig.js │ ├── components/ │ │ ├── action/ │ │ │ ├── ActionDescriptionBox.tsx │ │ │ ├── ActionSettings.tsx │ │ │ └── action-settings/ │ │ │ ├── Scrape.tsx │ │ │ ├── ScrapeSchema.tsx │ │ │ ├── Screenshot.tsx │ │ │ ├── Scroll.tsx │ │ │ └── index.ts │ │ ├── api/ │ │ │ └── ApiKey.tsx │ │ ├── browser/ │ │ │ ├── BrowserContent.tsx │ │ │ ├── 
BrowserNavBar.tsx │ │ │ ├── BrowserRecordingSave.tsx │ │ │ ├── BrowserTabs.tsx │ │ │ ├── BrowserWindow.tsx │ │ │ └── UrlForm.tsx │ │ ├── dashboard/ │ │ │ ├── MainMenu.tsx │ │ │ ├── NavBar.tsx │ │ │ └── NotFound.tsx │ │ ├── icons/ │ │ │ ├── DiscordIcon.tsx │ │ │ └── RecorderIcon.tsx │ │ ├── integration/ │ │ │ └── IntegrationSettings.tsx │ │ ├── pickers/ │ │ │ ├── DatePicker.tsx │ │ │ ├── DateTimeLocalPicker.tsx │ │ │ ├── Dropdown.tsx │ │ │ └── TimePicker.tsx │ │ ├── proxy/ │ │ │ └── ProxyForm.tsx │ │ ├── recorder/ │ │ │ ├── DOMBrowserRenderer.tsx │ │ │ ├── KeyValueForm.tsx │ │ │ ├── KeyValuePair.tsx │ │ │ ├── RightSidePanel.tsx │ │ │ ├── SaveRecording.tsx │ │ │ └── SidePanelHeader.tsx │ │ ├── robot/ │ │ │ ├── Recordings.tsx │ │ │ ├── RecordingsTable.tsx │ │ │ ├── ToggleButton.tsx │ │ │ └── pages/ │ │ │ ├── RobotConfigPage.tsx │ │ │ ├── RobotCreate.tsx │ │ │ ├── RobotDuplicatePage.tsx │ │ │ ├── RobotEditPage.tsx │ │ │ ├── RobotIntegrationPage.tsx │ │ │ ├── RobotSettingsPage.tsx │ │ │ └── ScheduleSettingsPage.tsx │ │ ├── run/ │ │ │ ├── ColapsibleRow.tsx │ │ │ ├── InterpretationButtons.tsx │ │ │ ├── InterpretationLog.tsx │ │ │ ├── RunContent.tsx │ │ │ ├── RunSettings.tsx │ │ │ ├── Runs.tsx │ │ │ └── RunsTable.tsx │ │ └── ui/ │ │ ├── AlertSnackbar.tsx │ │ ├── Box.tsx │ │ ├── ConfirmationBox.tsx │ │ ├── DropdownMui.tsx │ │ ├── Form.tsx │ │ ├── GenericModal.tsx │ │ ├── Loader.tsx │ │ ├── buttons/ │ │ │ ├── AddButton.tsx │ │ │ ├── BreakpointButton.tsx │ │ │ ├── Buttons.tsx │ │ │ ├── ClearButton.tsx │ │ │ ├── EditButton.tsx │ │ │ └── RemoveButton.tsx │ │ └── texts.tsx │ ├── constants/ │ │ └── const.ts │ ├── context/ │ │ ├── auth.tsx │ │ ├── browserActions.tsx │ │ ├── browserDimensions.tsx │ │ ├── browserSteps.tsx │ │ ├── globalInfo.tsx │ │ ├── socket.tsx │ │ └── theme-provider.tsx │ ├── helpers/ │ │ ├── capturedElementHighlighter.ts │ │ ├── clientListExtractor.ts │ │ ├── clientPaginationDetector.ts │ │ ├── clientSelectorGenerator.ts │ │ ├── dimensionUtils.ts │ │ └── uuid.ts 
│ ├── i18n.ts │ ├── index.css │ ├── index.tsx │ ├── pages/ │ │ ├── Login.tsx │ │ ├── MainPage.tsx │ │ ├── PageWrapper.tsx │ │ ├── RecordingPage.tsx │ │ └── Register.tsx │ ├── routes/ │ │ └── userRoute.tsx │ └── shared/ │ ├── constants.ts │ └── types.ts ├── tsconfig.json ├── typedoc.json ├── vite-env.d.ts └── vite.config.js ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ node_modules npm-debug.log dist .git .gitignore .md .vscode coverage docker-compose.yml Dockerfile Dockerfile.frontend Dockerfile.backend ================================================ FILE: .github/CODE_OF_CONDUCT.md ================================================ # Contributor Code of Conduct As contributors and maintainers of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities. We are committed to making participation in this project a harassment-free experience for everyone, regardless of the level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, age, or religion. Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team. 
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers. This Code of Conduct is adapted from the Contributor Covenant, version 1.0.0, available at http://contributor-covenant.org/version/1/0/0/ ================================================ FILE: .github/COMMIT_CONVENTION.md ================================================ ## Git Commit Message Convention > This is adapted from [Conventional Commits 1.0.0](https://www.conventionalcommits.org/en/v1.0.0/). ## Summary The Conventional Commits specification is a lightweight convention on top of commit messages. It provides an easy set of rules for creating an explicit commit history; which makes it easier to write automated tools on top of. This convention dovetails with [SemVer](http://semver.org), by describing the features, fixes, and breaking changes made in commit messages. The commit message should be structured as follows: --- ``` <type>[optional scope]: <description> [optional body] [optional footer(s)] ``` ---
The commit contains the following structural elements, to communicate intent to the consumers of your library: 1. **fix:** a commit of the _type_ `fix` patches a bug in your codebase (this correlates with [`PATCH`](http://semver.org/#summary) in Semantic Versioning). 1. **feat:** a commit of the _type_ `feat` introduces a new feature to the codebase (this correlates with [`MINOR`](http://semver.org/#summary) in Semantic Versioning). 1. **BREAKING CHANGE:** a commit that has a footer `BREAKING CHANGE:`, or appends a `!` after the type/scope, introduces a breaking API change (correlating with [`MAJOR`](http://semver.org/#summary) in Semantic Versioning). A BREAKING CHANGE can be part of commits of any _type_. 1. _types_ other than `fix:` and `feat:` are allowed, for example [@commitlint/config-conventional](https://github.com/conventional-changelog/commitlint/tree/master/%40commitlint/config-conventional) (based on [the Angular convention](https://github.com/angular/angular/blob/22b96b9/CONTRIBUTING.md#-commit-message-guidelines)) recommends `build:`, `chore:`, `ci:`, `docs:`, `style:`, `refactor:`, `perf:`, `test:`, and others. 1. _footers_ other than `BREAKING CHANGE: <description>` may be provided and follow a convention similar to [git trailer format](https://git-scm.com/docs/git-interpret-trailers). Additional types are not mandated by the Conventional Commits specification, and have no implicit effect in Semantic Versioning (unless they include a BREAKING CHANGE).

A scope may be provided to a commit's type, to provide additional contextual information and is contained within parenthesis, e.g., `feat(parser): add ability to parse arrays`. ## Examples ### Commit message with description and breaking change footer ``` feat: allow provided config object to extend other configs BREAKING CHANGE: `extends` key in config file is now used for extending other config files ``` ### Commit message with `!` to draw attention to breaking change ``` feat!: send an email to the customer when a product is shipped ``` ### Commit message with scope and `!` to draw attention to breaking change ``` feat(api)!: send an email to the customer when a product is shipped ``` ### Commit message with both `!` and BREAKING CHANGE footer ``` chore!: drop support for Node 6 BREAKING CHANGE: use JavaScript features not available in Node 6. ``` ### Commit message with no body ``` docs: correct spelling of CHANGELOG ``` ### Commit message with scope ``` feat(lang): add polish language ``` ### Commit message with multi-paragraph body and multiple footers ``` fix: prevent racing of requests Introduce a request id and a reference to latest request. Dismiss incoming responses other than from latest request. Remove timeouts which were used to mitigate the racing issue but are obsolete now. Reviewed-by: Z Refs: #123 ``` ## Specification The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, “SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” in this document are to be interpreted as described in [RFC 2119](https://www.ietf.org/rfc/rfc2119.txt). 1. Commits MUST be prefixed with a type, which consists of a noun, `feat`, `fix`, etc., followed by the OPTIONAL scope, OPTIONAL `!`, and REQUIRED terminal colon and space. 1. The type `feat` MUST be used when a commit adds a new feature to your application or library. 1. The type `fix` MUST be used when a commit represents a bug fix for your application. 1. A scope MAY be provided after a type. 
A scope MUST consist of a noun describing a section of the codebase surrounded by parenthesis, e.g., `fix(parser):` 1. A description MUST immediately follow the colon and space after the type/scope prefix. The description is a short summary of the code changes, e.g., _fix: array parsing issue when multiple spaces were contained in string_. 1. A longer commit body MAY be provided after the short description, providing additional contextual information about the code changes. The body MUST begin one blank line after the description. 1. A commit body is free-form and MAY consist of any number of newline separated paragraphs. 1. One or more footers MAY be provided one blank line after the body. Each footer MUST consist of a word token, followed by either a `:` or `#` separator, followed by a string value (this is inspired by the [git trailer convention](https://git-scm.com/docs/git-interpret-trailers)). 1. A footer's token MUST use `-` in place of whitespace characters, e.g., `Acked-by` (this helps differentiate the footer section from a multi-paragraph body). An exception is made for `BREAKING CHANGE`, which MAY also be used as a token. 1. A footer's value MAY contain spaces and newlines, and parsing MUST terminate when the next valid footer token/separator pair is observed. 1. Breaking changes MUST be indicated in the type/scope prefix of a commit, or as an entry in the footer. 1. If included as a footer, a breaking change MUST consist of the uppercase text BREAKING CHANGE, followed by a colon, space, and description, e.g., _BREAKING CHANGE: environment variables now take precedence over config files_. 1. If included in the type/scope prefix, breaking changes MUST be indicated by a `!` immediately before the `:`. If `!` is used, `BREAKING CHANGE:` MAY be omitted from the footer section, and the commit description SHALL be used to describe the breaking change. 1. Types other than `feat` and `fix` MAY be used in your commit messages, e.g., _docs: updated ref docs._ 1. 
The units of information that make up Conventional Commits MUST NOT be treated as case sensitive by implementors, with the exception of BREAKING CHANGE which MUST be uppercase. 1. BREAKING-CHANGE MUST be synonymous with BREAKING CHANGE, when used as a token in a footer. ## Why Use Conventional Commits * Automatically generating CHANGELOGs. * Automatically determining a semantic version bump (based on the types of commits landed). * Communicating the nature of changes to teammates, the public, and other stakeholders. * Triggering build and publish processes. * Making it easier for people to contribute to your projects, by allowing them to explore a more structured commit history. ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ name: Bug Report description: Report a bug to help us improve title: "[Bug]: " labels: [bug] assignees: [] body: - type: dropdown id: environment attributes: label: Where are you using the app? options: - Cloud (Hosted by Us) - Self-Hosted (OSS) with Docker - Self-Hosted (OSS) without Docker validations: required: true - type: input id: app_version attributes: label: App Version description: Enter the version number you are using (if known). placeholder: "e.g., v1.2.3" validations: required: false - type: input id: browser attributes: label: Browser description: Which browser are you using? placeholder: "e.g., Chrome 124, Firefox 115, Safari 17" validations: required: true - type: input id: operating_system attributes: label: Operating System description: Your operating system and version. placeholder: "e.g., Windows 11, macOS Sonoma, Ubuntu 22.04" validations: required: true - type: textarea id: steps_to_reproduce attributes: label: Steps to Reproduce description: How can we reproduce the problem? placeholder: | 1. Go to '...' 2. Click on '...' 3. Scroll down to '...' 4. 
See error validations: required: true - type: textarea id: expected_behavior attributes: label: Expected Behavior description: What did you expect to happen instead? validations: required: true - type: textarea id: actual_behavior attributes: label: Actual Behavior description: What actually happened? validations: required: true - type: textarea id: logs attributes: label: Relevant Logs or Screenshots description: Please paste any logs, screenshots, or console errors if available. placeholder: "Paste logs or upload screenshots." validations: required: false - type: textarea id: additional_context attributes: label: Additional Context description: Anything else we should know? validations: required: false ================================================ FILE: .gitignore ================================================ # dependencies /node_modules /browser/node_modules # misc .DS_Store .env.local .env.development.local .env.test.local .env.production.local .env /.idea /server/logs /build package-lock.json ================================================ FILE: .sequelizerc ================================================ const path = require('path'); module.exports = { 'config': path.resolve('server/src/db/config', 'database.js'), 'models-path': path.resolve('server/src/db/models'), 'seeders-path': path.resolve('server/src/db/seeders'), 'migrations-path': path.resolve('server/src/db/migrations') }; ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing ## Local Setup Read local installation instructions here: Local Installation ## Open Your First Pull Request ### 1. Create a new branch Create a new branch from the develop branch. Use a descriptive name for your branch, such as `feature/new-feature` or `fix/bug-fix`. This makes it easier to understand the purpose of the changes. ### 2. Make your changes Make the necessary code changes and commit them to your local branch. 
Follow Conventional Commits: Use the format `<type>(<scope>): <description> [optional body][optional footer]` for your commit messages. Example: `feat(api): Add new endpoint for user data` ### 3. Push your changes Push your local branch to the remote repository: ```git push origin <your-branch-name>``` ### 4. Create a Pull Request 1. Go to the repository on GitHub and navigate to the `Pull Requests` tab. 2. Click `New pull request`. 3. Select your branch as the "compare" branch and develop as the "base" branch. 4. Give your PR a descriptive title that summarizes the changes. 5. Write a clear and concise description of the changes you made and why they are necessary. 6. Add any relevant screenshots or GIFs to help visualize the changes. ### 5. Review and Merge 1. Once you submit the PR, it will be reviewed by other developers. 2. Address any comments or requested changes from the reviewers. 3. Once the PR is approved, it will be merged into the develop branch. ### 6. Remember 1. Always test your changes thoroughly before submitting a PR. 2. Keep your PRs focused on a single feature or bug fix. 3. Be respectful and responsive to feedback from reviewers. ### 7. AI-Assisted Contributions AI-assisted contributions are welcome. If you use AI tools to generate code, please ensure that: 1. The changes fully address the issue or feature request. 2. The code is tested and works as expected. 3. The implementation follows the existing project structure and conventions. 4. You understand the code you are submitting and can respond to review feedback. Low-quality, unverified, or blindly generated patches will not be merged. 
================================================ FILE: Dockerfile.backend ================================================ FROM node:20-slim # Set working directory WORKDIR /app COPY .sequelizerc .sequelizerc # Install node dependencies COPY package*.json ./ COPY src ./src COPY public ./public COPY server ./server COPY tsconfig.json ./ COPY server/tsconfig.json ./server/ # COPY server/start.sh ./ # Install dependencies RUN npm install --legacy-peer-deps # Build TypeScript server RUN npm run build:server # Expose backend port EXPOSE ${BACKEND_PORT:-8080} # Run migrations & start backend using plain node CMD ["npm", "run", "server"] # CMD ["sh", "-c", "npm run migrate && npm run server"] ================================================ FILE: Dockerfile.frontend ================================================ FROM node:18-alpine AS builder WORKDIR /app # Copy package files COPY package*.json ./ # Install dependencies RUN npm install --legacy-peer-deps # Copy frontend source code and config COPY src ./src COPY public ./public COPY index.html ./ COPY vite.config.js ./ COPY tsconfig.json ./ # Expose the frontend port EXPOSE ${FRONTEND_PORT:-5173} # Start the frontend using the client script CMD ["npm", "run", "client", "--", "--host"] ================================================ FILE: ENVEXAMPLE ================================================ # App Setup NODE_ENV=production # Set to 'development' or 'production' as required JWT_SECRET=a9Z$kLq7^f03GzNw!bP9dH4xV6sT2yXl3O8vR@uYq3 # Replace with a secure JWT secret key DB_NAME=maxun # Your PostgreSQL database name DB_USER=postgres # PostgreSQL username DB_PASSWORD=postgres # PostgreSQL password DB_HOST=postgres # Host for PostgreSQL in Docker DB_PORT=5432 # Port for PostgreSQL (default: 5432) ENCRYPTION_KEY=f4d5e6a7b8c9d0e1f23456789abcdef01234567890abcdef123456789abcdef0 # Key for encrypting sensitive data (passwords and proxies) SESSION_SECRET=maxun_session # A strong, random string used to sign session cookies. 
Recommended to define your own session secret to avoid session hijacking. MINIO_ENDPOINT=minio # MinIO endpoint in Docker MINIO_PORT=9000 # Port for MinIO (default: 9000) MINIO_CONSOLE_PORT=9001 # Web UI Port for MinIO (default: 9001) MINIO_ACCESS_KEY=minio_access_key # MinIO access key MINIO_SECRET_KEY=minio_secret_key # MinIO secret key REDIS_HOST=redis # Redis host in Docker REDIS_PORT=6379 # Redis port (default: 6379) REDIS_PASSWORD=redis_password # Redis password (This is optional. Needed to authenticate with a password-protected Redis instance; if not set, Redis will connect without authentication.) # Backend and Frontend URLs and Ports BACKEND_PORT=8080 # Port to run backend on. Needed for Docker setup FRONTEND_PORT=5173 # Port to run frontend on. Needed for Docker setup BACKEND_URL=http://localhost:8080 # URL on which the backend runs. You can change it based on your needs. PUBLIC_URL=http://localhost:5173 # URL on which the frontend runs. You can change it based on your needs. VITE_BACKEND_URL=http://localhost:8080 # URL used by frontend to connect to backend. It should always have the same value as BACKEND_URL VITE_PUBLIC_URL=http://localhost:5173 # URL used by backend to connect to frontend. It should always have the same value as PUBLIC_URL # Optional Google OAuth settings for Google Sheet Integration GOOGLE_CLIENT_ID=your_google_client_id GOOGLE_CLIENT_SECRET=your_google_client_secret GOOGLE_REDIRECT_URI=your_google_redirect_uri # Optional Airtable OAuth settings for Airtable Integration AIRTABLE_CLIENT_ID=your_airtable_client_id AIRTABLE_REDIRECT_URI=http://localhost:8080/auth/airtable/callback # Telemetry Settings - Please keep it enabled. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes. 
MAXUN_TELEMETRY=true # WebSocket port for browser CDP connections BROWSER_WS_PORT=3001 BROWSER_HEALTH_PORT=3002 BROWSER_WS_HOST=browser ================================================ FILE: LICENSE ================================================ GNU AFFERO GENERAL PUBLIC LICENSE Version 3, 19 November 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU Affero General Public License is a free, copyleft license for software and other kinds of works, specifically designed to ensure cooperation with the community in the case of network server software. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. Developers that use our General Public Licenses protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License which gives you legal permission to copy, distribute and/or modify the software. A secondary benefit of defending all users' freedom is that improvements made in alternate versions of the program, if they receive widespread use, become available for other developers to incorporate. Many developers of free software are heartened and encouraged by the resulting cooperation. 
However, in the case of software used on network servers, this result may fail to come about. The GNU General Public License permits making a modified version and letting the public access it on a server without ever releasing its source code to the public. The GNU Affero General Public License is designed specifically to ensure that, in such cases, the modified source code becomes available to the community. It requires the operator of a network server to provide the source code of the modified version running there to the users of that server. Therefore, public use of a modified version, on a publicly accessible server, gives the public access to the source code of the modified version. An older license, called the Affero General Public License and published by Affero, was designed to accomplish similar goals. This is a different license, not a version of the Affero GPL, but Affero has released a new version of the Affero GPL which permits relicensing under this license. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU Affero General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. 
To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. 
A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. 
You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. 
You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. 
You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. 
Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. 
If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. 
When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. 
If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. 
If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. 
For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. 
If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. 
You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Remote Network Interaction; Use with the GNU General Public License. 
Notwithstanding any other provision of this License, if you modify the Program, your modified version must prominently offer all users interacting with it remotely through a computer network (if your version supports such interaction) an opportunity to receive the Corresponding Source of your version by providing access to the Corresponding Source from a network server at no charge, through some standard or customary means of facilitating copying of software. This Corresponding Source shall include the Corresponding Source for any work covered by version 3 of the GNU General Public License that is incorporated pursuant to the following paragraph. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the work with which it is combined will remain governed by version 3 of the GNU General Public License. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU Affero General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU Affero General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If your software can interact with users remotely through a computer network, you should also make sure that it provides a way for users to get its source. For example, if your program is a web application, its interface could display a "Source" link that leads users to an archive of the code. 
There are many ways you could offer source, and different solutions will be better for different programs; see section 13 for the specific requirements. You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU AGPL, see <https://www.gnu.org/licenses/>. ================================================ FILE: README.md ================================================

Turn Any Website Into A Structured API

✨ The unified open-source no-code platform for real-time web scraping, crawling, search and AI data extraction ✨

Go To App • Documentation • Website • Discord • Watch Tutorials

getmaxun%2Fmaxun | Trendshift

## What is Maxun? Maxun is an open-source no-code web data platform for turning the web into structured, reliable data. It supports extraction, crawling, scraping, and search — designed to scale from simple use cases to complex, automated workflows. ### Ecosystem 1. **[Extract](https://docs.maxun.dev/category/extract)** – Emulate real user behavior and collect structured data from any website. * **[Recorder Mode](https://docs.maxun.dev/robot/extract/robot-actions)** – Record your actions as you browse; Maxun turns them into a reusable extraction robot. * **[AI Mode](https://docs.maxun.dev/robot/extract/llm-extraction)** – Describe what you want in natural language and let LLM-powered extraction do the rest. 2. **[Scrape](https://docs.maxun.dev/robot/scrape/scrape-robots)** – Convert full webpages into clean Markdown or HTML and capture screenshots. 3. **[Crawl](https://docs.maxun.dev/robot/crawl/crawl-introduction)** – Crawl entire websites and extract content from every relevant page, with full control over scope and discovery. 4. **[Search](https://docs.maxun.dev/robot/search/search-introduction)** – Run automated web searches to discover or scrape results, with support for time-based filters. 5. **[SDK](https://docs.maxun.dev/sdk/sdk-overview)** – A complete developer toolkit for scraping, extraction, scheduling, and end-to-end data automation. ## How Does It Work? Maxun robots are automated tools that help you collect data from websites without writing any code. Think of them as your personal web assistants that can navigate websites, extract information, and organize data just like you would manually - but faster and more efficiently. There are four types of robots, each designed for a different job. ### 1. Extract Extract emulates real user behavior and captures structured data. - Recorder Mode - Record your actions as you browse; Maxun turns them into a reusable extraction robot. 
### Example: Extract 10 Property Listings from Airbnb [https://github.com/user-attachments/assets/recorder-mode-demo-video](https://github.com/user-attachments/assets/c6baa75f-b950-482c-8d26-8a8b6c5382c3) - AI Mode - Describe what you want in natural language and let LLM-powered extraction do the rest. ### Example: Extract Names, Rating & Duration of Top 50 Movies from IMDb https://github.com/user-attachments/assets/f714e860-58d6-44ed-bbcd-c9374b629384 Learn more [here](https://docs.maxun.dev/category/extract). ### 2. Scrape Scrape converts full webpages into clean Markdown or HTML and can capture screenshots. Ideal for AI workflows, agents, and document processing. Learn more [here](https://docs.maxun.dev/robot/scrape/scrape-robots). ### 3. Crawl Crawl entire websites and extract content from every relevant page, with full control over scope and discovery. Learn more [here](https://docs.maxun.dev/robot/crawl/crawl-introduction). ### 4. Search Run automated web searches to discover or scrape results, with support for time-based filters. Learn more [here](https://docs.maxun.dev/robot/search/search-introduction). ## Quick Start ### Getting Started The simplest & fastest way to get started is to use the hosted version: https://app.maxun.dev. You can self-host if you prefer! ### Installation Maxun can run locally with or without Docker. 1. [Setup with Docker Compose](https://docs.maxun.dev/installation/docker) 2. [Setup without Docker](https://docs.maxun.dev/installation/local) 3. [Environment Variables](https://docs.maxun.dev/installation/environment_variables) 4. [SDK](https://github.com/getmaxun/node-sdk) ### Upgrading & Self Hosting 1. [Self Host Maxun With Docker & Portainer](https://docs.maxun.dev/self-host) 2. [Upgrade Maxun With Docker Compose Setup](https://docs.maxun.dev/installation/upgrade#upgrading-with-docker-compose) 3. [Upgrade Maxun Without Docker Compose Setup](https://docs.maxun.dev/installation/upgrade#upgrading-with-local-setup) ## Sponsors



TestMu AI

The Native AI-Agentic Cloud Platform to Supercharge Quality Engineering. Test Intelligently and Ship Faster.
## Features - ✨ **Extract Data With No-Code** – Point and click interface - ✨ **LLM-Powered Extraction** – Describe what you want; use LLMs to scrape structured data - ✨ **Developer SDK** – Programmatic extraction, scheduling, and robot management - ✨ **Handle Pagination & Scrolling** – Automatic navigation - ✨ **Run Robots On Schedules** – Set it and forget it - ✨ **Turn Websites to APIs** – RESTful endpoints from any site - ✨ **Turn Websites to Spreadsheets** – Direct data export to Google Sheets & Airtable - ✨ **Adapt To Website Layout Changes** – Auto-recovery from site updates - ✨ **Extract Behind Login** – Handle authentication seamlessly - ✨ **Integrations** – Connect with your favorite tools - ✨ **MCP Support** – Model Context Protocol integration - ✨ **LLM-Ready Data** – Clean Markdown for AI applications - ✨ **Self-Hostable** – Full control over your infrastructure - ✨ **Open Source** – Transparent and community-driven ## Demos Maxun can be used for various use cases, including lead generation, market research, content aggregation and more. View demos here: https://www.maxun.dev/usecases ## Note This project is in the early stages of development. Your feedback is very important to us - we're actively working on improvements. ## License

This project is licensed under AGPLv3.

## Project Values We believe in fair and responsible use of open source. If you rely on this project commercially, please consider contributing back or supporting its development. ## Support Us Star the repository, contribute if you love what we’re building, or [sponsor us](https://github.com/sponsors/amhsirak). ## Contributors Thank you to the combined efforts of everyone who contributes! ================================================ FILE: SETUP.md ================================================ # Local Installation 1. Create a root folder for your project (e.g. 'maxun') 2. Create a file named `.env` in the root folder of the project 3. An example env file can be viewed [here](https://github.com/getmaxun/maxun/blob/master/ENVEXAMPLE). Copy all of its contents to your `.env` file. 4. Choose your installation method below ### Docker Compose 1. Copy and paste the [docker-compose.yml file](https://github.com/getmaxun/maxun/blob/master/docker-compose.yml) into your root folder 2. Ensure you have set up the `.env` file in that same folder 3. Run the command below from a terminal ``` docker-compose up -d ``` You can access the frontend at http://localhost:5173/ and backend at http://localhost:8080/ ### Without Docker 1. Ensure you have Node.js, PostgreSQL, MinIO and Redis installed on your system. 2. Run the commands below ``` git clone https://github.com/getmaxun/maxun # change directory to the project root cd maxun # install dependencies npm install # change directory to maxun-core to install dependencies cd maxun-core npm install # get back to the root directory cd .. # install chromium and its dependencies npx playwright install --with-deps chromium # start frontend and backend together npm run start ``` You can access the frontend at http://localhost:5173/ and backend at http://localhost:8080/ # Environment Variables 1. Create a file named `.env` in the root folder of the project 2. 
Example env file can be viewed [here](https://github.com/getmaxun/maxun/blob/master/ENVEXAMPLE). | Variable | Mandatory | Description | If Not Set | |-----------------------|-----------|----------------------------------------------------------------------------------------------|--------------------------------------------------------------| | `BACKEND_PORT` | Yes | Port to run backend on. Needed for Docker setup | Default value: 8080 | | `FRONTEND_PORT` | Yes | Port to run frontend on. Needed for Docker setup | Default value: 5173 | | `BACKEND_URL` | Yes | URL to run backend on. | Default value: http://localhost:8080 | | `VITE_BACKEND_URL` | Yes | URL used by frontend to connect to backend | Default value: http://localhost:8080 | | `PUBLIC_URL` | Yes | URL to run frontend on. | Default value: http://localhost:5173 | | `VITE_PUBLIC_URL` | Yes | URL used by backend to connect to frontend | Default value: http://localhost:5173 | | `JWT_SECRET` | Yes | Secret key used to sign and verify JSON Web Tokens (JWTs) for authentication. | JWT authentication will not work. | | `DB_NAME` | Yes | Name of the Postgres database to connect to. | Database connection will fail. | | `DB_USER` | Yes | Username for Postgres database authentication. | Database connection will fail. | | `DB_PASSWORD` | Yes | Password for Postgres database authentication. | Database connection will fail. | | `DB_HOST` | Yes | Host address where the Postgres database server is running. | Database connection will fail. | | `DB_PORT` | Yes | Port number used to connect to the Postgres database server. | Database connection will fail. | | `ENCRYPTION_KEY` | Yes | Key used for encrypting sensitive data (proxies, passwords). | Encryption functionality will not work. | | `SESSION_SECRET` | No | A strong, random string used to sign session cookies | Uses default secret. Recommended to define your own session secret to avoid session hijacking. 
| | `MINIO_ENDPOINT` | Yes | Endpoint URL for MinIO, to store Robot Run Screenshots. | Connection to MinIO storage will fail. | | `MINIO_PORT` | Yes | Port number for MinIO service. | Connection to MinIO storage will fail. | | `MINIO_CONSOLE_PORT` | No | Port number for MinIO WebUI service. Needed for Docker setup. | Cannot access MinIO Web UI. | | `MINIO_ACCESS_KEY` | Yes | Access key for authenticating with MinIO. | MinIO authentication will fail. | | `GOOGLE_CLIENT_ID` | No | Client ID for Google OAuth. Used for Google Sheet integration authentication. | Google login will not work. | | `GOOGLE_CLIENT_SECRET`| No | Client Secret for Google OAuth. Used for Google Sheet integration authentication. | Google login will not work. | | `GOOGLE_REDIRECT_URI` | No | Redirect URI for handling Google OAuth responses. | Google login will not work. | | `AIRTABLE_CLIENT_ID` | No | Client ID for Airtable, used for Airtable integration authentication. | Airtable login will not work. | | `AIRTABLE_REDIRECT_URI` | No | Redirect URI for handling Airtable OAuth responses. | Airtable login will not work. | | `MAXUN_TELEMETRY` | No | Controls whether anonymous usage data is sent. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes, so please keep it enabled; disable it only if you want to stop sending anonymous usage data. | Telemetry data will not be collected. 
| ================================================ FILE: browser/.dockerignore ================================================ node_modules npm-debug.log .env .git .gitignore dist *.ts !*.d.ts tsconfig.json ================================================ FILE: browser/Dockerfile ================================================ FROM mcr.microsoft.com/playwright:v1.57.0-jammy WORKDIR /app # Copy package files COPY browser/package*.json ./ # Install dependencies RUN npm install # Copy TypeScript source and config COPY browser/server.ts ./ COPY browser/tsconfig.json ./ # Build TypeScript RUN npm run build # Accept build arguments for ports (with defaults) ARG BROWSER_WS_PORT=3001 ARG BROWSER_HEALTH_PORT=3002 # Set as environment variables ENV BROWSER_WS_PORT=${BROWSER_WS_PORT} ENV BROWSER_HEALTH_PORT=${BROWSER_HEALTH_PORT} # Expose ports dynamically based on build args EXPOSE ${BROWSER_WS_PORT} ${BROWSER_HEALTH_PORT} # Start the browser service (run compiled JS) CMD ["node", "dist/server.js"] ================================================ FILE: browser/package.json ================================================ { "name": "maxun-browser-service", "version": "1.0.0", "description": "Browser service that exposes Playwright browsers via WebSocket with stealth plugins", "main": "dist/server.js", "scripts": { "build": "tsc", "start": "node dist/server.js", "dev": "ts-node server.ts" }, "dependencies": { "playwright": "1.57.0", "playwright-extra": "^4.3.6", "puppeteer-extra-plugin-stealth": "^2.11.2" }, "devDependencies": { "@types/node": "^22.7.9", "typescript": "^5.0.0", "ts-node": "^10.9.2" } } ================================================ FILE: browser/server.ts ================================================ import { chromium } from 'playwright-extra'; import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import http from 'http'; import type { BrowserServer } from 'playwright'; // Apply stealth plugin to chromium chromium.use(stealthPlugin()); let 
let browserServer: BrowserServer | null = null;
// Health-check HTTP server, kept at module scope so shutdown can close it.
let healthServer: http.Server | null = null;

// Configurable ports with defaults
const BROWSER_WS_PORT = parseInt(process.env.BROWSER_WS_PORT || '3001', 10);
const BROWSER_HEALTH_PORT = parseInt(process.env.BROWSER_HEALTH_PORT || '3002', 10);
const BROWSER_WS_HOST = process.env.BROWSER_WS_HOST || 'localhost';

/**
 * Returns the browser server's WebSocket endpoint with the first occurrence of
 * 'localhost' replaced by BROWSER_WS_HOST, or '' when no browser server exists.
 */
function getAdvertisedWsEndpoint(): string {
    return browserServer?.wsEndpoint().replace('localhost', BROWSER_WS_HOST) || '';
}

/**
 * Closes the health-check server (if any) and the browser server (if any).
 * Failures while closing the browser server are logged, not thrown.
 */
async function closeServers(): Promise<void> {
    if (healthServer) {
        healthServer.close();
        healthServer = null;
    }

    if (browserServer) {
        try {
            await browserServer.close();
            console.log('Browser server closed');
        } catch (error) {
            console.error('Error closing browser server:', error);
        }
    }
}

/**
 * Launches the Chromium browser server on BROWSER_WS_PORT and then starts the
 * health-check HTTP server on BROWSER_HEALTH_PORT.
 *
 * Routes served by the health server:
 *   GET /health -> JSON status document including the advertised WS endpoint
 *   GET /       -> plain-text summary
 *   anything else -> 404
 *
 * Exits the process with code 1 when startup fails.
 */
async function start(): Promise<void> {
    console.log('Starting Maxun Browser Service...');
    console.log(`WebSocket port: ${BROWSER_WS_PORT}`);
    console.log(`Health check port: ${BROWSER_HEALTH_PORT}`);

    try {
        // Launch browser server that exposes WebSocket endpoint
        browserServer = await chromium.launchServer({
            headless: true,
            args: [
                '--disable-blink-features=AutomationControlled',
                '--disable-web-security',
                '--disable-features=IsolateOrigins,site-per-process',
                '--disable-site-isolation-trials',
                '--disable-extensions',
                '--no-sandbox',
                '--disable-dev-shm-usage',
                '--disable-gpu',
                '--force-color-profile=srgb',
                '--force-device-scale-factor=2',
                '--ignore-certificate-errors',
                '--mute-audio'
            ],
            port: BROWSER_WS_PORT,
        });

        console.log(`✅ Browser WebSocket endpoint ready: ${browserServer.wsEndpoint()}`);
        console.log(`✅ Stealth plugin enabled`);

        // Health check HTTP server
        healthServer = http.createServer((req, res) => {
            if (req.url === '/health') {
                res.writeHead(200, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({
                    status: 'healthy',
                    wsEndpoint: getAdvertisedWsEndpoint(),
                    wsPort: BROWSER_WS_PORT,
                    healthPort: BROWSER_HEALTH_PORT,
                    timestamp: new Date().toISOString()
                }));
            } else if (req.url === '/') {
                res.writeHead(200, { 'Content-Type': 'text/plain' });
                res.end(`Maxun Browser Service\nWebSocket: ${getAdvertisedWsEndpoint()}\nHealth: http://localhost:${BROWSER_HEALTH_PORT}/health`);
            } else {
                res.writeHead(404);
                res.end('Not Found');
            }
        });

        // listen() reports failures (for example a port that is already in use)
        // through the 'error' event, which the surrounding try/catch never sees.
        // Without a listener the event would surface as an uncaught exception and
        // leave the already-launched browser server open.
        healthServer.on('error', (error) => {
            console.error('❌ Health check server error:', error);
            void closeServers().finally(() => process.exit(1));
        });

        healthServer.listen(BROWSER_HEALTH_PORT, () => {
            console.log(`✅ Health check server running on port ${BROWSER_HEALTH_PORT}`);
            console.log('Browser service is ready to accept connections!');
        });
    } catch (error) {
        console.error('❌ Failed to start browser service:', error);
        process.exit(1);
    }
}

/**
 * Graceful shutdown: closes the health-check and browser servers, then exits
 * with code 0. Registered below as the SIGTERM and SIGINT handler.
 */
async function shutdown(): Promise<void> {
    console.log('Shutting down browser service...');
    await closeServers();
    process.exit(0);
}

process.on('SIGTERM', shutdown);
process.on('SIGINT', shutdown);

// Start the service
start().catch(console.error);
# dockerfile: Dockerfile.backend image: getmaxun/maxun-backend:latest restart: unless-stopped ports: - "${BACKEND_PORT:-8080}:${BACKEND_PORT:-8080}" env_file: .env environment: BACKEND_URL: ${BACKEND_URL} # to ensure Playwright works in Docker PLAYWRIGHT_BROWSERS_PATH: /ms-playwright PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: 0 # Force container/CI detection for headless mode CI: "true" CONTAINER: "true" # DEBUG: pw:api # PWDEBUG: 1 # Enables debugging CHROMIUM_FLAGS: '--disable-gpu --no-sandbox --headless=new' security_opt: - seccomp=unconfined # This might help with browser sandbox issues shm_size: '2gb' # Increase shared memory size for Chromium mem_limit: 6g # Set 6GB memory limit depends_on: - postgres - minio volumes: - /var/run/dbus:/var/run/dbus frontend: # build: # context: . # dockerfile: Dockerfile.frontend image: getmaxun/maxun-frontend:latest restart: unless-stopped ports: - "${FRONTEND_PORT:-5173}:${FRONTEND_PORT:-5173}" env_file: .env environment: PUBLIC_URL: ${PUBLIC_URL} BACKEND_URL: ${BACKEND_URL} depends_on: - backend browser: build: context: . 
dockerfile: browser/Dockerfile args: BROWSER_WS_PORT: ${BROWSER_WS_PORT:-3001} BROWSER_HEALTH_PORT: ${BROWSER_HEALTH_PORT:-3002} ports: - "${BROWSER_WS_PORT:-3001}:${BROWSER_WS_PORT:-3001}" - "${BROWSER_HEALTH_PORT:-3002}:${BROWSER_HEALTH_PORT:-3002}" environment: - NODE_ENV=production - DEBUG=pw:browser* - BROWSER_WS_PORT=${BROWSER_WS_PORT:-3001} - BROWSER_HEALTH_PORT=${BROWSER_HEALTH_PORT:-3002} - BROWSER_WS_HOST=${BROWSER_WS_HOST:-browser} - PLAYWRIGHT_BROWSERS_PATH=/ms-playwright restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:${BROWSER_HEALTH_PORT:-3002}/health"] interval: 10s timeout: 5s retries: 3 start_period: 10s deploy: resources: limits: memory: 2G cpus: '1.5' reservations: memory: 1G cpus: '1.0' security_opt: - seccomp:unconfined shm_size: 2gb cap_add: - SYS_ADMIN volumes: postgres_data: minio_data: ================================================ FILE: docker-entrypoint.sh ================================================ #!/bin/sh # Start backend server cd /app && npm run start:server -- --host 0.0.0.0 & # Start nginx nginx -g 'daemon off;' ================================================ FILE: docs/nginx.conf ================================================ # Robust maxun nginx config file # DO NOT uncomment commented lines unless YOU know what they mean and YOU know what YOU are doing! 
### HTTP server block ### server { server_name maxun.my.domain; root /usr/share/nginx/html; listen 80; server_tokens off; return 301 https://$server_name$request_uri; } ### HTTPS server block ### server { ### Default config ### server_name maxun.my.domain; root /usr/share/nginx/html; access_log /var/log/nginx/maxun_access.log; error_log /var/log/nginx/maxun_error.log info; listen 443 ssl; http2 on; server_tokens off; ### SSL config ### ssl_certificate /etc/letsencrypt/live/my.domain/fullchain.pem; ssl_certificate_key /etc/letsencrypt/live/my.domain/privkey.pem; ssl_trusted_certificate /etc/letsencrypt/live/my.domain/chain.pem; ssl_protocols TLSv1.2 TLSv1.3; #ssl_ecdh_curve X25519MLKEM768:X25519:prime256v1:secp384r1; ssl_ecdh_curve X25519:prime256v1:secp384r1; ssl_prefer_server_ciphers off; ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:DHE-RSA-CHACHA20-POLY1305; ssl_stapling off; ssl_stapling_verify off; ssl_session_cache shared:MozSSL:10m; ssl_session_tickets off; ssl_session_timeout 1d; ssl_dhparam dh.pem; #ssl_conf_command Options KTLS; ### Performance tuning config ### client_max_body_size 512M; client_body_timeout 300s; client_body_buffer_size 256k; #pagespeed off; ### Compression ### ## gzip ## gzip on; gzip_vary on; gzip_comp_level 5; gzip_min_length 256; gzip_disable msie6; gzip_proxied expired no-cache no-store private no_last_modified no_etag auth; gzip_buffers 16 8k; gzip_types application/atom+xml text/javascript application/javascript application/json application/ld+json application/manifest+json application/rss+xml application/vnd.geo+json application/vnd.ms-fontobject application/wasm application/x-font-ttf application/x-web-app-manifest+json application/xhtml+xml application/xml font/opentype image/bmp image/svg+xml image/x-icon text/cache-manifest text/css 
text/plain text/vcard text/vnd.rim.location.xloc text/vtt text/x-component text/x-cross-domain-policy; ## brotli: enable only if you have compiled nginx with brotli support!!! ## #brotli on; #brotli_static on; #brotli_comp_level 6; #brotli_types application/atom+xml application/javascript application/json application/rss+xml # application/vnd.ms-fontobject application/x-font-opentype application/x-font-truetype # application/x-font-ttf application/x-javascript application/xhtml+xml application/xml # font/eot font/opentype font/otf font/truetype image/svg+xml image/vnd.microsoft.icon # image/x-icon image/x-win-bitmap text/css text/javascript text/plain text/xml; ### Default headers ### add_header Referrer-Policy "no-referrer" always; add_header X-Content-Type-Options "nosniff" always; add_header X-Frame-Options "SAMEORIGIN" always; add_header X-Permitted-Cross-Domain-Policies "none" always; add_header X-Robots-Tag "noindex, nofollow" always; add_header X-XSS-Protection "1; mode=block" always; add_header Permissions-Policy "geolocation=(self), midi=(self), sync-xhr=(self), microphone=(self), camera=(self), magnetometer=(self), gyroscope=(self), fullscreen=(self), payment=(self), interest-cohort=()"; ### Proxy rules ### # Backend web traffic and websockets location ~ ^/(auth|storage|record|workflow|robot|proxy|api-docs|api|webhook|socket.io)(/|$) { proxy_pass http://localhost:8080; #Change the port number to match .env file BACKEND_PORT variable proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection 'upgrade'; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; } # Frontend web traffic location / { proxy_pass http://localhost:5173; #Change the port number to match .env file FRONTEND_PORT variable proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection 'upgrade'; 
proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; } } ================================================ FILE: docs/self-hosting-docker.md ================================================ # Self hosting docker guide So you want to create a bot? Let's get you started! ## Requirements (not covered) - Webserver (Apache2, nginx, etc.) - SSL Certificates (letsencrypt, zerossl, etc) - A sub-domain to host maxun i.e. maxun.my.domain - Docker - Docker compose - Probably others... ## Guide For this guide, we assume that before you start, you have a dedicated docker folder to house config files and everything else we need for persistence between docker container reboots and updates. The path in this guide is `/home/$USER/Docker/maxun`. 1. Change directory into your docker folder `cd /home/$USER/Docker/` 2. Create a new directory for maxun and all the required sub-folders for our docker services `mkdir -p maxun/{db,minio,redis}` 3. Change directory to enter the newly created folder `cd maxun` 4. Create an environment file to save your variables `nano .env` with the following contents: ``` NODE_ENV=production JWT_SECRET=openssl rand -base64 48 DB_NAME=maxun DB_USER=postgres DB_PASSWORD=openssl rand -base64 24 DB_HOST=postgres DB_PORT=5432 ENCRYPTION_KEY=openssl rand -base64 64 SESSION_SECRET=openssl rand -base64 48 MINIO_ENDPOINT=minio MINIO_PORT=9000 MINIO_CONSOLE_PORT=9001 MINIO_ACCESS_KEY=minio MINIO_SECRET_KEY=openssl rand -base64 24 REDIS_HOST=maxun-redis REDIS_PORT=6379 REDIS_PASSWORD= BACKEND_PORT=8080 FRONTEND_PORT=5173 BACKEND_URL=https://maxun.my.domain PUBLIC_URL=https://maxun.my.domain VITE_BACKEND_URL=https://maxun.my.domain VITE_PUBLIC_URL=https://maxun.my.domain GOOGLE_CLIENT_ID= GOOGLE_CLIENT_SECRET= GOOGLE_REDIRECT_URI= AIRTABLE_CLIENT_ID= AIRTABLE_REDIRECT_URI= MAXUN_TELEMETRY=true ``` 5. Ctrl + x, Y, Enter will save your changes 6. 
Please be sure to READ this file and change the variables to match your environment!!! i.e. BACKEND_PORT=30000 7. Create a file for docker compose `nano docker-compose.yml` with the following contents: ```yml services: postgres: image: postgres:17 container_name: maxun-postgres mem_limit: 512M environment: POSTGRES_USER: ${DB_USER} POSTGRES_PASSWORD: ${DB_PASSWORD} POSTGRES_DB: ${DB_NAME} volumes: - /home/$USER/Docker/maxun/db:/var/lib/postgresql/data healthcheck: test: ["CMD-SHELL", "pg_isready -U postgres"] interval: 10s timeout: 5s retries: 5 redis: image: docker.io/library/redis:7 container_name: maxun-redis restart: always mem_limit: 128M volumes: - /home/$USER/Docker/maxun/redis:/data minio: image: minio/minio container_name: maxun-minio mem_limit: 512M environment: MINIO_ROOT_USER: ${MINIO_ACCESS_KEY} MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY} command: server /data --console-address :${MINIO_CONSOLE_PORT:-9001} volumes: - /home/$USER/Docker/maxun/minio:/data backend: image: getmaxun/maxun-backend:latest container_name: maxun-backend ports: - "127.0.0.1:${BACKEND_PORT:-8080}:${BACKEND_PORT:-8080}" env_file: .env environment: BACKEND_URL: ${BACKEND_URL} PLAYWRIGHT_BROWSERS_PATH: /ms-playwright PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: 0 # DEBUG: pw:api # PWDEBUG: 1 # Enables debugging CHROMIUM_FLAGS: '--disable-gpu --no-sandbox --headless=new' security_opt: - seccomp=unconfined # This might help with browser sandbox issues shm_size: '2gb' mem_limit: 4g depends_on: - postgres - minio volumes: - /var/run/dbus:/var/run/dbus frontend: image: getmaxun/maxun-frontend:latest container_name: maxun-frontend mem_limit: 512M ports: - "127.0.0.1:${FRONTEND_PORT:-5173}:5173" env_file: .env environment: PUBLIC_URL: ${PUBLIC_URL} BACKEND_URL: ${BACKEND_URL} depends_on: - backend ``` 8. Ctrl + x, Y, Enter will save your changes 9. This particular setup is "production ready" meaning that maxun is only accessible from localhost. You must configure a reverse proxy to access it! 10. 
Start maxun `sudo docker compose up -d` or `sudo docker-compose up -d` 11. Wait 30 seconds for everything to come up 12. Access your maxun instance at http://localhost:5173 if using defaults ## Next steps You will want to configure a reverse proxy. Click on a link below to check out some examples. - [Nginx](nginx.conf) ================================================ FILE: index.html ================================================ Maxun • Turn Websites To APIs • Open Source
// Redis connection shared by the queue and the worker. Host, port and password
// come from the environment; the port falls back to 6379 when REDIS_PORT is unset.
const connection = new IORedis({
    host: process.env.REDIS_HOST,
    port: process.env.REDIS_PORT ? parseInt(process.env.REDIS_PORT, 10) : 6379,
    maxRetriesPerRequest: null,
    password: process.env.REDIS_PASSWORD ? process.env.REDIS_PASSWORD : undefined,
});

connection.on('connect', () => {
    console.log('Connected to Redis!');
});

connection.on('error', (err) => {
    console.error('Redis connection error:', err);
});

const workflowQueue = new Queue('workflow', { connection });

// Processes 'workflow' jobs by running the recording identified by `id` for
// `userId`. Errors are logged and re-thrown so the job is reported as failed.
const worker = new Worker('workflow', async job => {
    const { userId, id } = job.data;
    try {
        const result = await handleRunRecording(id, userId);
        return result;
    } catch (error) {
        logger.error('Error running workflow:', error);
        throw error;
    }
}, { connection });

// After a job completes, stamp the robot's schedule with the time of this run
// and the next computed run time.
worker.on('completed', async (job: any) => {
    logger.log(`info`, `Job ${job.id} completed for ${job.data.runId}`);

    // This listener is async, so a rejected lookup/update would otherwise
    // become an unhandled promise rejection; log it instead.
    try {
        const robot = await Robot.findOne({ where: { 'recording_meta.id': job.data.id } });
        if (!robot) {
            return;
        }

        const schedule = robot.schedule;
        if (!schedule || !schedule.cronExpression || !schedule.timezone) {
            logger.error('Robot schedule, cronExpression, or timezone is missing.');
            return;
        }

        // Update `lastRunAt` to the current time
        const lastRunAt = new Date();

        // Compute the next run date
        const nextRunAt = computeNextRun(schedule.cronExpression, schedule.timezone) || undefined;

        await robot.update({
            schedule: {
                ...schedule,
                lastRunAt,
                nextRunAt,
            },
        });
    } catch (error) {
        logger.error('Error updating robot schedule after job completion:', error);
    }
});

worker.on('failed', async (job: any, err) => {
    logger.log(`error`, `Job ${job.id} failed for ${job.data.runId}:`, err);
});

console.log('Worker is running...');

/** Queries the queue's job counts once; the result is not used. */
async function jobCounts() {
    await workflowQueue.getJobCounts();
}

// Fire-and-forget at startup; log a failure rather than leaving the rejection unhandled.
jobCounts().catch((error) => {
    logger.error('Error fetching workflow queue job counts:', error);
});

// We dont need this right now

// process.on('SIGINT', () => {
//     console.log('Worker shutting down...');
//     process.exit();
// });

export { workflowQueue, worker };
Add what condition:
Action: setAction(e.target.value)} value={action} label='action' />
Add new argument of type:
args: {args.map((arg, index) => { // @ts-ignore return (
{ args.splice(index, 1); setArgs([...args]); }} /> {index}: {arg.type === 'string' ? setArgs([ ...args.slice(0, index), { type: arg.type, value: e.target.value }, ...args.slice(index + 1) ])} value={args[index].value || ''} label="string" key={`arg-${arg.type}-${index}`} /> : arg.type === 'number' ? setArgs([ ...args.slice(0, index), { type: arg.type, value: Number(e.target.value) }, ...args.slice(index + 1) ])} value={args[index].value || ''} label="number" /> : //@ts-ignore objectRefs.current[arg.value] = el} key={`arg-${arg.type}-${index}`} /> }
) })}
) } ================================================ FILE: legacy/src/AddWhereCondModal.tsx ================================================ import { Dropdown as MuiDropdown } from "../../src/components/ui/DropdownMui"; import { Button, MenuItem, Typography } from "@mui/material"; import React, { useRef } from "react"; import { GenericModal } from "../../src/components/ui/GenericModal"; import { WhereWhatPair } from "maxun-core"; import { SelectChangeEvent } from "@mui/material/Select/Select"; import { DisplayConditionSettings } from "./DisplayWhereConditionSettings"; import { useSocketStore } from "../../src/context/socket"; /** Props for AddWhereCondModal: visibility flag, close callback, the pair being edited, and the `index` that is sent (minus one) with the 'updatePair' event. */ interface AddWhereCondModalProps { isOpen: boolean; onClose: () => void; pair: WhereWhatPair; index: number; } /** Modal for adding a condition to `pair.where`. handleSubmit mutates `pair.where` in place according to the selected property and then emits 'updatePair' on the socket. NOTE(review): generic type arguments and JSX tags look stripped in this copy (e.g. `as Record` with no type arguments) - confirm against the repository file. */ export const AddWhereCondModal = ({ isOpen, onClose, pair, index }: AddWhereCondModalProps) => { const [whereProp, setWhereProp] = React.useState(''); const [additionalSettings, setAdditionalSettings] = React.useState(''); const [newValue, setNewValue] = React.useState(''); const [checked, setChecked] = React.useState(new Array(Object.keys(pair.where).length).fill(false)); const keyValueFormRef = useRef<{ getObject: () => object }>(null); const { socket } = useSocketStore(); /* Remember the chosen property and reset newValue to the matching empty shape ('' for url, [''] for selectors). NOTE(review): `case 'default'` matches the literal string 'default'; it is not a `default:` clause. */ const handlePropSelect = (event: SelectChangeEvent) => { setWhereProp(event.target.value); switch (event.target.value) { case 'url': setNewValue(''); break; case 'selectors': setNewValue(['']); break; case 'default': return; } } /* Write the new condition into pair.where, close the modal, reset local state and emit 'updatePair' with index - 1. An unrecognised whereProp returns without doing anything. */ const handleSubmit = () => { switch (whereProp) { case 'url': if (additionalSettings === 'string') { pair.where.url = newValue; } else { pair.where.url = { $regex: newValue }; } break; case 'selectors': pair.where.selectors = newValue; break; case 'cookies': pair.where.cookies = keyValueFormRef.current?.getObject() as Record break; case 'before': pair.where.$before = newValue; break; case 'after': pair.where.$after = newValue; break; /* 'boolean': every checked top-level key of pair.where is removed and, when its value is truthy, copied into an array that is stored under `$<additionalSettings>`. */ case 'boolean': const booleanArr = []; const deleteKeys: string[] = []; for (let i = 0; i < checked.length; 
i++) { if (checked[i]) { if (Object.keys(pair.where)[i]) { //@ts-ignore if (pair.where[Object.keys(pair.where)[i]]) { booleanArr.push({ //@ts-ignore [Object.keys(pair.where)[i]]: pair.where[Object.keys(pair.where)[i]] }); } deleteKeys.push(Object.keys(pair.where)[i]); } } } // @ts-ignore deleteKeys.forEach((key: string) => delete pair.where[key]); //@ts-ignore pair.where[`$${additionalSettings}`] = booleanArr; break; default: return; } onClose(); setWhereProp(''); setAdditionalSettings(''); setNewValue(''); socket?.emit('updatePair', { index: index - 1, pair: pair }); } return ( { setWhereProp(''); setAdditionalSettings(''); setNewValue(''); onClose(); }} modalStyle={modalStyle}>
Add where condition:
url selectors cookies before after boolean logic
{whereProp ?
: null}
) } export const modalStyle = { top: '45%', left: '50%', transform: 'translate(-50%, -50%)', width: '30%', backgroundColor: 'background.paper', p: 4, height: 'fit-content', display: 'block', padding: '20px', }; ================================================ FILE: legacy/src/Canvas.tsx ================================================ import React, { memo, useCallback, useEffect, useRef } from 'react'; import { useSocketStore } from '../../context/socket'; import { useGlobalInfoStore } from "../../context/globalInfo"; import { useActionContext } from '../../context/browserActions'; import DatePicker from '../pickers/DatePicker'; import Dropdown from '../pickers/Dropdown'; import TimePicker from '../pickers/TimePicker'; import DateTimeLocalPicker from '../pickers/DateTimeLocalPicker'; import { coordinateMapper } from '../../helpers/coordinateMapper'; /** Callback type for `onCreateRef`; Canvas calls it with its canvas ref from the listener-registration effect. */ interface CreateRefCallback { (ref: React.RefObject): void; } /** Props for Canvas: the size used for the ImageData buffer and the callback that receives the canvas ref. */ interface CanvasProps { width: number; height: number; onCreateRef: CreateRefCallback; } /** * Interface for mouse's x,y coordinates */ export interface Coordinates { x: number; y: number; }; /** Canvas component: forwards mouse, wheel and keyboard events on the canvas element to the socket as 'input:*' events (after mapping canvas coordinates to browser coordinates) and shows picker overlays when the socket emits 'show*' events. NOTE(review): generic type arguments (e.g. on useRef) and JSX element tags look stripped in this copy - confirm against the repository file. */ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { const canvasRef = useRef(null); const contextRef = useRef(null); const imageDataRef = useRef(null); const animationFrameRef = useRef(null); const { socket } = useSocketStore(); const { setLastAction, lastAction } = useGlobalInfoStore(); const { getText, getList } = useActionContext(); /* Refs mirror getText/getList so the event callbacks read the latest values (kept in sync by an effect below). */ const getTextRef = useRef(getText); const getListRef = useRef(getList); /* Minimum gap between forwarded mousemove events, compared against performance.now() differences (milliseconds). */ const MOUSE_MOVE_THROTTLE = 8; const lastMouseMoveTime = useRef(0); const [datePickerInfo, setDatePickerInfo] = React.useState<{ coordinates: Coordinates; selector: string; } | null>(null); const [dropdownInfo, setDropdownInfo] = React.useState<{ coordinates: Coordinates; selector: string; options: Array<{ value: string; text: string; disabled: boolean; selected: boolean; }>; } | null>(null); const [timePickerInfo, setTimePickerInfo] = React.useState<{ 
coordinates: Coordinates; selector: string; } | null>(null); const [dateTimeLocalInfo, setDateTimeLocalInfo] = React.useState<{ coordinates: Coordinates; selector: string; } | null>(null); /* Record the last action only when it differs from the stored one. NOTE(review): this function is re-created on every render and is listed in the dependency arrays of onMouseEvent/onKeyboardEvent, so those callbacks change identity on every render as well. */ const notifyLastAction = (action: string) => { if (lastAction !== action) { setLastAction(action); } }; const lastMousePosition = useRef({ x: 0, y: 0 }); /* Create the 2D context and an ImageData buffer the first time a canvas element is available. NOTE(review): because of the `!contextRef.current` guard the buffer is not re-created when width/height change later. */ useEffect(() => { if (canvasRef.current && !contextRef.current) { const ctx = canvasRef.current.getContext('2d', { alpha: false, desynchronized: true, willReadFrequently: false }); if (ctx) { contextRef.current = ctx; imageDataRef.current = ctx.createImageData(width, height); } } }, [width, height]); useEffect(() => { getTextRef.current = getText; getListRef.current = getList; }, [getText, getList]); /* Subscribe to the socket's picker/dropdown events; each handler maps the incoming browser coordinates to canvas coordinates before storing the info in state. Listeners are removed on cleanup. */ useEffect(() => { if (socket) { const handleDatePicker = (info: { coordinates: Coordinates, selector: string }) => { const canvasCoords = coordinateMapper.mapBrowserToCanvas(info.coordinates); setDatePickerInfo({ ...info, coordinates: canvasCoords }); }; const handleDropdown = (info: { coordinates: Coordinates, selector: string, options: Array<{ value: string; text: string; disabled: boolean; selected: boolean; }>; }) => { const canvasCoords = coordinateMapper.mapBrowserToCanvas(info.coordinates); setDropdownInfo({ ...info, coordinates: canvasCoords }); }; const handleTimePicker = (info: { coordinates: Coordinates, selector: string }) => { const canvasCoords = coordinateMapper.mapBrowserToCanvas(info.coordinates); setTimePickerInfo({ ...info, coordinates: canvasCoords }); }; const handleDateTimePicker = (info: { coordinates: Coordinates, selector: string }) => { const canvasCoords = coordinateMapper.mapBrowserToCanvas(info.coordinates); setDateTimeLocalInfo({ ...info, coordinates: canvasCoords }); }; socket.on('showDatePicker', handleDatePicker); socket.on('showDropdown', handleDropdown); socket.on('showTimePicker', handleTimePicker); socket.on('showDateTimePicker', handleDateTimePicker); return () => { socket.off('showDatePicker', 
handleDatePicker); socket.off('showDropdown', handleDropdown); socket.off('showTimePicker', handleTimePicker); socket.off('showDateTimePicker', handleDateTimePicker); }; } }, [socket]); /* Shared handler for mousedown, mousemove and wheel. Coordinates are taken relative to the canvas bounding rect and mapped to browser coordinates. mousedown is not forwarded while getText or getList is true; mousemove is throttled and skipped unless the pointer moved more than 0.5 on either axis. */ const onMouseEvent = useCallback((event: MouseEvent) => { if (!socket || !canvasRef.current) return; const rect = canvasRef.current.getBoundingClientRect(); const clickCoordinates = { x: event.clientX - rect.left, y: event.clientY - rect.top, }; const browserCoordinates = coordinateMapper.mapCanvasToBrowser(clickCoordinates); switch (event.type) { case 'mousedown': if (getTextRef.current === true) { console.log('Capturing Text...'); } else if (getListRef.current === true) { console.log('Capturing List...'); } else { socket.emit('input:mousedown', browserCoordinates); } notifyLastAction('click'); break; case 'mousemove': { const now = performance.now(); if (now - lastMouseMoveTime.current < MOUSE_MOVE_THROTTLE) { return; } lastMouseMoveTime.current = now; const dx = Math.abs(lastMousePosition.current.x - clickCoordinates.x); const dy = Math.abs(lastMousePosition.current.y - clickCoordinates.y); if (dx > 0.5 || dy > 0.5) { lastMousePosition.current = clickCoordinates; socket.emit('input:mousemove', browserCoordinates); notifyLastAction('move'); } break; } /* Wheel deltas are rounded to multiples of 5 and forwarded only when either rounded delta has magnitude greater than 2. */ case 'wheel': { const wheelEvent = event as WheelEvent; const deltaX = Math.round(wheelEvent.deltaX / 5) * 5; const deltaY = Math.round(wheelEvent.deltaY / 5) * 5; if (Math.abs(deltaX) > 2 || Math.abs(deltaY) > 2) { socket.emit('input:wheel', { deltaX, deltaY }); notifyLastAction('scroll'); } break; } default: return; } }, [socket, notifyLastAction]); /* Keyboard handler: keydown is sent together with the last recorded mouse position mapped to browser coordinates; keyup sends only the key. */ const onKeyboardEvent = useCallback((event: KeyboardEvent) => { if (socket) { const browserCoordinates = coordinateMapper.mapCanvasToBrowser(lastMousePosition.current); switch (event.type) { case 'keydown': socket.emit('input:keydown', { key: event.key, coordinates: browserCoordinates }); notifyLastAction(`${event.key} pressed`); break; case 'keyup': socket.emit('input:keyup', 
event.key); break; default: console.log('Default keyEvent registered'); return; } } }, [socket, notifyLastAction]); /* Hand the canvas ref to the parent and register the DOM listeners (mouse and wheel listeners are registered as passive); all listeners are removed on cleanup. */ useEffect(() => { const canvas = canvasRef.current; if (!canvas) return; onCreateRef(canvasRef); const options = { passive: true }; canvas.addEventListener('mousedown', onMouseEvent, options); canvas.addEventListener('mousemove', onMouseEvent, options); canvas.addEventListener('wheel', onMouseEvent, options); canvas.addEventListener('keydown', onKeyboardEvent); canvas.addEventListener('keyup', onKeyboardEvent); return () => { canvas.removeEventListener('mousedown', onMouseEvent); canvas.removeEventListener('mousemove', onMouseEvent); canvas.removeEventListener('wheel', onMouseEvent); canvas.removeEventListener('keydown', onKeyboardEvent); canvas.removeEventListener('keyup', onKeyboardEvent); }; }, [onMouseEvent, onKeyboardEvent, onCreateRef]); /* On unmount, cancel any animation frame id stored in animationFrameRef. */ useEffect(() => { return () => { if (animationFrameRef.current) { cancelAnimationFrame(animationFrameRef.current); } }; }, []); /* Style objects are memoised with empty dependency lists, so each is created once per mounted component. */ const containerStyle = React.useMemo(() => ({ borderRadius: '0px 0px 5px 5px', overflow: 'hidden', backgroundColor: 'white', contain: 'layout style paint', isolation: 'isolate' as React.CSSProperties['isolation'] }), []); const canvasStyle = React.useMemo(() => ({ display: 'block', imageRendering: 'crisp-edges' as const, willChange: 'contents', backfaceVisibility: 'hidden' as const, transform: 'translateZ(0)', maxWidth: '100%', maxHeight: '100%' }), []); return (
{datePickerInfo && ( setDatePickerInfo(null)} /> )} {dropdownInfo && ( setDropdownInfo(null)} /> )} {timePickerInfo && ( setTimePickerInfo(null)} /> )} {dateTimeLocalInfo && ( setDateTimeLocalInfo(null)} /> )}
); }; export default memo(Canvas); ================================================ FILE: legacy/src/DisplayWhereConditionSettings.tsx ================================================ import React from "react"; import { Dropdown as MuiDropdown } from "../../src/components/ui/DropdownMui"; import { Checkbox, FormControlLabel, FormGroup, MenuItem, Stack, TextField } from "@mui/material"; import { AddButton } from "../../src/components/ui/buttons/AddButton"; import { RemoveButton } from "../../src/components/ui/buttons/RemoveButton"; import { KeyValueForm } from "../../src/components/recorder/KeyValueForm"; import { WarningText } from "../../src/components/ui/texts"; interface DisplayConditionSettingsProps { whereProp: string; additionalSettings: string; setAdditionalSettings: (value: any) => void; newValue: any; setNewValue: (value: any) => void; keyValueFormRef: React.RefObject<{ getObject: () => object }>; whereKeys: string[]; checked: boolean[]; setChecked: (value: boolean[]) => void; } export const DisplayConditionSettings = ( { whereProp, setAdditionalSettings, additionalSettings, setNewValue, newValue, keyValueFormRef, whereKeys, checked, setChecked } : DisplayConditionSettingsProps) => { switch (whereProp) { case 'url': return ( setAdditionalSettings(e.target.value)}> string regex {additionalSettings ? 
setNewValue(e.target.value)} value={newValue} /> : null} ) case 'selectors': return ( { newValue.map((selector: string, index: number) => { return setNewValue([ ...newValue.slice(0, index), e.target.value, ...newValue.slice(index + 1) ])} /> }) } setNewValue([...newValue, ''])} /> { const arr = newValue; arr.splice(-1); setNewValue([...arr]); }} /> ) case 'cookies': return case 'before': return setNewValue(e.target.value)} /> case 'after': return setNewValue(e.target.value)} /> case 'boolean': return ( setAdditionalSettings(e.target.value)}> and or { whereKeys.map((key: string, index: number) => { return ( setChecked([ ...checked.slice(0, index), !checked[index], ...checked.slice(index + 1) ])} key={`checkbox-${key}-${index}`} /> } label={key} key={`control-label-form-${key}-${index}`} /> ) }) } Choose at least 2 where conditions. Nesting of boolean operators is possible by adding more conditions. ) default: return null; } } ================================================ FILE: legacy/src/Highlighter.tsx ================================================ import React, { useMemo } from 'react'; import styled from "styled-components"; import { coordinateMapper } from '../../helpers/coordinateMapper'; interface HighlighterProps { unmodifiedRect: DOMRect; displayedSelector: string; width: number; height: number; canvasRect: DOMRect; }; const HighlighterComponent = ({ unmodifiedRect, displayedSelector = '', width, height, canvasRect }: HighlighterProps) => { if (!unmodifiedRect) { return null; } else { const rect = useMemo(() => { const mappedRect = coordinateMapper.mapBrowserRectToCanvas(unmodifiedRect); return { top: mappedRect.top + canvasRect.top + window.scrollY, left: mappedRect.left + canvasRect.left + window.scrollX, width: mappedRect.width, height: mappedRect.height, }; }, [unmodifiedRect, canvasRect.top, canvasRect.left]); return (
{/* {displayedSelector} */}
); } } export const Highlighter = React.memo(HighlighterComponent); const HighlighterOutline = styled.div` box-sizing: border-box; pointer-events: none !important; position: fixed !important; background: #ff5d5b26 !important; outline: 2px solid #ff00c3 !important; z-index: 2147483647 !important; top: ${(p: HighlighterOutlineProps) => p.top}px; left: ${(p: HighlighterOutlineProps) => p.left}px; width: ${(p: HighlighterOutlineProps) => p.width}px; height: ${(p: HighlighterOutlineProps) => p.height}px; `; const HighlighterLabel = styled.div` pointer-events: none !important; position: fixed !important; background: #080a0b !important; color: white !important; padding: 8px !important; font-family: monospace !important; border-radius: 5px !important; z-index: 2147483647 !important; top: ${(p: HighlighterLabelProps) => p.top}px; left: ${(p: HighlighterLabelProps) => p.left}px; `; interface HighlighterLabelProps { top: number; left: number; } interface HighlighterOutlineProps { top: number; left: number; width: number; height: number; } ================================================ FILE: legacy/src/LeftSidePanel.tsx ================================================ import { Box, Paper, Tab, Tabs } from "@mui/material"; import React, { useCallback, useEffect, useState } from "react"; import { getActiveWorkflow, getParamsOfActiveWorkflow } from "../../src/api/workflow"; import { useSocketStore } from '../../src/context/socket'; import { WhereWhatPair, WorkflowFile } from "maxun-core"; import { emptyWorkflow } from "../../src/shared/constants"; import { LeftSidePanelContent } from "./LeftSidePanelContent"; import { useGlobalInfoStore } from "../../src/context/globalInfo"; import { TabContext, TabPanel } from "@mui/lab"; import { LeftSidePanelSettings } from "./LeftSidePanelSettings"; import { RunSettings } from "../../src/components/run/RunSettings"; const fetchWorkflow = (id: string, callback: (response: WorkflowFile) => void) => { getActiveWorkflow(id).then( (response) => 
{ if (response) { callback(response); } else { throw new Error("No workflow found"); } } ).catch((error) => { console.log(`Failed to fetch workflow:`,error.message) }) }; interface LeftSidePanelProps { sidePanelRef: HTMLDivElement | null; alreadyHasScrollbar: boolean; recordingName: string; handleSelectPairForEdit: (pair: WhereWhatPair, index: number) => void; } export const LeftSidePanel = ( { sidePanelRef, alreadyHasScrollbar, recordingName, handleSelectPairForEdit }: LeftSidePanelProps) => { const [workflow, setWorkflow] = useState(emptyWorkflow); const [hasScrollbar, setHasScrollbar] = useState(alreadyHasScrollbar); const [tab, setTab] = useState('recording'); const [params, setParams] = useState([]); const [settings, setSettings] = React.useState({ maxConcurrency: 1, maxRepeats: 1, debug: false, }); const { id, socket } = useSocketStore(); const { setRecordingLength } = useGlobalInfoStore(); const workflowHandler = useCallback((data: WorkflowFile) => { setWorkflow(data); setRecordingLength(data.workflow.length); }, [workflow]) useEffect(() => { // fetch the workflow every time the id changes if (id) { fetchWorkflow(id, workflowHandler); } // fetch workflow in 15min intervals let interval = setInterval(() => { if (id) { fetchWorkflow(id, workflowHandler); } }, (900 * 60 * 15)); return () => clearInterval(interval) }, [id]); useEffect(() => { if (socket) { socket.on("workflow", workflowHandler); } if (sidePanelRef) { const workflowListHeight = sidePanelRef.clientHeight; const innerHeightWithoutNavbar = window.innerHeight - 70; if (innerHeightWithoutNavbar <= workflowListHeight) { if (!hasScrollbar) { setHasScrollbar(true); } } else { if (hasScrollbar && !alreadyHasScrollbar) { setHasScrollbar(false); } } } return () => { socket?.off('workflow', workflowHandler); } }, [socket, workflowHandler]); return ( {/* */} setTab(newTab)}> { getParamsOfActiveWorkflow(id).then((response) => { if (response) { setParams(response); } }) }} /> ); }; 
================================================
FILE: legacy/src/LeftSidePanelContent.tsx
================================================
import React, { useCallback, useEffect, useState } from 'react';
import { Pair } from "./Pair";
import { WhereWhatPair, WorkflowFile } from "maxun-core";
import { useSocketStore } from "../../src/context/socket";
import { Socket } from "socket.io-client";
import { AddButton } from "../../src/components/ui/buttons/AddButton";
import { AddPair } from "../../src/api/workflow";
import { GenericModal } from "../../src/components/ui/GenericModal";
import { PairEditForm } from "./PairEditForm";
import { Tooltip } from "@mui/material";

interface LeftSidePanelContentProps {
  workflow: WorkflowFile;
  updateWorkflow: (workflow: WorkflowFile) => void;
  recordingName: string;
  handleSelectPairForEdit: (pair: WhereWhatPair, index: number) => void;
}

/**
 * Lists the where/what pairs of the current workflow, highlights the pair
 * reported as active over the socket, and lets the user toggle breakpoints
 * and add new pairs.
 */
export const LeftSidePanelContent = ({ workflow, updateWorkflow, recordingName, handleSelectPairForEdit }: LeftSidePanelContentProps) => {
  const [activeId, setActiveId] = React.useState(0);
  const [breakpoints, setBreakpoints] = React.useState<boolean[]>([]);
  const [showEditModal, setShowEditModal] = useState(false);

  const { socket } = useSocketStore();

  // Only uses its arguments and a state setter, so it never needs to be re-created.
  const activePairIdHandler = useCallback((data: string, socket: Socket) => {
    const pairId = parseInt(data, 10);
    setActiveId(pairId + 1);
    // -1 is specially emitted when the interpretation finishes
    if (pairId === -1) {
      return;
    }
    socket.emit('activeIndex', data);
  }, []);

  const addPair = (pair: WhereWhatPair, index: number) => {
    AddPair((index - 1), pair).then((updatedWorkflow) => {
      updateWorkflow(updatedWorkflow);
    }).catch((error) => {
      console.error(error);
    });
    setShowEditModal(false);
  };

  useEffect(() => {
    if (!socket) {
      return;
    }
    // `off` removes a listener by identity, so the exact function passed to
    // `on` must be kept and passed again; a fresh arrow function would leave
    // the original listener registered.
    const onActivePairId = (data: string) => activePairIdHandler(data, socket);
    socket.on("activePairId", onActivePairId);
    return () => {
      socket.off("activePairId", onActivePairId);
    };
  }, [socket, activePairIdHandler]);

  const handleBreakpointClick = (id: number) => {
    // Pad the flags up to the current number of pairs. The workflow may have
    // shrunk since the last toggle, so the pad length is clamped at 0
    // (a negative array length throws a RangeError).
    const missing = Math.max(0, workflow.workflow.length - breakpoints.length);
    const newArray = [...breakpoints, ...Array<boolean>(missing).fill(false)];
    newArray[id] = !newArray[id];
    setBreakpoints(newArray);
    // Emitted outside the state updater so it happens exactly once per click.
    socket?.emit("breakpoints", newArray);
  };

  const handleAddPair = () => {
    setShowEditModal(true);
  };

  return (
setShowEditModal(false)} >
{ workflow.workflow.map((pair, i, workflow,) => handleBreakpointClick(i)} isActive={activeId === i + 1} key={workflow.length - i} index={workflow.length - i} pair={pair} updateWorkflow={updateWorkflow} numberOfPairs={workflow.length} handleSelectPairForEdit={handleSelectPairForEdit} />) }
); }; ================================================ FILE: legacy/src/LeftSidePanelSettings.tsx ================================================ import React from "react"; import { Button, MenuItem, TextField, Typography } from "@mui/material"; import { Dropdown } from "../../src/components/ui/DropdownMui"; import { RunSettings } from "../../src/components/run/RunSettings"; import { useSocketStore } from "../../src/context/socket"; interface LeftSidePanelSettingsProps { params: any[] settings: RunSettings, setSettings: (setting: RunSettings) => void } export const LeftSidePanelSettings = ({ params, settings, setSettings }: LeftSidePanelSettingsProps) => { const { socket } = useSocketStore(); return (
{params.length !== 0 && ( Parameters: {params?.map((item: string, index: number) => { return setSettings( { ...settings, params: settings.params ? { ...settings.params, [item]: e.target.value, } : { [item]: e.target.value, }, })} /> })} )} Interpreter: setSettings( { ...settings, maxConcurrency: parseInt(e.target.value), })} defaultValue={settings.maxConcurrency} /> setSettings( { ...settings, maxRepeats: parseInt(e.target.value), })} defaultValue={settings.maxRepeats} /> setSettings( { ...settings, debug: e.target.value === "true", })} > true false
); } ================================================ FILE: legacy/src/Pair.tsx ================================================ import React, { FC, useState } from 'react'; import { Stack, Button, IconButton, Tooltip, Badge } from "@mui/material"; import { AddPair, deletePair, UpdatePair } from "../../src/api/workflow"; import { WorkflowFile } from "maxun-core"; import { ClearButton } from "../../src/components/ui/buttons/ClearButton"; import { GenericModal } from "../../src/components/ui/GenericModal"; import { PairEditForm } from "./PairEditForm"; import { PairDisplayDiv } from "./PairDisplayDiv"; import { EditButton } from "../../src/components/ui/buttons/EditButton"; import { BreakpointButton } from "../../src/components/ui/buttons/BreakpointButton"; import VisibilityIcon from '@mui/icons-material/Visibility'; import styled from "styled-components"; import { LoadingButton } from "@mui/lab"; type WhereWhatPair = WorkflowFile["workflow"][number]; interface PairProps { handleBreakpoint: () => void; isActive: boolean; index: number; pair: WhereWhatPair; updateWorkflow: (workflow: WorkflowFile) => void; numberOfPairs: number; handleSelectPairForEdit: (pair: WhereWhatPair, index: number) => void; } export const Pair: FC = ( { handleBreakpoint, isActive, index, pair, updateWorkflow, numberOfPairs, handleSelectPairForEdit }) => { const [open, setOpen] = useState(false); const [edit, setEdit] = useState(false); const [breakpoint, setBreakpoint] = useState(false); const enableEdit = () => setEdit(true); const disableEdit = () => setEdit(false); const handleOpen = () => setOpen(true); const handleClose = () => { setOpen(false); disableEdit(); } const handleDelete = () => { deletePair(index - 1).then((updatedWorkflow) => { updateWorkflow(updatedWorkflow); }).catch((error) => { console.error(error); }); }; const handleEdit = (pair: WhereWhatPair, newIndex: number) => { if (newIndex !== index) { AddPair((newIndex - 1), pair).then((updatedWorkflow) => { 
updateWorkflow(updatedWorkflow); }).catch((error) => { console.error(error); }); } else { UpdatePair((index - 1), pair).then((updatedWorkflow) => { updateWorkflow(updatedWorkflow); }).catch((error) => { console.error(error); }); } handleClose(); }; const handleBreakpointClick = () => { setBreakpoint(!breakpoint); handleBreakpoint(); }; return (
{isActive ? : breakpoint ? : }
{ enableEdit(); handleOpen(); }} />
{edit ? :
}
); }; interface ViewButtonProps { handleClick: () => void; } const ViewButton = ({ handleClick }: ViewButtonProps) => { return ( ); } const PairWrapper = styled.div<{ isActive: boolean }>` background-color: ${({ isActive }) => isActive ? 'rgba(255, 0, 0, 0.1)' : 'transparent'}; border: ${({ isActive }) => isActive ? 'solid 2px red' : 'none'}; display: flex; flex-direction: row; flex-grow: 1; width: 98%; color: gray; &:hover { color: dimgray; background: ${({ isActive }) => isActive ? 'rgba(255, 0, 0, 0.1)' : 'transparent'}; } `; ================================================ FILE: legacy/src/PairDetail.tsx ================================================ import React, { useLayoutEffect, useRef, useState } from 'react'; import { WhereWhatPair } from "maxun-core"; import { IconButton, Stack, TextField, Tooltip, Typography } from "@mui/material"; import { Close, KeyboardArrowDown, KeyboardArrowUp } from "@mui/icons-material"; import TreeView from '@mui/lab/TreeView'; import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; import ChevronRightIcon from '@mui/icons-material/ChevronRight'; import TreeItem from '@mui/lab/TreeItem'; import { AddButton } from "../../src/components/ui/buttons/AddButton"; import { WarningText } from "../../src/components/ui/texts"; import NotificationImportantIcon from '@mui/icons-material/NotificationImportant'; import { RemoveButton } from "../../src/components/ui/buttons/RemoveButton"; import { AddWhereCondModal } from "./AddWhereCondModal"; import { useSocketStore } from "../../src/context/socket"; import { AddWhatCondModal } from "./AddWhatCondModal"; interface PairDetailProps { pair: WhereWhatPair | null; index: number; } export const PairDetail = ({ pair, index }: PairDetailProps) => { const [pairIsSelected, setPairIsSelected] = useState(false); const [collapseWhere, setCollapseWhere] = useState(true); const [collapseWhat, setCollapseWhat] = useState(true); const [rerender, setRerender] = useState(false); const [expanded, 
setExpanded] = React.useState( pair ? Object.keys(pair.where).map((key, index) => `${key}-${index}`) : [] ); const [addWhereCondOpen, setAddWhereCondOpen] = useState(false); const [addWhatCondOpen, setAddWhatCondOpen] = useState(false); const { socket } = useSocketStore(); const handleCollapseWhere = () => { setCollapseWhere(!collapseWhere); } const handleCollapseWhat = () => { setCollapseWhat(!collapseWhat); } const handleToggle = (event: React.SyntheticEvent, nodeIds: string[]) => { setExpanded(nodeIds); }; useLayoutEffect(() => { if (pair) { setPairIsSelected(true); } }, [pair]) const handleChangeValue = (value: any, where: boolean, keys: (string | number)[]) => { // a moving reference to internal objects within pair.where or pair.what let schema: any = where ? pair?.where : pair?.what; const length = keys.length; for (let i = 0; i < length - 1; i++) { const elem = keys[i]; if (!schema[elem]) schema[elem] = {} schema = schema[elem]; } schema[keys[length - 1]] = value; if (pair && socket) { socket.emit('updatePair', { index: index - 1, pair: pair }); } setRerender(!rerender); } const DisplayValueContent = (value: any, keys: (string | number)[], where: boolean = true) => { switch (typeof (value)) { case 'string': return { try { const obj = JSON.parse(e.target.value); handleChangeValue(obj, where, keys); } catch (error) { const num = Number(e.target.value); if (!isNaN(num)) { handleChangeValue(num, where, keys); } handleChangeValue(e.target.value, where, keys) } }} defaultValue={value} key={`text-field-${keys.join('-')}-${where}`} /> case 'number': return handleChangeValue(Number(e.target.value), where, keys)} defaultValue={value} key={`text-field-${keys.join('-')}-${where}`} /> case 'object': if (value) { if (Array.isArray(value)) { return ( { value.map((element, index) => { return DisplayValueContent(element, [...keys, index], where); }) } { let prevValue: any = where ? 
pair?.where : pair?.what; for (const key of keys) { prevValue = prevValue[key]; } handleChangeValue([...prevValue, ''], where, keys); setRerender(!rerender); }} hoverEffect={false} /> { let prevValue: any = where ? pair?.where : pair?.what; for (const key of keys) { prevValue = prevValue[key]; } prevValue.splice(-1); handleChangeValue(prevValue, where, keys); setRerender(!rerender); }} /> ) } else { return ( } defaultExpandIcon={} sx={{ flexGrow: 1, overflowY: 'auto' }} key={`tree-view-nested-${keys.join('-')}-${where}`} > { Object.keys(value).map((key2, index) => { return ( {DisplayValueContent(value[key2], [...keys, key2], where)} ) }) } ) } } break; default: return null; } } return ( {pair && setAddWhatCondOpen(false)} pair={pair} index={index} /> setAddWhereCondOpen(false)} pair={pair} index={index} /> } { pairIsSelected ? (
Pair number: {index} { if (pair && socket) { socket.emit('updatePair', { index: index - 1, pair: pair }); pair.id = e.target.value; } }} value={pair ? pair.id ? pair.id : '' : ''} /> Where
{ setAddWhereCondOpen(true); }} style={{ color: 'rgba(0, 0, 0, 0.54)', background: 'transparent' }} />
{(collapseWhere && pair && pair.where) ? {Object.keys(pair.where).map((key, index) => { return ( } defaultExpandIcon={} sx={{ flexGrow: 1, overflowY: 'auto' }} onNodeToggle={handleToggle} key={`tree-view-${key}-${index}`} > { // @ts-ignore DisplayValueContent(pair.where[key], [key]) } ); })} : null } What
{ setAddWhatCondOpen(true); }} style={{ color: 'rgba(0, 0, 0, 0.54)', background: 'transparent' }} />
{(collapseWhat && pair && pair.what) ? ( {Object.keys(pair.what).map((key, index) => { return ( } defaultExpandIcon={} sx={{ flexGrow: 1, overflowY: 'auto' }} key={`tree-view-2-${key}-${index}`} > { // @ts-ignore DisplayValueContent(pair.what[key], [key], false) }
{ //@ts-ignore pair.what.splice(key, 1); setRerender(!rerender); }} />
); })}
) : null }
) : No pair from the left side panel was selected. }
); } interface CollapseButtonProps { handleClick: () => void; isCollapsed?: boolean; } const CollapseButton = ({ handleClick, isCollapsed }: CollapseButtonProps) => { return ( {isCollapsed ? : } ); } const CloseButton = ({ handleClick }: CollapseButtonProps) => { return ( ); } ================================================ FILE: legacy/src/PairDisplayDiv.tsx ================================================ import React, { FC } from 'react'; import Typography from '@mui/material/Typography'; import { WhereWhatPair } from "maxun-core"; import styled from "styled-components"; interface PairDisplayDivProps { index: string; pair: WhereWhatPair; } export const PairDisplayDiv: FC = ({ index, pair }) => { return (
{`Index: ${index}`} {pair.id ? `, Id: ${pair.id}` : ''} {"Where:"}
{JSON.stringify(pair?.where, undefined, 2)}
{"What:"}
{JSON.stringify(pair?.what, undefined, 2)}
); } const DescriptionWrapper = styled.div` margin: 0; font-family: "Roboto","Helvetica","Arial",sans-serif; font-weight: 400; font-size: 1rem; line-height: 1.5; letter-spacing: 0.00938em; `; ================================================ FILE: legacy/src/PairEditForm.tsx ================================================ import { Button, TextField, Typography } from "@mui/material"; import React, { FC } from "react"; import { Preprocessor, WhereWhatPair } from "maxun-core"; interface PairProps { index: string; id?: string; where: string | null; what: string | null; } interface PairEditFormProps { onSubmitOfPair: (value: WhereWhatPair, index: number) => void; numberOfPairs: number; index?: string; where?: string; what?: string; id?: string; } export const PairEditForm: FC = ( { onSubmitOfPair, numberOfPairs, index, where, what, id, }) => { const [pairProps, setPairProps] = React.useState({ where: where || null, what: what || null, index: index || "1", id: id || '', }); const [errors, setErrors] = React.useState({ where: null, what: null, index: '', }); const handleInputChange = (event: React.ChangeEvent) => { const { id, value } = event.target; if (id === 'index') { if (parseInt(value, 10) < 1) { setErrors({ ...errors, index: 'Index must be greater than 0' }); return; } else { setErrors({ ...errors, index: '' }); } } setPairProps({ ...pairProps, [id]: value }); }; const validateAndSubmit = (event: React.SyntheticEvent) => { event.preventDefault(); let whereFromPair, whatFromPair; // validate where whereFromPair = { where: pairProps.where && pairProps.where !== '{"url":"","selectors":[""] }' ? 
JSON.parse(pairProps.where) : {}, what: [], }; const validationError = Preprocessor.validateWorkflow({ workflow: [whereFromPair] }); setErrors({ ...errors, where: null }); if (validationError) { setErrors({ ...errors, where: validationError.message }); return; } // validate what whatFromPair = { where: {}, what: pairProps.what && pairProps.what !== '[{"action":"","args":[""] }]' ? JSON.parse(pairProps.what) : [], }; const validationErrorWhat = Preprocessor.validateWorkflow({ workflow: [whatFromPair] }); setErrors({ ...errors, "what": null }); if (validationErrorWhat) { setErrors({ ...errors, what: validationErrorWhat.message }); return; } //validate index const index = parseInt(pairProps?.index, 10); if (index > (numberOfPairs + 1)) { if (numberOfPairs === 0) { setErrors(prevState => ({ ...prevState, index: 'Index of the first pair must be 1' })); return; } else { setErrors(prevState => ({ ...prevState, index: `Index must be in the range 1-${numberOfPairs + 1}` })); return; } } else { setErrors({ ...errors, index: '' }); } // submit the pair onSubmitOfPair(pairProps.id ? { id: pairProps.id, where: whereFromPair?.where || {}, what: whatFromPair?.what || [], } : { where: whereFromPair?.where || {}, what: whatFromPair?.what || [], } , index); }; return (
Raw pair edit form: ); };

================================================
FILE: legacy/src/Renderer.tsx
================================================
/**
 * Draws browser screenshots onto a canvas.
 *
 * Frames are scheduled through `requestAnimationFrame` (a newer request
 * replaces a pending one), rendered into an `OffscreenCanvas` when the
 * platform provides one, and then copied to the visible canvas. Screenshots
 * passed as strings are decoded through `Image` and kept in a small,
 * size-bounded cache keyed by the string itself.
 */
export class CanvasRenderer {
  /** Hard cap on cached images; enforced on every insert. */
  private static readonly MAX_CACHED_IMAGES = 20;
  /** Cache size to shrink to when heap usage exceeds `memoryThreshold`. */
  private static readonly TRIMMED_CACHE_SIZE = 10;

  private canvas: HTMLCanvasElement;
  private ctx: CanvasRenderingContext2D;
  private offscreenCanvas: OffscreenCanvas | null = null;
  private offscreenCtx: CanvasRenderingContext2D | null = null;
  private lastFrameRequest: number | null = null;
  private imageCache: Map<string, HTMLImageElement> = new Map();
  private consecutiveFrameCount: number = 0;
  private lastDrawTime: number = 0;
  private memoryCheckCounter: number = 0;
  private lastMemoryCheck: number = 0;
  private memoryThreshold: number = 100000000; // 100MB
  /** Set by `dispose()`; stops image loads that finish later from drawing. */
  private disposed: boolean = false;

  /**
   * @param canvas Visible canvas to render into.
   * @throws Error if a 2D context cannot be obtained from `canvas`.
   */
  constructor(canvas: HTMLCanvasElement) {
    this.canvas = canvas;

    // Get 2D context with optimized settings
    const ctx = canvas.getContext('2d', {
      alpha: false, // Disable alpha for better performance
      desynchronized: true, // Reduce latency when possible
    });

    if (!ctx) {
      throw new Error('Could not get 2D context from canvas');
    }

    this.ctx = ctx;

    // Apply performance optimizations
    this.ctx.imageSmoothingEnabled = false;

    // Set up offscreen canvas if supported
    if (typeof OffscreenCanvas !== 'undefined') {
      this.offscreenCanvas = new OffscreenCanvas(canvas.width, canvas.height);
      const offCtx = this.offscreenCanvas.getContext('2d', {
        alpha: false
      });

      if (offCtx) {
        this.offscreenCtx = offCtx as unknown as CanvasRenderingContext2D;
        this.offscreenCtx.imageSmoothingEnabled = false;
      }
    }

    // Initial timestamp
    this.lastDrawTime = performance.now();
    this.lastMemoryCheck = performance.now();
  }

  /**
   * Schedules a screenshot to be drawn on the next animation frame.
   * A still-pending earlier request is cancelled, so only the latest
   * screenshot is rendered.
   *
   * @param screenshot Image source string, or an already decoded image.
   * @param x Destination x coordinate.
   * @param y Destination y coordinate.
   * @param width Destination width; falls back to the image width.
   * @param height Destination height; falls back to the image height.
   */
  public drawScreenshot(
    screenshot: string | ImageBitmap | HTMLImageElement,
    x: number = 0,
    y: number = 0,
    width?: number,
    height?: number
  ): void {
    // Cancel any pending frame request
    if (this.lastFrameRequest !== null) {
      cancelAnimationFrame(this.lastFrameRequest);
    }

    // Check memory usage periodically (every 30 calls or every 5 seconds)
    this.memoryCheckCounter++;
    const now = performance.now();

    if (this.memoryCheckCounter >= 30 || now - this.lastMemoryCheck > 5000) {
      this.checkMemoryUsage();
      this.memoryCheckCounter = 0;
      this.lastMemoryCheck = now;
    }

    // Request a new frame
    this.lastFrameRequest = requestAnimationFrame(() => {
      this.renderFrame(screenshot, x, y, width, height);
    });
  }

  /** Draws one frame into the target context and presents it on the visible canvas. */
  private renderFrame(
    screenshot: string | ImageBitmap | HTMLImageElement,
    x: number,
    y: number,
    width?: number,
    height?: number
  ): void {
    // Target context (offscreen if available, otherwise main)
    const targetCtx = this.offscreenCtx || this.ctx;

    // Start timing the render
    const startTime = performance.now();
    const timeSinceLastDraw = startTime - this.lastDrawTime;

    // Adaptive frame skipping for high-frequency updates.
    // NOTE(review): as written this never skips — the counter is only
    // incremented inside the branch that already requires it to be > 5 and is
    // reset to 0 otherwise. Left unchanged on purpose: enabling it could drop
    // the final frame of a burst and leave a stale image on screen.
    if (timeSinceLastDraw < 16 && this.consecutiveFrameCount > 5) {
      this.consecutiveFrameCount++;

      // Skip some frames when we're getting excessive updates
      if (this.consecutiveFrameCount % 2 !== 0) {
        return;
      }
    } else {
      this.consecutiveFrameCount = 0;
    }

    try {
      if (typeof screenshot === 'string') {
        // Check if we have this image in cache
        let img = this.imageCache.get(screenshot);

        if (!img) {
          const created = new Image();
          created.src = screenshot;
          this.imageCache.set(screenshot, created);
          // Bound the cache on every insert; the heap-based cleanup alone
          // relies on `performance.memory`, which not every browser exposes.
          this.trimImageCache(CanvasRenderer.MAX_CACHED_IMAGES);

          // If image isn't loaded yet, draw when it loads
          if (!created.complete) {
            created.onload = () => {
              if (!this.disposed) {
                this.drawScreenshot(created, x, y, width, height);
              }
            };
            // Do not keep an image that failed to decode in the cache.
            created.onerror = () => {
              this.imageCache.delete(screenshot);
            };
            return;
          }

          img = created;
        } else if (!img.complete) {
          // Same screenshot requested again while still decoding: its onload
          // handler will draw it. Drawing now would be a no-op and would
          // present an empty offscreen buffer.
          return;
        }

        targetCtx.drawImage(
          img,
          x, y,
          width || img.width,
          height || img.height
        );
      } else {
        // Draw ImageBitmap or HTMLImageElement directly
        targetCtx.drawImage(
          screenshot,
          x, y,
          width || screenshot.width,
          height || screenshot.height
        );
      }

      // If using offscreen canvas, copy to main canvas
      if (this.offscreenCanvas && this.offscreenCtx) {
        if ('transferToImageBitmap' in this.offscreenCanvas) {
          // Use more efficient transfer when available
          const bitmap = this.offscreenCanvas.transferToImageBitmap();
          this.ctx.drawImage(bitmap, 0, 0);
        } else {
          // Fallback to drawImage
          this.ctx.drawImage(this.offscreenCanvas, 0, 0);
        }
      }

      // Update timestamp
      this.lastDrawTime = performance.now();
    } catch (error) {
      console.error('Error rendering frame:', error);
    }
  }

  /**
   * Checks current memory usage and cleans up if necessary.
   * Only has an effect where `performance.memory` is available.
   */
  private checkMemoryUsage(): void {
    if (window.performance && (performance as any).memory) {
      const memory = (performance as any).memory;

      if (memory.usedJSHeapSize > this.memoryThreshold) {
        this.cleanupMemory();
      }
    }
  }

  /**
   * Cleans up resources to reduce memory usage
   */
  private cleanupMemory(): void {
    // Keep only the most recent images
    this.trimImageCache(CanvasRenderer.TRIMMED_CACHE_SIZE);

    // Suggest garbage collection
    if (window.gc) {
      try {
        window.gc();
      } catch (e) {
        // GC not available, ignore
      }
    }
  }

  /**
   * Evicts the oldest cache entries until at most `maxEntries` remain.
   * A `Map` iterates in insertion order, so the first keys are the oldest.
   */
  private trimImageCache(maxEntries: number): void {
    let excess = this.imageCache.size - maxEntries;
    if (excess <= 0) {
      return;
    }

    for (const key of this.imageCache.keys()) {
      if (excess <= 0) {
        break;
      }
      this.imageCache.delete(key);
      excess--;
    }
  }

  /**
   * Update canvas dimensions
   */
  public updateCanvasSize(width: number, height: number): void {
    this.canvas.width = width;
    this.canvas.height = height;

    // Re-apply context settings
    this.ctx.imageSmoothingEnabled = false;

    // Update offscreen canvas if available
    if (this.offscreenCanvas) {
      this.offscreenCanvas.width = width;
      this.offscreenCanvas.height = height;

      if (this.offscreenCtx) {
        this.offscreenCtx.imageSmoothingEnabled = false;
      }
    }
  }

  /**
   * Clean up resources
   */
  public dispose(): void {
    // Image loads that complete after this point must not schedule new frames
    this.disposed = true;

    // Cancel any pending frame requests
    if (this.lastFrameRequest !== null) {
      cancelAnimationFrame(this.lastFrameRequest);
      this.lastFrameRequest = null;
    }

    // Clear the image cache
    this.imageCache.clear();

    // Clear canvases
    this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height);

    if (this.offscreenCtx && this.offscreenCanvas) {
      this.offscreenCtx.clearRect(0, 0, this.offscreenCanvas.width, this.offscreenCanvas.height);
    }
  }
}

================================================
FILE: legacy/src/RobotEdit.tsx
================================================
import React, { useState,
useEffect } from 'react'; import { useTranslation } from 'react-i18next'; import { GenericModal } from "../ui/GenericModal"; import { TextField, Typography, Box, Button, IconButton, InputAdornment } from "@mui/material"; import { Visibility, VisibilityOff } from '@mui/icons-material'; import { modalStyle } from "../recorder/AddWhereCondModal"; import { useGlobalInfoStore } from '../../context/globalInfo'; import { getStoredRecording, updateRecording } from '../../api/storage'; import { WhereWhatPair } from 'maxun-core'; interface RobotMeta { name: string; id: string; createdAt: string; pairs: number; updatedAt: string; params: any[]; } interface RobotWorkflow { workflow: WhereWhatPair[]; } interface ScheduleConfig { runEvery: number; runEveryUnit: 'MINUTES' | 'HOURS' | 'DAYS' | 'WEEKS' | 'MONTHS'; startFrom: 'SUNDAY' | 'MONDAY' | 'TUESDAY' | 'WEDNESDAY' | 'THURSDAY' | 'FRIDAY' | 'SATURDAY'; atTimeStart?: string; atTimeEnd?: string; timezone: string; lastRunAt?: Date; nextRunAt?: Date; cronExpression?: string; } export interface RobotSettings { id: string; userId?: number; recording_meta: RobotMeta; recording: RobotWorkflow; google_sheet_email?: string | null; google_sheet_name?: string | null; google_sheet_id?: string | null; google_access_token?: string | null; google_refresh_token?: string | null; schedule?: ScheduleConfig | null; } interface RobotSettingsProps { isOpen: boolean; handleStart: (settings: RobotSettings) => void; handleClose: () => void; initialSettings?: RobotSettings | null; } interface CredentialInfo { value: string; type: string; } interface Credentials { [key: string]: CredentialInfo; } interface CredentialVisibility { [key: string]: boolean; } interface GroupedCredentials { passwords: string[]; emails: string[]; usernames: string[]; others: string[]; } interface ScrapeListLimit { pairIndex: number; actionIndex: number; argIndex: number; currentLimit: number; } export const RobotEditModal = ({ isOpen, handleStart, handleClose, initialSettings 
}: RobotSettingsProps) => { const { t } = useTranslation(); const [credentials, setCredentials] = useState({}); const { recordingId, notify, setRerenderRobots } = useGlobalInfoStore(); const [robot, setRobot] = useState(null); const [credentialGroups, setCredentialGroups] = useState({ passwords: [], emails: [], usernames: [], others: [] }); const [showPasswords, setShowPasswords] = useState({}); const [scrapeListLimits, setScrapeListLimits] = useState([]); const isEmailPattern = (value: string): boolean => { return value.includes('@'); }; const isUsernameSelector = (selector: string): boolean => { return selector.toLowerCase().includes('username') || selector.toLowerCase().includes('user') || selector.toLowerCase().includes('email'); }; const determineCredentialType = (selector: string, info: CredentialInfo): 'password' | 'email' | 'username' | 'other' => { if (info.type === 'password' || selector.toLowerCase().includes('password')) { return 'password'; } if (isEmailPattern(info.value) || selector.toLowerCase().includes('email')) { return 'email'; } if (isUsernameSelector(selector)) { return 'username'; } return 'other'; }; useEffect(() => { if (isOpen) { getRobot(); } }, [isOpen]); useEffect(() => { if (robot?.recording?.workflow) { const extractedCredentials = extractInitialCredentials(robot.recording.workflow); setCredentials(extractedCredentials); setCredentialGroups(groupCredentialsByType(extractedCredentials)); findScrapeListLimits(robot.recording.workflow); } }, [robot]); const findScrapeListLimits = (workflow: WhereWhatPair[]) => { const limits: ScrapeListLimit[] = []; workflow.forEach((pair, pairIndex) => { if (!pair.what) return; pair.what.forEach((action, actionIndex) => { if (action.action === 'scrapeList' && action.args && action.args.length > 0) { // Check if first argument has a limit property const arg = action.args[0]; if (arg && typeof arg === 'object' && 'limit' in arg) { limits.push({ pairIndex, actionIndex, argIndex: 0, currentLimit: arg.limit 
/**
 * Reconstructs the text entered into each input from a recorded workflow.
 *
 * Two recording shapes are understood:
 *  - a `type` action carrying a whole string (more than one character); the
 *    first such value seen for a selector is kept;
 *  - runs of single-key `type`/`press` actions on one selector, replayed key
 *    by key: printable ASCII is appended and `Backspace` removes the last
 *    character. Any other key (or a missing key argument) is ignored.
 *
 * @param workflow - Recorded steps; steps without a `what` list are skipped.
 * @returns Map from selector to the reconstructed value and its input type
 *          (the action's third argument, defaulting to 'text').
 */
function extractInitialCredentials(workflow: any[]): Credentials {
  const credentials: Credentials = {};

  const isPrintableCharacter = (char: unknown): boolean =>
    typeof char === 'string' && /^[\x20-\x7E]$/.test(char);

  // Replays one recorded key on top of the text accumulated so far. Being
  // tolerant of non-string keys matters: a malformed action inside a key run
  // used to throw and abort the whole extraction.
  const applyKey = (text: string, key: unknown): string => {
    if (key === 'Backspace') return text.slice(0, -1);
    return isPrintableCharacter(key) ? text + (key as string) : text;
  };

  const isKeyAction = (name: unknown): boolean => name === 'type' || name === 'press';

  workflow.forEach(step => {
    if (!step.what) return;

    // Selector/value/type of the key run most recently replayed in this step.
    let currentSelector = '';
    let currentValue = '';
    let currentType = '';
    let i = 0;

    while (i < step.what.length) {
      const action = step.what[i];

      if (!action.action || !action.args?.[0]) {
        i++;
        continue;
      }

      const selector = action.args[0];
      const typed = action.args[1];

      // Only type/press actions with a string payload contribute text.
      if (typeof typed !== 'string' || !isKeyAction(action.action)) {
        i++;
        continue;
      }

      // Whole word typed in a single action: first value per selector wins.
      if (action.action === 'type' && typed.length > 1) {
        if (!credentials[selector]) {
          credentials[selector] = {
            value: typed,
            type: action.args[2] || 'text'
          };
        }
        i++;
        continue;
      }

      // Start (or resume) a key-by-key run. When the selector changes, pick up
      // whatever was already reconstructed for it so edits accumulate.
      if (selector !== currentSelector) {
        currentSelector = selector;
        currentValue = credentials[selector]?.value || '';
        currentType = action.args[2] || credentials[selector]?.type || 'text';
      }

      currentValue = applyKey(currentValue, typed);

      // Consume the rest of the run: consecutive key actions on the same selector.
      let j = i + 1;
      while (j < step.what.length) {
        const nextAction = step.what[j];
        const sameRun =
          nextAction.action &&
          nextAction.args?.[0] &&
          nextAction.args[0] === selector &&
          isKeyAction(nextAction.action);
        if (!sameRun) break;

        currentValue = applyKey(currentValue, nextAction.args[1]);
        j++;
      }

      credentials[currentSelector] = {
        value: currentValue,
        type: currentType
      };
      i = j;
    }
  });

  return credentials;
}
/**
 * Sets a new `limit` on one scrapeList argument of the robot's workflow and
 * mirrors it in the list of editable limits.
 *
 * The robot updater is kept pure: it copies the path down to the changed
 * argument instead of writing into the previous state, and it does not call
 * other state setters. React may invoke updater functions more than once, so
 * both properties matter.
 *
 * @param pairIndex - Index of the where/what pair inside the workflow.
 * @param actionIndex - Index of the action inside that pair's `what` list.
 * @param argIndex - Index of the argument object that carries `limit`.
 * @param newLimit - Limit to store.
 */
const handleLimitChange = (pairIndex: number, actionIndex: number, argIndex: number, newLimit: number) => {
  setRobot((prev) => {
    if (!prev) return prev;

    const targetArg = prev.recording.workflow[pairIndex]?.what?.[actionIndex]?.args?.[argIndex];
    // Nothing to update when the path does not lead to an argument object.
    if (typeof targetArg !== 'object' || targetArg === null || Array.isArray(targetArg)) {
      return prev;
    }

    const workflow = prev.recording.workflow.map((pair, currentPair) => {
      if (currentPair !== pairIndex) return pair;

      return {
        ...pair,
        what: (pair.what ?? []).map((action, currentAction) => {
          if (currentAction !== actionIndex) return action;

          return {
            ...action,
            args: (action.args ?? []).map((arg, currentArg) =>
              currentArg === argIndex ? { ...arg, limit: newLimit } : arg
            ),
          };
        }),
      };
    });

    return {
      ...prev,
      recording: { ...prev.recording, workflow },
    };
  });

  // Only an entry with exactly these indices is touched, so this is a no-op
  // when the indices do not correspond to a known scrapeList limit.
  setScrapeListLimits((prevLimits) =>
    prevLimits.map((item) =>
      item.pairIndex === pairIndex && item.actionIndex === actionIndex && item.argIndex === argIndex
        ? { ...item, currentLimit: newLimit }
        : item
    )
  );
};
renderCredentialFields = (selectors: string[], headerText: string, defaultType: 'text' | 'password' = 'text') => { if (selectors.length === 0) return null; return ( <> {selectors.map((selector, index) => { const isVisible = showPasswords[selector]; return ( handleCredentialChange(selector, e.target.value)} style={{ marginBottom: '20px' }} InputProps={{ endAdornment: ( handleClickShowPassword(selector)} edge="end" disabled={!credentials[selector]?.value} > {isVisible ? : } ), }} /> ); })} ); }; const renderScrapeListLimitFields = () => { if (scrapeListLimits.length === 0) return null; return ( <> {t('List Limits')} {scrapeListLimits.map((limitInfo, index) => ( { const value = parseInt(e.target.value, 10); if (value >= 1) { handleLimitChange( limitInfo.pairIndex, limitInfo.actionIndex, limitInfo.argIndex, value ); } }} inputProps={{ min: 1 }} style={{ marginBottom: '20px' }} /> ))} ); }; const handleSave = async () => { if (!robot) return; try { const credentialsForPayload = Object.entries(credentials).reduce((acc, [selector, info]) => { const enforceType = info.type === 'password' ? 
'password' : 'text'; acc[selector] = { value: info.value, type: enforceType }; return acc; }, {} as Record); const lastPair = robot.recording.workflow[robot.recording.workflow.length - 1]; const targetUrl = lastPair?.what.find(action => action.action === "goto")?.args?.[0]; const payload = { name: robot.recording_meta.name, limits: scrapeListLimits.map(limit => ({ pairIndex: limit.pairIndex, actionIndex: limit.actionIndex, argIndex: limit.argIndex, limit: limit.currentLimit })), credentials: credentialsForPayload, targetUrl: targetUrl, }; const success = await updateRecording(robot.recording_meta.id, payload); if (success) { setRerenderRobots(true); notify('success', t('robot_edit.notifications.update_success')); handleStart(robot); handleClose(); } else { notify('error', t('robot_edit.notifications.update_failed')); } } catch (error) { notify('error', t('robot_edit.notifications.update_error')); console.error('Error updating robot:', error); } }; const lastPair = robot?.recording.workflow[robot?.recording.workflow.length - 1]; const targetUrl = lastPair?.what.find(action => action.action === "goto")?.args?.[0]; return ( <> {t('robot_edit.title')} {robot && ( <> handleRobotNameChange(e.target.value)} style={{ marginBottom: '20px' }} /> handleTargetUrlChange(e.target.value)} style={{ marginBottom: '20px' }} /> {renderScrapeListLimitFields()} {(Object.keys(credentials).length > 0) && ( <> {t('Input Texts')} {renderAllCredentialFields()} )} )} ); }; ================================================ FILE: legacy/src/RobotSettings.tsx ================================================ import React, { useState, useEffect } from 'react'; import { useTranslation } from 'react-i18next'; import { GenericModal } from "../ui/GenericModal"; import { TextField, Typography, Box } from "@mui/material"; import { useGlobalInfoStore } from '../../context/globalInfo'; import { getStoredRecording } from '../../api/storage'; import { WhereWhatPair } from 'maxun-core'; import { getUserById 
} from "../../api/auth"; interface RobotMeta { name: string; id: string; createdAt: string; pairs: number; updatedAt: string; params: any[]; } interface RobotWorkflow { workflow: WhereWhatPair[]; } interface ScheduleConfig { runEvery: number; runEveryUnit: 'MINUTES' | 'HOURS' | 'DAYS' | 'WEEKS' | 'MONTHS'; startFrom: 'SUNDAY' | 'MONDAY' | 'TUESDAY' | 'WEDNESDAY' | 'THURSDAY' | 'FRIDAY' | 'SATURDAY'; atTimeStart?: string; atTimeEnd?: string; timezone: string; lastRunAt?: Date; nextRunAt?: Date; cronExpression?: string; } export interface RobotSettings { id: string; userId?: number; recording_meta: RobotMeta; recording: RobotWorkflow; google_sheet_email?: string | null; google_sheet_name?: string | null; google_sheet_id?: string | null; google_access_token?: string | null; google_refresh_token?: string | null; schedule?: ScheduleConfig | null; } interface RobotSettingsProps { isOpen: boolean; handleStart: (settings: RobotSettings) => void; handleClose: () => void; initialSettings?: RobotSettings | null; } export const RobotSettingsModal = ({ isOpen, handleStart, handleClose, initialSettings }: RobotSettingsProps) => { const { t } = useTranslation(); const [userEmail, setUserEmail] = useState(null); const [robot, setRobot] = useState(null); const { recordingId, notify } = useGlobalInfoStore(); useEffect(() => { if (isOpen) { getRobot(); } }, [isOpen]); const getRobot = async () => { if (recordingId) { const robot = await getStoredRecording(recordingId); setRobot(robot); } else { notify('error', t('robot_settings.errors.robot_not_found')); } } const lastPair = robot?.recording.workflow[robot?.recording.workflow.length - 1]; // Find the `goto` action in `what` and retrieve its arguments const targetUrl = lastPair?.what.find(action => action.action === "goto")?.args?.[0]; useEffect(() => { const fetchUserEmail = async () => { if (robot && robot.userId) { const userData = await getUserById(robot.userId.toString()); if (userData && userData.user) { 
setUserEmail(userData.user.email); } } }; fetchUserEmail(); }, [robot?.userId]); return ( <> {t('robot_settings.title')} { robot && ( <> {robot.recording.workflow?.[0]?.what?.[0]?.args?.[0]?.limit !== undefined && ( )} ) } ); }; export const modalStyle = { top: "50%", left: "50%", transform: "translate(-50%, -50%)", width: "30%", backgroundColor: "background.paper", p: 4, height: "fit-content", display: "block", padding: "20px", }; ================================================ FILE: legacy/src/ScheduleSettings.tsx ================================================ import React, { useState, useEffect } from 'react'; import { useTranslation } from 'react-i18next'; import { GenericModal } from "../ui/GenericModal"; import { MenuItem, TextField, Typography, Box } from "@mui/material"; import { Dropdown } from "../ui/DropdownMui"; import Button from "@mui/material/Button"; import { validMomentTimezones } from '../../constants/const'; import { useGlobalInfoStore } from '../../context/globalInfo'; import { getSchedule, deleteSchedule } from '../../api/storage'; interface ScheduleSettingsProps { isOpen: boolean; handleStart: (settings: ScheduleSettings) => Promise; handleClose: () => void; initialSettings?: ScheduleSettings | null; } export interface ScheduleSettings { runEvery: number; runEveryUnit: string; startFrom: string; dayOfMonth?: string; atTimeStart?: string; atTimeEnd?: string; timezone: string; } export const ScheduleSettingsModal = ({ isOpen, handleStart, handleClose, initialSettings }: ScheduleSettingsProps) => { const { t } = useTranslation(); const [schedule, setSchedule] = useState(null); const [settings, setSettings] = useState({ runEvery: 1, runEveryUnit: 'HOURS', startFrom: 'MONDAY', dayOfMonth: '1', atTimeStart: '00:00', atTimeEnd: '01:00', timezone: 'UTC' }); useEffect(() => { if (initialSettings) { setSettings(initialSettings); } }, [initialSettings]); const handleChange = (field: keyof ScheduleSettings, value: string | number | boolean) => { 
/**
 * Picks the translated ordinal suffix ("st", "nd", "rd", "th") for a day of
 * the month given as a string.
 *
 * @param day - Day of the month as text, e.g. "1" or "22".
 * @returns The translated suffix, or an empty string when no day is given.
 */
const getDayOrdinal = (day: string | undefined) => {
  if (!day) return '';

  // 11, 12 and 13 take "th" even though they end in 1, 2 and 3.
  const endsInTeen = ['11', '12', '13'].includes(day.slice(-2));
  if (endsInTeen) {
    return t('schedule_settings.labels.on_day.th');
  }

  const finalDigit = day.slice(-1);
  if (finalDigit === '1') return t('schedule_settings.labels.on_day.st');
  if (finalDigit === '2') return t('schedule_settings.labels.on_day.nd');
  if (finalDigit === '3') return t('schedule_settings.labels.on_day.rd');
  return t('schedule_settings.labels.on_day.th');
};
t('schedule_settings.start_from') : t('schedule_settings.start_from')}: {schedule.startFrom.charAt(0).toUpperCase() + schedule.startFrom.slice(1).toLowerCase()} {schedule.runEveryUnit === 'MONTHS' && ( {t('schedule_settings.on_day')}: {schedule.dayOfMonth}{getDayOrdinal(schedule.dayOfMonth)} of the month )} {t('schedule_settings.at_around')}: {schedule.atTimeStart}, {schedule.timezone} {t('schedule_settings.timezone')} ) : ( <> {t('schedule_settings.labels.run_once_every')} handleChange('runEvery', parseInt(e.target.value))} sx={textStyle} inputProps={{ min: 1 }} /> handleChange('runEveryUnit', e.target.value)} sx={dropDownStyle} > {units.map((unit) => ( {unit.charAt(0).toUpperCase() + unit.slice(1).toLowerCase()} ))} {['MONTHS', 'WEEKS'].includes(settings.runEveryUnit) ? t('schedule_settings.labels.start_from_label') : t('schedule_settings.labels.start_from_label')} handleChange('startFrom', e.target.value)} sx={dropDownStyle} > {days.map((day) => ( {day.charAt(0).toUpperCase() + day.slice(1).toLowerCase()} ))} {settings.runEveryUnit === 'MONTHS' && ( {t('schedule_settings.labels.on_day_of_month')} handleChange('dayOfMonth', e.target.value)} sx={textStyle} inputProps={{ min: 1, max: 31 }} /> )} {['MINUTES', 'HOURS'].includes(settings.runEveryUnit) ? 
/**
 * Converts points and rectangles between canvas coordinates and browser
 * coordinates by scaling each axis with the ratio of the two sizes.
 *
 * The canvas size comes from `getResponsiveDimensions()` and the browser size
 * from the `BROWSER_DEFAULT_*` constants; both can be replaced later through
 * `updateDimensions`.
 */
export class CoordinateMapper {
  private canvasWidth: number;
  private canvasHeight: number;
  private browserWidth: number;
  private browserHeight: number;

  // One-entry memo for mapBrowserRectToCanvas: the last input edges and the
  // rectangle that was produced for them.
  private lastBrowserRect: { left: number, top: number, right: number, bottom: number } | null = null;
  private lastCanvasRect: DOMRect | null = null;

  constructor() {
    const { canvasWidth, canvasHeight } = getResponsiveDimensions();
    this.canvasWidth = canvasWidth;
    this.canvasHeight = canvasHeight;
    this.browserWidth = BROWSER_DEFAULT_WIDTH;
    this.browserHeight = BROWSER_DEFAULT_HEIGHT;
  }

  /** Scales a canvas point to browser coordinates. */
  mapCanvasToBrowser(coord: { x: number, y: number }): { x: number, y: number } {
    const x = (coord.x / this.canvasWidth) * this.browserWidth;
    const y = (coord.y / this.canvasHeight) * this.browserHeight;
    return { x, y };
  }

  /** Scales a browser point to canvas coordinates. */
  mapBrowserToCanvas(coord: { x: number, y: number }): { x: number, y: number } {
    const x = (coord.x / this.browserWidth) * this.canvasWidth;
    const y = (coord.y / this.browserHeight) * this.canvasHeight;
    return { x, y };
  }

  /**
   * Scales a browser rectangle to canvas coordinates.
   *
   * When called again with the same four edges, the previously created
   * rectangle instance is returned instead of a new one.
   */
  mapBrowserRectToCanvas(rect: DOMRect): DOMRect {
    const { left, top, right, bottom } = rect;
    const previous = this.lastBrowserRect;

    const sameAsPrevious =
      previous !== null &&
      previous.left === left &&
      previous.top === top &&
      previous.right === right &&
      previous.bottom === bottom;
    if (sameAsPrevious) {
      return this.lastCanvasRect!;
    }

    const origin = this.mapBrowserToCanvas({ x: left, y: top });
    const corner = this.mapBrowserToCanvas({ x: right, y: bottom });
    const mapped = new DOMRect(origin.x, origin.y, corner.x - origin.x, corner.y - origin.y);

    this.lastBrowserRect = { left, top, right, bottom };
    this.lastCanvasRect = mapped;
    return mapped;
  }

  /** Scales a canvas rectangle to browser coordinates (not memoized). */
  mapCanvasRectToBrowser(rect: DOMRect): DOMRect {
    const origin = this.mapCanvasToBrowser({ x: rect.left, y: rect.top });
    const corner = this.mapCanvasToBrowser({ x: rect.right, y: rect.bottom });
    return new DOMRect(origin.x, origin.y, corner.x - origin.x, corner.y - origin.y);
  }

  /**
   * Replaces any of the four dimensions and drops the memoized rectangle.
   * Arguments that are omitted or falsy (including 0) leave the current value
   * in place.
   */
  updateDimensions(canvasWidth?: number, canvasHeight?: number, browserWidth?: number, browserHeight?: number) {
    if (canvasWidth) {
      this.canvasWidth = canvasWidth;
    }
    if (canvasHeight) {
      this.canvasHeight = canvasHeight;
    }
    if (browserWidth) {
      this.browserWidth = browserWidth;
    }
    if (browserHeight) {
      this.browserHeight = browserHeight;
    }

    this.lastBrowserRect = null;
    this.lastCanvasRect = null;
  }
}
/**
 * Scales every coordinate and size of a rectangle with
 * `mapPixelFromSmallerToLarger`, using `browserWidth / 100` and
 * `ONE_PERCENT_OF_VIEWPORT_W` for horizontal values and the height
 * counterparts for vertical values.
 *
 * The function has no side effects; the debug `console.log` that used to run
 * on every call has been removed.
 *
 * @param rect - Rectangle to scale.
 * @param browserWidth - Width whose one-percent unit the horizontal values are measured in.
 * @param browserHeight - Height whose one-percent unit the vertical values are measured in.
 * @returns A plain object with the scaled `x`, `y`, `width`, `height`, `top`,
 *          `right`, `bottom` and `left`.
 */
export const mapRect = (
  rect: DOMRect,
  browserWidth: number,
  browserHeight: number,
) => {
  const scaleHorizontal = (pixel: number): number =>
    mapPixelFromSmallerToLarger(browserWidth / 100, ONE_PERCENT_OF_VIEWPORT_W, pixel);
  const scaleVertical = (pixel: number): number =>
    mapPixelFromSmallerToLarger(browserHeight / 100, ONE_PERCENT_OF_VIEWPORT_H, pixel);

  return {
    x: scaleHorizontal(rect.x),
    y: scaleVertical(rect.y),
    width: scaleHorizontal(rect.width),
    height: scaleVertical(rect.height),
    top: scaleVertical(rect.top),
    right: scaleHorizontal(rect.right),
    bottom: scaleVertical(rect.bottom),
    left: scaleHorizontal(rect.left),
  };
};
/**
 * Converts a pixel offset measured on the larger screen into the matching
 * offset on the smaller screen, rounded to the nearest whole pixel.
 *
 * @param onePercentOfSmallerScreen - Size of one percent of the smaller screen.
 * @param onePercentOfLargerScreen - Size of one percent of the larger screen.
 * @param pixel - Offset on the larger screen.
 * @returns The rounded offset on the smaller screen.
 */
const mapPixelFromLargerToSmaller = (
  onePercentOfSmallerScreen: number,
  onePercentOfLargerScreen: number,
  pixel: number
): number =>
  // First express the offset in percent of the larger screen, then scale it.
  Math.round((pixel / onePercentOfLargerScreen) * onePercentOfSmallerScreen);
/**
 * Heuristic method to find collections of "interesting" items on the page.
 *
 * The viewport is sampled on a grid at several scroll positions. For the
 * element under each sample point, all elements sharing its structural
 * selector (and larger than `minArea`) are scored; the best-scoring selector
 * wins. The matched elements are then replaced by their parents for as long
 * as every element has a parent and no two elements would collapse into the
 * same parent.
 *
 * @param {number} [maxCountPerPage=50] Selectors matching this many elements or more are ignored.
 * @param {number} [minArea=20000] Minimum area (offsetWidth * offsetHeight) of a counted element.
 * @param {number} [scrolls=3] Number of viewport-height scroll positions to sample.
 * @param {string} [metricType='size_deviation'] 'total_area' or 'size_deviation'.
 * @returns {Array<HTMLElement>} A collection of interesting DOM nodes
 * (online store products, plane tickets, list items... and many more?)
 */
function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, metricType = 'size_deviation') {
  // Remember the scroll position now so it can be put back after sampling.
  const restoreScroll = (() => {
    const { scrollX, scrollY } = window;
    return () => {
      window.scrollTo(scrollX, scrollY);
    };
  })();

  /**
   * @typedef {Array<{x: number, y: number}>} Grid
   */

  /**
   * Returns an array of grid-aligned {x,y} points covering the viewport.
   * @param {number} [granularity=0.005] sets the number of generated points
   * (the higher the granularity, the more points); the step is 1 / granularity pixels.
   * @returns {Grid} Array of {x, y} objects.
   */
  function getGrid(startX = 0, startY = 0, granularity = 0.005) {
    const width = window.innerWidth;
    const height = window.innerHeight;
    const step = 1 / granularity;

    const out = [];
    for (let x = 0; x < width; x += step) {
      for (let y = 0; y < height; y += step) {
        out.push({ x: startX + x, y: startY + y });
      }
    }
    return out;
  }

  let maxSelector = { selector: 'body', metric: 0 };

  const updateMaximumWithPoint = (point) => {
    const currentElement = document.elementFromPoint(point.x, point.y);
    // elementFromPoint yields null when nothing is rendered at the point.
    if (!currentElement) {
      return;
    }

    const selector = GetSelectorStructural(currentElement);

    const elements = Array.from(document.querySelectorAll(selector))
      .filter((element) => area(element) > minArea);

    // If the current selector targets less than three elements,
    // we consider it not interesting (would be a very underwhelming scraper)
    if (elements.length < 3) {
      return;
    }

    let metric = null;

    if (metricType === 'total_area') {
      metric = elements
        .reduce((p, x) => p + area(x), 0);
    } else if (metricType === 'size_deviation') {
      // This could use a proper "statistics" approach... but meh, so far so good!
      const sizes = elements
        .map((element) => area(element));

      metric = (1 - (Math.max(...sizes) - Math.min(...sizes)) / Math.max(...sizes));
    }

    if (metric > maxSelector.metric && elements.length < maxCountPerPage) {
      maxSelector = { selector, metric };
    }
  };

  for (let scroll = 0; scroll < scrolls; scroll += 1) {
    window.scrollTo(0, scroll * window.innerHeight);

    const grid = getGrid();

    grid.forEach(updateMaximumWithPoint);
  }

  restoreScroll();

  let out = Array.from(document.querySelectorAll(maxSelector.selector));

  const different = (x, i, a) => a.findIndex((e) => e === x) === i;
  const hasParent = (x) => x.parentElement !== null;

  // As long as we don't merge any two elements by substituting them for their
  // parents, we substitute. The parent check must use `every`: `forEach`
  // returns undefined, which made this loop dead code. The length guard keeps
  // an empty result from looping forever (`every` is true on an empty array).
  while (out.length > 0
    && out.every(hasParent)
    && out.map((x) => x.parentElement).every(different)) {
    out = out.map((x) => x.parentElement);
  }

  return out;
}
/**
 * Returns a "scrape" result from the current page.
 *
 * Each matched element becomes one flat record: `img_<n>` entries for the
 * images found inside it, followed by `record_<nnnn>` entries holding the
 * trimmed lines of its `innerText`.
 *
 * @param {string|null} [selector=null] CSS selector of the elements to scrape;
 * when omitted, `scrapableHeuristics()` chooses the elements.
 * @returns {Array<Object>} One record per element.
 */
window.scrape = function (selector = null) {
  /**
   * Picks the URL to report for an image: the URL of the last `srcset` entry
   * when `srcset` is present, otherwise `src` unless it is a data URL
   * (in which case the result is `undefined`).
   */
  const pickImageUrl = (img) => {
    if (img.srcset) {
      const candidates = img.srcset.split(', ');
      const [lastUrl] = candidates[candidates.length - 1].split(' ');
      return lastUrl;
    }
    return img.src.indexOf('data:') === -1 ? img.src : undefined;
  };

  const roots = selector
    ? Array.from(document.querySelectorAll(selector))
    : scrapableHeuristics();

  return roots.map((record) => {
    const row = {};

    // Image entries go first; images without a usable URL leave a gap in the numbering.
    Array.from(record.querySelectorAll('img')).forEach((img, index) => {
      const imgUrl = pickImageUrl(img);
      if (imgUrl) {
        row[`img_${index}`] = imgUrl;
      }
    });

    // Then one entry per text line, keyed with a zero-padded line number.
    record.innerText.split('\n').forEach((line, index) => {
      row[`record_${String(index).padStart(4, '0')}`] = line.trim();
    });

    return row;
  });
};
* @returns {Array.>} */ window.scrapeSchema = function(lists) { // Utility functions remain the same function omap(object, f, kf = (x) => x) { return Object.fromEntries( Object.entries(object) .map(([k, v]) => [kf(k), f(v)]), ); } function ofilter(object, f) { return Object.fromEntries( Object.entries(object) .filter(([k, v]) => f(k, v)), ); } function findAllElements(config) { // Regular DOM query if no special delimiters if (!config.selector.includes('>>') && !config.selector.includes(':>>')) { return Array.from(document.querySelectorAll(config.selector)); } if (config.selector.includes(':>>')) { const parts = config.selector.split(':>>').map(s => s.trim()); let currentElements = [document]; // Traverse through each part of the selector for (let i = 0; i < parts.length; i++) { const part = parts[i]; const nextElements = []; const isLast = i === parts.length - 1; for (const element of currentElements) { try { const doc = element.contentDocument || element || element.contentWindow?.document; if (!doc) continue; if (part.startsWith('frame[name=') || part.startsWith('iframe[name=')) { const nameMatch = part.match(/\[name=['"]([^'"]+)['"]\]/); if (nameMatch && nameMatch[1]) { const frameName = nameMatch[1]; let foundFrames = []; if (doc.getElementsByName && typeof doc.getElementsByName === 'function') { foundFrames = Array.from(doc.getElementsByName(frameName)) .filter(el => el.tagName === 'FRAME' || el.tagName === 'IFRAME'); } if (foundFrames.length === 0) { const framesBySelector = Array.from(doc.querySelectorAll(`frame[name="${frameName}"], iframe[name="${frameName}"]`)); foundFrames = framesBySelector; } if (isLast) { nextElements.push(...foundFrames); } else { nextElements.push(...foundFrames); } continue; } } const found = Array.from(doc.querySelectorAll(part)); if (isLast) { nextElements.push(...found); } else { const frames = found.filter(el => el.tagName === 'IFRAME' || el.tagName === 'FRAME'); nextElements.push(...frames); } } catch (error) { 
console.warn('Cannot access iframe/frame content:', error, { part, element, index: i }); } } if (nextElements.length === 0) { console.warn('No elements found for part:', part, 'at depth:', i); return []; } currentElements = nextElements; } return currentElements; } // Handle shadow DOM traversal if (config.selector.includes('>>')) { const parts = config.selector.split('>>').map(s => s.trim()); let currentElements = [document]; for (const part of parts) { const nextElements = []; for (const element of currentElements) { // Try regular DOM first const found = Array.from(element.querySelectorAll(part)); // Then check shadow roots for (const foundEl of found) { if (foundEl.shadowRoot) { nextElements.push(foundEl.shadowRoot); } else { nextElements.push(foundEl); } } } currentElements = nextElements; } return currentElements.filter(el => !(el instanceof ShadowRoot)); } return []; } function getElementValue(element, attribute) { if (!element) return null; let baseURL; try { baseURL = element.ownerDocument?.location?.href || element.ownerDocument?.baseURI || window.location.origin; } catch (e) { baseURL = window.location.origin; } switch (attribute) { case 'href': { const relativeHref = element.getAttribute('href'); return relativeHref ? new URL(relativeHref, baseURL).href : null; } case 'src': { const relativeSrc = element.getAttribute('src'); return relativeSrc ? 
new URL(relativeSrc, baseURL).href : null; } case 'innerText': return element.innerText?.trim(); case 'textContent': return element.textContent?.trim(); case 'innerHTML': return element.innerHTML; case 'outerHTML': return element.outerHTML; default: return element.getAttribute(attribute) || element.innerText?.trim(); } } // Rest of the functions remain largely the same function getSeedKey(listObj) { const maxLength = Math.max(...Object.values( omap(listObj, (x) => findAllElements(x).length) )); return Object.keys( ofilter(listObj, (_, v) => findAllElements(v).length === maxLength) )[0]; } // Find minimal bounding elements function getMBEs(elements) { return elements.map((element) => { let candidate = element; const isUniqueChild = (e) => elements .filter((elem) => { // Handle both iframe and shadow DOM boundaries const sameContext = elem.getRootNode() === e.getRootNode() && elem.ownerDocument === e.ownerDocument; return sameContext && e.parentNode?.contains(elem); }) .length === 1; while (candidate && isUniqueChild(candidate)) { candidate = candidate.parentNode; } return candidate; }); } const seedName = getSeedKey(lists); const seedElements = findAllElements(lists[seedName]); const MBEs = getMBEs(seedElements); const mbeResults = MBEs.map((mbe) => omap( lists, (config) => { const elem = findAllElements(config) .find((elem) => mbe.contains(elem)); return elem ? 
/**
 * Splits one XPath step (e.g. `div[@class="a"][2]`) into its tag name and the
 * raw text of each bracketed predicate.
 *
 * @param {string} part - A single step of an XPath, without slashes.
 * @returns {{tagName: string, conditions: string[]}} `tagName` is `"*"` when
 *   the step does not start with a tag name; `conditions` holds each
 *   predicate with its surrounding brackets removed.
 */
const parseXPathPart = (part) => {
  const leadingTag = /^([a-zA-Z0-9-]+)/.exec(part);
  const bracketed = part.match(/\[([^\]]+)\]/g) || [];

  return {
    tagName: leadingTag === null ? "*" : leadingTag[1],
    // Drop the enclosing "[" and "]" from every predicate.
    conditions: bracketed.map((predicate) => predicate.slice(1, -1)),
  };
};
/**
 * Resolves `xpath` to its first matching node.
 *
 * Light-DOM mode (`isShadow` false) uses the native XPath engine. Only
 * documents expose `evaluate()`, so when the context is an element or a
 * shadow root the call is made on its owner document with the context passed
 * as the context node (the same approach `evaluateXPathAll` takes).
 *
 * Shadow mode (`isShadow` true) walks the path step by step with
 * `queryInsideContext`, adding the shadow root of every matched element as an
 * extra search context. It understands an outer `(path)[n]` wrapper and a
 * trailing `[n]` on individual steps (both 1-based).
 *
 * @param {Document|Element|ShadowRoot} document - Context to search from.
 * @param {string} xpath - XPath expression.
 * @param {boolean} [isShadow=false] - Use the manual shadow-piercing walk.
 * @returns {Node|null} First match, or `null` when nothing matches, the
 *   input is invalid, or evaluation fails (failures are logged, not thrown).
 */
const evaluateXPath = (document, xpath, isShadow = false) => {
  try {
    if (!document || !xpath) {
      console.warn('Invalid document or xpath provided to evaluateXPath');
      return null;
    }

    if (!isShadow) {
      const ownerDoc =
        typeof document.evaluate === "function"
          ? document
          : document.ownerDocument;
      if (!ownerDoc) return null;

      return ownerDoc.evaluate(
        xpath,
        document,
        null,
        XPathResult.FIRST_ORDERED_NODE_TYPE,
        null
      ).singleNodeValue;
    }

    // "(path)[n]rest" -> walk "path" + "rest", then pick the n-th result.
    let cleanPath = xpath;
    const indexedMatch = xpath.match(/^\((.*?)\)\[(\d+)\](.*)$/);
    if (indexedMatch) {
      cleanPath = indexedMatch[1] + indexedMatch[3];
    }

    const pathParts = cleanPath
      .replace(/^\/\//, "")
      .split("/")
      .map((p) => p.trim())
      .filter((p) => p.length > 0);

    let currentContexts = [document];

    for (const part of pathParts) {
      // A trailing "[n]" selects the n-th match within each context.
      const positionalMatch = part.match(/^([^[]+)\[(\d+)\]$/);
      const partWithoutPosition = positionalMatch ? positionalMatch[1] : part;
      const requestedPosition = positionalMatch
        ? parseInt(positionalMatch[2], 10)
        : null;

      const nextContexts = [];

      for (const ctx of currentContexts) {
        const matched = queryInsideContext(ctx, partWithoutPosition);
        let elementsToAdd = matched;

        if (requestedPosition !== null) {
          const index = requestedPosition - 1; // XPath is 1-based, arrays are 0-based
          if (index >= 0 && index < matched.length) {
            elementsToAdd = [matched[index]];
          } else {
            console.warn(
              `Position ${requestedPosition} out of range (${matched.length} elements found)`
            );
            elementsToAdd = [];
          }
        }

        for (const el of elementsToAdd) {
          nextContexts.push(el);
          // Later steps must also be able to match inside this element's shadow tree.
          if (el.shadowRoot) {
            nextContexts.push(el.shadowRoot);
          }
        }
      }

      if (nextContexts.length === 0) {
        return null;
      }

      currentContexts = nextContexts;
    }

    if (indexedMatch) {
      const requestedIndex = parseInt(indexedMatch[2], 10) - 1;
      if (requestedIndex >= 0 && requestedIndex < currentContexts.length) {
        return currentContexts[requestedIndex];
      }
      console.warn(
        `Requested index ${requestedIndex + 1} out of range (${currentContexts.length} elements found)`
      );
      return null;
    }

    return currentContexts[0];
  } catch (err) {
    console.error("Critical XPath failure:", xpath, err);
    // Return null instead of throwing to prevent crashes
    return null;
  }
};
/**
 * Returns every node matching `selector` below `rootElement`.
 *
 * A plain selector (CSS or XPath) is evaluated directly. A selector containing
 * the `>>` / `:>>` delimiters is split into segments and each segment is
 * evaluated against all results of the previous one, descending into frame
 * documents, the element's own shadow root, and the shadow roots of its
 * direct children.
 *
 * @param {Document|Element|ShadowRoot} rootElement - Where the search starts.
 * @param {string} selector - CSS or XPath selector, optionally delimited.
 * @returns {Array} Matching nodes; empty when nothing matches.
 */
const queryElementAll = (rootElement, selector) => {
  // Evaluate a single segment inside a single context, whatever its syntax.
  const collect = (context, segment) =>
    isXPathSelector(segment)
      ? evaluateXPathAll(context, segment)
      : Array.from(context.querySelectorAll(segment));

  const crossesBoundary = selector.includes(">>") || selector.includes(":>>");
  if (!crossesBoundary) {
    return collect(rootElement, selector);
  }

  const segments = selector.split(/(?:>>|:>>)/).map((piece) => piece.trim());
  let frontier = [rootElement];

  for (const segment of segments) {
    const found = [];

    frontier.forEach((node) => {
      const isFrame = node.tagName === "IFRAME" || node.tagName === "FRAME";

      if (isFrame) {
        // Frames are searched through their document; inaccessible ones are skipped.
        try {
          const frameDoc = node.contentDocument || node.contentWindow.document;
          if (frameDoc) {
            found.push(...collect(frameDoc, segment));
          }
        } catch (e) {
          console.warn(
            `Cannot access ${node.tagName.toLowerCase()} content:`,
            e
          );
        }
        return;
      }

      // Light DOM below the node.
      if (node.querySelectorAll) {
        found.push(...collect(node, segment));
      }

      // The node's own shadow tree.
      if (node.shadowRoot) {
        found.push(...collect(node.shadowRoot, segment));
      }

      // Shadow trees hosted by the node's direct children.
      Array.from(node.children || [])
        .filter((child) => child.shadowRoot)
        .forEach((child) => {
          found.push(...collect(child.shadowRoot, segment));
        });
    });

    frontier = found;
  }

  return frontier;
};
if (attribute === "innerText") { // First try standard innerText/textContent let textContent = element.innerText?.trim() || element.textContent?.trim(); // If empty, check for common data attributes that might contain the text if (!textContent) { const dataAttributes = [ "data-600", "data-text", "data-label", "data-value", "data-content", ]; for (const attr of dataAttributes) { const dataValue = element.getAttribute(attr); if (dataValue && dataValue.trim()) { textContent = dataValue.trim(); break; } } } return textContent || null; } else if (attribute === "innerHTML") { return element.innerHTML?.trim() || null; } else if (attribute === "src" || attribute === "href") { if (attribute === "href" && element.tagName !== "A") { const parentElement = element.parentElement; if (parentElement && parentElement.tagName === "A") { const parentHref = parentElement.getAttribute("href"); if (parentHref) { try { return new URL(parentHref, baseURL).href; } catch (e) { return parentHref; } } } } const attrValue = element.getAttribute(attribute); const dataAttr = attrValue || element.getAttribute("data-" + attribute); if (!dataAttr || dataAttr.trim() === "") { if (attribute === "src") { const style = window.getComputedStyle(element); const bgImage = style.backgroundImage; if (bgImage && bgImage !== "none") { const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/); return matches ? 
/**
 * Finds the nearest table cell or row that contains `element`.
 *
 * The element itself is checked first, then its ancestors. Inside a shadow
 * tree the walk climbs that tree and crosses to the shadow host only once the
 * top of the tree is reached, so a `TD`/`TR` rendered inside a shadow root is
 * found. Steps taken inside a shadow tree (including the hop to the host) do
 * not count towards the depth limit; in the light DOM at most `MAX_DEPTH`
 * nodes are examined.
 *
 * @param {Element} element - Element to start from.
 * @returns {{type: "TD"|"TR", element: Element}|null} The closest cell or
 *   row, or `null` when none is found within the depth limit.
 */
const findTableAncestor = (element) => {
  const MAX_DEPTH = 5;
  let currentElement = element;
  let depth = 0;

  while (currentElement && depth < MAX_DEPTH) {
    // Check the node before leaving its tree, otherwise cells inside a
    // shadow root can never be detected.
    if (currentElement.tagName === "TD") {
      return { type: "TD", element: currentElement };
    }
    if (currentElement.tagName === "TR") {
      return { type: "TR", element: currentElement };
    }

    // Handle shadow DOM: climb within the shadow tree, then hop to its host.
    const root = currentElement.getRootNode();
    if (root instanceof ShadowRoot) {
      currentElement = currentElement.parentElement || root.host;
      continue;
    }

    // Handle iframe and frame crossing
    if (
      currentElement.tagName === "IFRAME" ||
      currentElement.tagName === "FRAME"
    ) {
      try {
        currentElement = currentElement.contentDocument.body;
      } catch (e) {
        return null;
      }
    } else {
      currentElement = currentElement.parentElement;
    }
    depth++;
  }

  return null;
};
/**
 * Jaccard similarity of two class lists: shared classes divided by the
 * number of distinct classes across both lists.
 *
 * @param {Iterable<string>} classList1 - First list of class names.
 * @param {Iterable<string>} classList2 - Second list of class names.
 * @returns {number} A value in [0, 1]. Two empty lists yield 0 (there is
 *   nothing to compare) rather than NaN from 0 / 0.
 */
const calculateClassSimilarity = (classList1, classList2) => {
  const set1 = new Set(classList1);
  const set2 = new Set(classList2);
  const union = new Set([...set1, ...set2]);

  if (union.size === 0) return 0;

  let shared = 0;
  for (const className of set1) {
    if (set2.has(className)) shared += 1;
  }
  return shared / union.size;
};
/**
 * Rewrites a field XPath so that it targets one specific list container.
 *
 * Three cases, tried in order:
 *  1. `childSelector` is relative (`./x` or `.//x`): it is appended to the
 *     indexed container, giving `(list)[n]/x` or `(list)[n]//x`.
 *  2. `childSelector` contains the list selector as a `//`-anchored path: the
 *     first such occurrence is replaced by `(list)[n]`.
 *  3. Otherwise the child path is appended to `(list)[n]`, with its first
 *     `//` reduced to `/`.
 *
 * Case 2 is only taken when the anchored pattern is really present.
 * Previously a child selector that merely contained the list pattern text
 * (e.g. after a single `/`) was returned unchanged and therefore unindexed.
 *
 * @param {string} childSelector - XPath of the field.
 * @param {string} listSelector - XPath matching every list container.
 * @param {number} containerIndex - 1-based index of the container.
 * @returns {string} XPath scoped to the `containerIndex`-th container.
 */
const createIndexedXPath = (
  childSelector,
  listSelector,
  containerIndex
) => {
  const indexedListSelector = `(${listSelector})[${containerIndex}]`;

  // Relative selector: drop the leading "." and hang it off the container.
  if (childSelector.startsWith("./")) {
    return `${indexedListSelector}${childSelector.slice(1)}`;
  }

  const anchoredListPattern = `//${listSelector.replace("//", "")}`;
  if (childSelector.includes(anchoredListPattern)) {
    // Function replacer so "$" sequences in the XPath are inserted literally.
    return childSelector.replace(
      anchoredListPattern,
      () => indexedListSelector
    );
  }

  // If pattern doesn't match, create a more generic indexed selector
  return `${indexedListSelector}${childSelector.replace("//", "/")}`;
};
containers.length === 1 ) { const baseContainer = containers[0]; const similarContainers = findSimilarElements(baseContainer); if (similarContainers.length > 0) { const newContainers = similarContainers.filter( (container) => !container.matches(listSelector) ); containers = [...containers, ...newContainers]; } } const containerFields = containers.map(() => ({ tableFields: {}, nonTableFields: {}, })); // For XPath selectors, use the new approach if (isXPathSelector(listSelector)) { const extractedData = []; const containersToProcess = Math.min(containers.length, limit); for ( let containerIndex = 0; containerIndex < containersToProcess; containerIndex++ ) { const record = {}; for (const [label, field] of Object.entries(fields)) { let element = null; if (isXPathSelector(field.selector)) { // Create indexed absolute XPath const indexedSelector = createIndexedXPath( field.selector, listSelector, containerIndex + 1 ); element = evaluateXPath(document, indexedSelector, field.isShadow); } else { // Fallback for CSS selectors within XPath containers const container = containers[containerIndex]; element = queryElement(container, field.selector); } if (element) { const value = extractValue(element, field.attribute); if (value !== null && value !== "") { record[label] = value; } else { record[label] = ""; } } else { record[label] = ""; } } if (Object.values(record).some((value) => value !== "")) { extractedData.push(record); } } console.log(`📊 Total records extracted: ${extractedData.length}`); return extractedData; } // For CSS selectors, use the original table-aware approach containers.forEach((container, containerIndex) => { for (const [label, field] of Object.entries(fields)) { const sampleElement = queryElement(container, field.selector); if (sampleElement) { const ancestor = findTableAncestor(sampleElement); if (ancestor) { containerFields[containerIndex].tableFields[label] = { ...field, tableContext: ancestor.type, cellIndex: ancestor.type === "TD" ? 
getCellIndex(ancestor.element) : -1, }; } else { containerFields[containerIndex].nonTableFields[label] = field; } } else { containerFields[containerIndex].nonTableFields[label] = field; } } }); const tableData = []; const nonTableData = []; // Process table data with support for iframes, frames, and shadow DOM for ( let containerIndex = 0; containerIndex < containers.length; containerIndex++ ) { const container = containers[containerIndex]; const { tableFields } = containerFields[containerIndex]; if (Object.keys(tableFields).length > 0) { const firstField = Object.values(tableFields)[0]; const firstElement = queryElement(container, firstField.selector); let tableContext = firstElement; // Find table context including iframe, frame and shadow DOM while ( tableContext && tableContext.tagName !== "TABLE" && tableContext !== container ) { if (tableContext.getRootNode() instanceof ShadowRoot) { tableContext = tableContext.getRootNode().host; continue; } if ( tableContext.tagName === "IFRAME" || tableContext.tagName === "FRAME" ) { try { tableContext = tableContext.contentDocument.body; } catch (e) { break; } } else { tableContext = tableContext.parentElement; } } if (tableContext) { // Get rows from all contexts const rows = []; // Get rows from regular DOM rows.push(...tableContext.getElementsByTagName("TR")); // Get rows from shadow DOM if (tableContext.shadowRoot) { rows.push(...tableContext.shadowRoot.getElementsByTagName("TR")); } // Get rows from iframes and frames if ( tableContext.tagName === "IFRAME" || tableContext.tagName === "FRAME" ) { try { const frameDoc = tableContext.contentDocument || tableContext.contentWindow.document; rows.push(...frameDoc.getElementsByTagName("TR")); } catch (e) { console.warn( `Cannot access ${tableContext.tagName.toLowerCase()} rows:`, e ); } } const processedRows = filterRowsBasedOnTag(rows, tableFields); for ( let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++ ) { const record = {}; const currentRow 
= processedRows[rowIndex]; for (const [ label, { selector, attribute, cellIndex }, ] of Object.entries(tableFields)) { let element = null; if (cellIndex >= 0) { // Get TD element considering both contexts let td = currentRow.children[cellIndex]; // Check shadow DOM for td if (!td && currentRow.shadowRoot) { const shadowCells = currentRow.shadowRoot.children; if (shadowCells && shadowCells.length > cellIndex) { td = shadowCells[cellIndex]; } } if (td) { element = queryElement(td, selector); if ( !element && selector .split(/(?:>>|:>>)/) .pop() .includes("td:nth-child") ) { element = td; } if (!element) { const tagOnlySelector = selector.split(".")[0]; element = queryElement(td, tagOnlySelector); } if (!element) { let currentElement = td; while ( currentElement && currentElement.children.length > 0 ) { let foundContentChild = false; for (const child of currentElement.children) { if (extractValue(child, attribute)) { currentElement = child; foundContentChild = true; break; } } if (!foundContentChild) break; } element = currentElement; } } } else { element = queryElement(currentRow, selector); } if (element) { record[label] = extractValue(element, attribute); } } if (Object.keys(record).length > 0) { tableData.push(record); } } } } } // Process non-table data with all contexts support for ( let containerIndex = 0; containerIndex < containers.length; containerIndex++ ) { if (nonTableData.length >= limit) break; const container = containers[containerIndex]; const { nonTableFields } = containerFields[containerIndex]; if (Object.keys(nonTableFields).length > 0) { const record = {}; for (const [label, { selector, attribute }] of Object.entries( nonTableFields )) { // Get the last part of the selector after any context delimiter const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0]; const element = tryFallbackSelector(container, relativeSelector); if (element) { record[label] = extractValue(element, attribute); } } if (Object.keys(record).length > 0) { 
/**
 * Gets all children of the elements matching the listSelector,
 * returning a CSS selector and the text of each child.
 *
 * The selector for a child is built by walking up its ancestors, joining
 * `tag.class1.class2` steps with ` > `, and stopping at the first element
 * that has an id (emitted as `tag#id`).
 *
 * Class names are read from the `class` attribute and text falls back to
 * `textContent`, because on SVG elements `className` is not a string and
 * `innerText` is undefined; the previous direct `.trim()` calls threw there.
 *
 * @param {string} listSelector - Selector for the list container(s)
 * @returns {Array<{selector: string, innerText: string}>} One entry per
 *   direct child of every matched list.
 */
window.scrapeListAuto = function (listSelector) {
  const lists = Array.from(document.querySelectorAll(listSelector));

  const results = [];

  lists.forEach((list) => {
    const children = Array.from(list.children);

    children.forEach((child) => {
      const selectors = [];
      let element = child;

      // Traverse up to gather the CSS selector for the element
      while (element && element !== document) {
        let selector = element.nodeName.toLowerCase();

        if (element.id) {
          // An id is treated as unique enough to anchor the selector.
          selector += `#${element.id}`;
          selectors.push(selector);
          break;
        }

        const className = (element.getAttribute('class') || '')
          .trim()
          .split(/\s+/)
          .join('.');
        if (className) {
          selector += `.${className}`;
        }
        selectors.push(selector);
        element = element.parentElement;
      }

      results.push({
        selector: selectors.reverse().join(' > '),
        innerText: (child.innerText ?? child.textContent ?? '').trim(),
      });
    });
  });

  return results;
};
/**
 * Defines optional interpreter options (passed in constructor).
 *
 * The constructor merges caller-supplied values over these defaults:
 * `maxRepeats: 5`, `maxConcurrency: 5`, `debug: false`, `debugChannel: {}`,
 * and logging implementations of both callbacks.
 */
interface InterpreterOptions {
  mode?: string;
  maxRepeats: number;
  /** Passed to the `Concurrency` helper created in the constructor. */
  maxConcurrency: number;
  /** Receives serializable output; may be synchronous or return a promise. */
  serializableCallback: (output: any) => (void | Promise<void>);
  /** Receives binary output together with its MIME type. */
  binaryCallback: (output: any, mimeType: string) => (void | Promise<void>);
  debug: boolean;
  /**
   * Optional hooks for an attached debugger/UI. When `debugMessage` is set,
   * the constructor forwards every log message whose level is not
   * `Level.LOG` to it.
   */
  debugChannel: Partial<{
    activeId: (id: number) => void,
    debugMessage: (msg: string) => void,
    setActionType: (type: string) => void,
    incrementScrapeListIndex: () => void,
    progressUpdate: (current: number, total: number, percentage: number) => void,
  }>
}
*/ export default class Interpreter extends EventEmitter { private workflow: Workflow; private initializedWorkflow: Workflow | null; private options: InterpreterOptions; private concurrency: Concurrency; private stopper: Function | null = null; private isAborted: boolean = false; private log: typeof log; // private blocker: PlaywrightBlocker | null = null; private cumulativeResults: Record[] = []; private namedResults: Record> = {}; private screenshotCounter: number = 0; private serializableDataByType: Record> = { scrapeList: {}, scrapeSchema: {}, crawl: {}, search: {} }; private scrapeListCounter: number = 0; private totalActions: number = 0; private executedActions: number = 0; constructor(workflow: WorkflowFile, options?: Partial) { super(); this.workflow = workflow.workflow; this.initializedWorkflow = null; this.options = { maxRepeats: 5, maxConcurrency: 5, serializableCallback: (data) => { log(JSON.stringify(data), Level.WARN); }, binaryCallback: () => { log('Received binary data, thrashing them.', Level.WARN); }, debug: false, debugChannel: {}, ...options, }; this.concurrency = new Concurrency(this.options.maxConcurrency); this.log = (...args) => log(...args); const error = Preprocessor.validateWorkflow(workflow); if (error) { throw (error); } if (this.options.debugChannel?.debugMessage) { const oldLog = this.log; // @ts-ignore this.log = (...args: Parameters) => { if (args[1] !== Level.LOG) { this.options.debugChannel.debugMessage!(typeof args[0] === 'string' ? 
/**
 * Collects the selectors of the last workflow pair that declares any.
 *
 * The workflow is scanned from its final pair backwards; the first pair with
 * a non-empty `where.selectors` list decides the result, and earlier pairs
 * are not consulted.
 *
 * @param workflow Workflow (array of where-what pairs) to scan.
 * @returns The de-duplicated selectors of that pair, in their original
 *   order, or an empty array when no pair declares selectors.
 */
private getSelectors(workflow: Workflow): string[] {
  for (let index = workflow.length - 1; index >= 0; index--) {
    const currentSelectors = workflow[index]?.where?.selectors;

    if (currentSelectors && currentSelectors.length > 0) {
      // Seeding the Set from the array keeps the element type, so the result
      // is string[] rather than unknown[].
      return Array.from(new Set(currentSelectors));
    }
  }

  return [];
}
in the page's context.
   * @param page Playwright Page object
   * @param workflowCopy Workflow (array of where-what pairs) whose last pair is treated as the
   * next action; that pair's `where.url` replaces the reported URL when it differs from the
   * page URL and is not "about:blank".
   * @param selectors Candidate selectors; only those that get attached to the DOM are reported.
   * @returns {PageState} State of the current page.
   */
  private async getState(page: Page, workflowCopy: Workflow, selectors: string[]): Promise<PageState> {
    // Probe all selectors concurrently. A selector whose wait rejects (e.g. on timeout -
    // no explicit timeout is passed, so Playwright's configured default applies) is
    // simply treated as absent.
    const attached = await Promise.all(
      selectors.map(async (selector) => {
        try {
          await page.waitForSelector(selector, { state: 'attached' });
          return true;
        } catch {
          return false;
        }
      }),
    );
    const presentSelectors: SelectorArray = selectors.filter((_, index) => attached[index]);

    const currentUrl = page.url();
    const action = workflowCopy[workflowCopy.length - 1];

    // Report the URL the next action expects rather than the live one when they differ.
    // NOTE(review): when the last pair has no `where.url`, this makes `url` undefined -
    // confirm that this is intended.
    let url: any = currentUrl;
    if (action && action.where.url !== url && action.where.url !== "about:blank") {
      url = action.where.url;
    }

    // Flatten the cookie list into a name -> value map (a later duplicate name wins).
    const cookieList = await page.context().cookies([currentUrl]);

    return {
      url,
      cookies: Object.fromEntries(
        cookieList.map((cookie): [string, string] => [cookie.name, cookie.value]),
      ),
      selectors: presentSelectors,
    };
  }

  /**
   * Tests if the given action is applicable with the given context.
   * @param where Tested *where* condition
   * @param context Current browser context.
   * @param usedActions Identifiers of the actions used so far, consulted by the `$before` and
   * `$after` meta conditions (presumably pair ids - confirm against the caller). The list is
   * forwarded to conditions nested under `$and`, `$or` and `$not`.
   * @returns True if `where` is applicable in the given context, false otherwise
   * @throws Error when the logic or meta operator is not handled by this method.
   */
  private applicable(where: Where, context: PageState, usedActions: string[] = []): boolean {
    /**
     * Detects RegExp values by constructor name. `null` and `undefined` are never a RegExp,
     * which keeps the check from throwing on missing condition values.
     */
    const isRegExp = (candidate: unknown): candidate is RegExp => (
      candidate !== null
      && candidate !== undefined
      && (candidate as object).constructor?.name === 'RegExp'
    );

    /**
     * Given two arbitrary objects, determines whether `subset` is a subset of `superset`.\
     * \
     * For every key in `subset`, there must be a corresponding key with equal scalar
     * value in `superset`, or `inclusive(subset[key], superset[key])` must hold.
     * @param subset Arbitrary non-cyclic JS object (where clause)
     * @param superset Arbitrary non-cyclic JS object (browser context)
     * @returns `true` if `subset <= superset`, `false` otherwise.
     */
    const inclusive = (
      subset: Record<string, unknown>,
      superset: Record<string, unknown>,
    ): boolean => Object.entries(subset).every(([key, value]) => {
      const actual = superset[key];

      if (Array.isArray(value) && Array.isArray(actual)) {
        // Two empty lists trivially satisfy a `url` / `selectors` condition.
        if ((key === 'url' || key === 'selectors') && value.length === 0 && actual.length === 0) {
          return true;
        }

        // A selector condition holds as soon as ONE of the required selectors is present.
        if (key === 'selectors') {
          return value.some((selector) => actual.includes(selector));
        }
      }

      // Arrays are compared without order (are transformed into objects before comparison).
      const parsedValue = Array.isArray(value) ? arrayToObject(value) : value;
      const parsedActual = Array.isArray(actual) ? arrayToObject(actual) : actual;

      // Every `subset` key must exist in the `superset` and have the same value
      // (strict equality), match it as a RegExp, or be recursively included in it.
      if (!parsedActual) {
        return false;
      }
      if (parsedActual === parsedValue) {
        return true;
      }
      if (isRegExp(parsedValue)) {
        return parsedValue.test(parsedActual as string);
      }
      return typeof parsedValue === 'object'
        && parsedValue !== null
        && inclusive(
          parsedValue as Record<string, unknown>,
          parsedActual as Record<string, unknown>,
        );
    });

    // Every value in the "where" object should be compliant to the current state.
    return Object.entries(where).every(([key, value]) => {
      if ((operators as readonly string[]).includes(key)) {
        // An object operand is split so that every condition is treated as a single context.
        const branches: Where[] = Array.isArray(value)
          ? (value as Where[])
          : Object.entries(value).map((entry) => Object.fromEntries([entry]) as Where);

        // `usedActions` is passed down so nested `$before` / `$after` see the real history.
        switch (key) {
          case '$and':
            return branches.every((branch) => this.applicable(branch, context, usedActions));
          case '$or':
            return branches.some((branch) => this.applicable(branch, context, usedActions));
          case '$not':
            // $not should be a unary operator
            return !this.applicable(value as Where, context, usedActions);
          default:
            throw new Error('Undefined logic operator.');
        }
      }

      if ((meta as readonly string[]).includes(key)) {
        // The operand is either an exact action identifier or a RegExp to test against it.
        const matchesUsedAction = (actionId: string): boolean => (
          typeof value === 'string'
            ? actionId === value
            : isRegExp(value) && value.test(actionId)
        );

        switch (key) {
          case '$before':
            return !usedActions.some(matchesUsedAction);
          case '$after':
            return usedActions.some(matchesUsedAction);
          default:
            throw new Error('Undefined meta operator.');
        }
      }

      // Current key is a base condition (url, cookies, selectors)
      return inclusive({ [key]: value }, context);
    });
  }

  /**
   * Given a Playwright's page object and a "declarative" list of actions, this function
   * calls all mentioned functions on the Page object.\
   * \
   * Manipulates the iterator indexes (experimental feature, likely to be removed in
   * the following versions of maxun-core)
   * @param page Playwright Page object
   * @param steps Array of actions.
   */
  private async carryOutSteps(page: Page, steps: What[]): Promise<void> {
    if (this.isAborted) {
      this.log('Workflow aborted, stopping execution', Level.WARN);
      return;
    }

    /**
     * Defines overloaded (or added) methods/actions usable in the workflow.
     * If a method overloads any existing method of the Page class, it accepts the same set
     * of parameters *(but can override some!)*\
     * \
     * Also, following piece of code defines functions to be run in the browser's context.
     * Beware of false linter errors - here, we know better!
*/ const wawActions: Record void> = { screenshot: async ( params: PageScreenshotOptions, nameOverride?: string ) => { if (this.options.debugChannel?.setActionType) { this.options.debugChannel.setActionType("screenshot"); } const screenshotBuffer = await page.screenshot({ ...params, path: undefined, }); const explicitName = (typeof nameOverride === 'string' && nameOverride.trim().length > 0) ? nameOverride.trim() : null; let screenshotName: string; if (explicitName) { screenshotName = explicitName; } else { this.screenshotCounter += 1; screenshotName = `Screenshot ${this.screenshotCounter}`; } await this.options.binaryCallback( { name: screenshotName, data: screenshotBuffer, mimeType: "image/png", }, "image/png" ); }, enqueueLinks: async (selector: string) => { if (this.options.debugChannel?.setActionType) { this.options.debugChannel.setActionType('enqueueLinks'); } const links: string[] = await page.locator(selector) .evaluateAll( // @ts-ignore (elements) => elements.map((a) => a.href).filter((x) => x), ); const context = page.context(); for (const link of links) { // eslint-disable-next-line this.concurrency.addJob(async () => { let newPage = null; try { newPage = await context.newPage(); await newPage.goto(link); await newPage.waitForLoadState('networkidle'); await this.runLoop(newPage, this.initializedWorkflow!); } catch (e) { // `runLoop` uses soft mode, so it recovers from it's own exceptions // but newPage(), goto() and waitForLoadState() don't (and will kill // the interpreter by throwing). this.log(e, Level.ERROR); } finally { if (newPage && !newPage.isClosed()) { try { await newPage.close(); } catch (closeError) { this.log('Failed to close enqueued page', Level.WARN); } } } }); } await page.close(); }, scrape: async (selector?: string) => { if (this.options.debugChannel?.setActionType) { this.options.debugChannel.setActionType('scrape'); } await this.ensureScriptsLoaded(page); const scrapeResults: Record[] = await page.evaluate((s) => window.scrape(s ?? 
null), selector); await this.options.serializableCallback(scrapeResults); }, scrapeSchema: async (schema: Record, actionName: string = "") => { if (this.isAborted) { this.log('Workflow aborted, stopping scrapeSchema', Level.WARN); return; } if (this.options.debugChannel?.setActionType) { this.options.debugChannel.setActionType('scrapeSchema'); } if (this.options.mode && this.options.mode === 'editor') { await this.options.serializableCallback({}); return; } await this.ensureScriptsLoaded(page); const normalizedSchema = Object.fromEntries( Object.entries(schema).map(([key, value]) => [ key, typeof value === 'string' ? { selector: value, tag: '', attribute: 'innerText', shadow: '' } : value, ]) ); const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), normalizedSchema); if (!this.cumulativeResults || !Array.isArray(this.cumulativeResults)) { this.cumulativeResults = []; } const resultToProcess = Array.isArray(scrapeResult) ? scrapeResult[0] : scrapeResult; if (this.cumulativeResults.length === 0) { const newRow = {}; Object.entries(resultToProcess).forEach(([key, value]) => { if (value !== undefined) { newRow[key] = value; } }); this.cumulativeResults.push(newRow); } else { const lastRow = this.cumulativeResults[this.cumulativeResults.length - 1]; const newResultKeys = Object.keys(resultToProcess).filter(key => resultToProcess[key] !== undefined); const hasRepeatedKeys = newResultKeys.some(key => lastRow.hasOwnProperty(key)); if (hasRepeatedKeys) { const newRow = {}; Object.entries(resultToProcess).forEach(([key, value]) => { if (value !== undefined) { newRow[key] = value; } }); this.cumulativeResults.push(newRow); } else { Object.entries(resultToProcess).forEach(([key, value]) => { if (value !== undefined) { lastRow[key] = value; } }); } } const actionType = "scrapeSchema"; const name = actionName || "Texts"; if (!this.namedResults[actionType]) this.namedResults[actionType] = {}; this.namedResults[actionType][name] = 
this.cumulativeResults; if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {}; if (!this.serializableDataByType[actionType][name]) { this.serializableDataByType[actionType][name] = []; } this.serializableDataByType[actionType][name] = [...this.cumulativeResults]; await this.options.serializableCallback({ scrapeList: this.serializableDataByType.scrapeList, scrapeSchema: this.serializableDataByType.scrapeSchema, crawl: this.serializableDataByType.crawl || {}, search: this.serializableDataByType.search || {} }); }, scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }, actionName: string = "") => { if (this.isAborted) { this.log('Workflow aborted, stopping scrapeList', Level.WARN); return; } if (this.options.debugChannel?.setActionType) { this.options.debugChannel.setActionType('scrapeList'); } if (this.options.mode && this.options.mode === 'editor') { await this.options.serializableCallback({}); return; } try { await this.ensureScriptsLoaded(page); if (this.options.debugChannel?.incrementScrapeListIndex) { this.options.debugChannel.incrementScrapeListIndex(); } let scrapeResults = []; let paginationUsed = false; if (!config.pagination) { scrapeResults = await page.evaluate((cfg) => { try { return window.scrapeList(cfg); } catch (error) { console.warn('ScrapeList evaluation failed:', error.message); return []; } }, config); } else { paginationUsed = true; scrapeResults = await this.handlePagination(page, config, actionName); } if (!Array.isArray(scrapeResults)) { scrapeResults = []; } console.log(`ScrapeList completed with ${scrapeResults.length} results`); if (!paginationUsed) { const actionType = "scrapeList"; let name = actionName || ""; if (!name || name.trim() === "") { this.scrapeListCounter++; name = `List ${this.scrapeListCounter}`; } if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {}; if (!this.serializableDataByType[actionType][name]) { 
this.serializableDataByType[actionType][name] = []; } this.serializableDataByType[actionType][name].push(...scrapeResults); await this.options.serializableCallback({ scrapeList: this.serializableDataByType.scrapeList, scrapeSchema: this.serializableDataByType.scrapeSchema }); } } catch (error) { console.error('ScrapeList action failed completely:', error.message); const actionType = "scrapeList"; let name = actionName || ""; if (!name || name.trim() === "") { this.scrapeListCounter++; name = `List ${this.scrapeListCounter}`; } if (!this.namedResults[actionType]) this.namedResults[actionType] = {}; this.namedResults[actionType][name] = []; if (!this.serializableDataByType[actionType]) this.serializableDataByType[actionType] = {}; this.serializableDataByType[actionType][name] = []; await this.options.serializableCallback({ scrapeList: this.serializableDataByType.scrapeList, scrapeSchema: this.serializableDataByType.scrapeSchema }); } }, scrapeListAuto: async (config: { listSelector: string }) => { if (this.options.debugChannel?.setActionType) { this.options.debugChannel.setActionType('scrapeListAuto'); } await this.ensureScriptsLoaded(page); const scrapeResults: { selector: string, innerText: string }[] = await page.evaluate((listSelector) => { return window.scrapeListAuto(listSelector); }, config.listSelector); await this.options.serializableCallback(scrapeResults); }, scroll: async (pages?: number) => { if (this.options.debugChannel?.setActionType) { this.options.debugChannel.setActionType('scroll'); } await page.evaluate(async (pagesInternal) => { for (let i = 1; i <= (pagesInternal ?? 1); i += 1) { // @ts-ignore window.scrollTo(0, window.scrollY + window.innerHeight); } }, pages ?? 
1); }, script: async (code: string) => { if (this.options.debugChannel?.setActionType) { this.options.debugChannel.setActionType('script'); } try { const AsyncFunction: FunctionConstructor = Object.getPrototypeOf( async () => { }, ).constructor; const x = new AsyncFunction('page', 'log', code); await x(page, this.log); } catch (error) { this.log(`Script execution failed: ${error.message}`, Level.ERROR); throw new Error(`Script execution error: ${error.message}`); } }, crawl: async (crawlConfig: { mode: 'domain' | 'subdomain' | 'path'; limit: number; maxDepth: number; includePaths: string[]; excludePaths: string[]; useSitemap: boolean; followLinks: boolean; respectRobots: boolean; }) => { if (this.isAborted) { this.log('Workflow aborted, stopping crawl', Level.WARN); return; } if (this.options.debugChannel?.setActionType) { this.options.debugChannel.setActionType('crawl'); } this.log('Starting crawl operation', Level.LOG); try { const currentUrl = page.url(); this.log(`Current page URL: ${currentUrl}`, Level.LOG); if (!currentUrl || currentUrl === 'about:blank' || currentUrl === '') { this.log('Page not yet navigated, waiting for navigation...', Level.WARN); await page.waitForLoadState('load', { timeout: 10000 }).catch(() => {}); } const baseUrl = page.url(); this.log(`Using base URL for crawl: ${baseUrl}`, Level.LOG); const parsedBase = new URL(baseUrl); const baseDomain = parsedBase.hostname; interface RobotRules { disallowedPaths: string[]; allowedPaths: string[]; crawlDelay: number | null; } let robotRules: RobotRules = { disallowedPaths: [], allowedPaths: [], crawlDelay: null }; if (crawlConfig.respectRobots) { this.log('Fetching robots.txt...', Level.LOG); try { const robotsUrl = `${parsedBase.protocol}//${parsedBase.host}/robots.txt`; const robotsContent = await page.evaluate((url) => { return new Promise((resolve) => { const xhr = new XMLHttpRequest(); xhr.open('GET', url, true); xhr.onload = function() { if (xhr.status === 200) { resolve(xhr.responseText); 
} else { resolve(''); } }; xhr.onerror = function() { resolve(''); }; xhr.send(); }); }, robotsUrl); if (robotsContent) { const lines = robotsContent.split('\n'); let isRelevantUserAgent = false; let foundSpecificUserAgent = false; for (const line of lines) { const trimmedLine = line.trim().toLowerCase(); if (trimmedLine.startsWith('#') || trimmedLine === '') { continue; } const colonIndex = line.indexOf(':'); if (colonIndex === -1) continue; const directive = line.substring(0, colonIndex).trim().toLowerCase(); const value = line.substring(colonIndex + 1).trim(); if (directive === 'user-agent') { const agent = value.toLowerCase(); if (agent === '*' && !foundSpecificUserAgent) { isRelevantUserAgent = true; } else if (agent.includes('bot') || agent.includes('crawler') || agent.includes('spider')) { isRelevantUserAgent = true; foundSpecificUserAgent = true; } else { if (!foundSpecificUserAgent) { isRelevantUserAgent = false; } } } else if (isRelevantUserAgent) { if (directive === 'disallow' && value) { robotRules.disallowedPaths.push(value); } else if (directive === 'allow' && value) { robotRules.allowedPaths.push(value); } else if (directive === 'crawl-delay' && value) { const delay = parseFloat(value); if (!isNaN(delay) && delay > 0) { robotRules.crawlDelay = delay * 1000; } } } } this.log(`Robots.txt parsed: ${robotRules.disallowedPaths.length} disallowed paths, ${robotRules.allowedPaths.length} allowed paths, crawl-delay: ${robotRules.crawlDelay || 'none'}`, Level.LOG); } else { this.log('No robots.txt found or not accessible, proceeding without restrictions', Level.WARN); } } catch (error) { this.log(`Failed to fetch robots.txt: ${error.message}, proceeding without restrictions`, Level.WARN); } } const isUrlAllowedByRobots = (url: string): boolean => { if (!crawlConfig.respectRobots) return true; try { const urlObj = new URL(url); const pathname = urlObj.pathname; for (const allowedPath of robotRules.allowedPaths) { if (allowedPath === pathname || 
pathname.startsWith(allowedPath)) { return true; } if (allowedPath.includes('*')) { const regex = new RegExp('^' + allowedPath.replace(/\*/g, '.*').replace(/\?/g, '.') + '$'); if (regex.test(pathname)) { return true; } } } for (const disallowedPath of robotRules.disallowedPaths) { if (disallowedPath === '/') { return false; } if (pathname.startsWith(disallowedPath)) { return false; } if (disallowedPath.includes('*')) { const regex = new RegExp('^' + disallowedPath.replace(/\*/g, '.*').replace(/\?/g, '.') + '$'); if (regex.test(pathname)) { return false; } } if (disallowedPath.endsWith('$')) { const pattern = disallowedPath.slice(0, -1); if (pathname === pattern || pathname.endsWith(pattern)) { return false; } } } return true; } catch (error) { return true; } }; const isUrlAllowedByConfig = (url: string): boolean => { try { const urlObj = new URL(url); if (crawlConfig.mode === 'domain') { if (urlObj.hostname !== baseDomain) return false; } else if (crawlConfig.mode === 'subdomain') { if (!urlObj.hostname.endsWith(baseDomain) && urlObj.hostname !== baseDomain) return false; } else if (crawlConfig.mode === 'path') { if (urlObj.hostname !== baseDomain || !urlObj.pathname.startsWith(parsedBase.pathname)) return false; } if (crawlConfig.includePaths && crawlConfig.includePaths.length > 0) { const matches = crawlConfig.includePaths.some(pattern => { try { const regex = new RegExp(pattern); return regex.test(url); } catch { return url.includes(pattern); } }); if (!matches) return false; } if (crawlConfig.excludePaths && crawlConfig.excludePaths.length > 0) { const matches = crawlConfig.excludePaths.some(pattern => { try { const regex = new RegExp(pattern); return regex.test(url); } catch { return url.includes(pattern); } }); if (matches) return false; } return true; } catch (error) { return false; } }; const normalizeUrl = (url: string): string => { return url.replace(/#.*$/, '').replace(/\/$/, ''); }; const extractLinksFromPage = async (): Promise => { try { await 
page.waitForLoadState('load', { timeout: 15000 }).catch(() => {}); await page.waitForLoadState('networkidle', { timeout: 10000 }).catch(() => {}); await new Promise(resolve => setTimeout(resolve, 1000)); const pageLinks = await page.evaluate(() => { const links: string[] = []; const allAnchors = document.querySelectorAll('a'); for (let i = 0; i < allAnchors.length; i++) { const anchor = allAnchors[i] as HTMLAnchorElement; const fullHref = anchor.href; if (fullHref && (fullHref.startsWith('http://') || fullHref.startsWith('https://'))) { links.push(fullHref); } } return links; }); return pageLinks; } catch (error) { this.log(`Link extraction failed: ${error.message}`, Level.WARN); return []; } }; const scrapePageContent = async (url: string) => { const pageData = await page.evaluate(() => { const getMeta = (name: string) => { const meta = document.querySelector(`meta[name="${name}"], meta[property="${name}"]`); return meta?.getAttribute('content') || ''; }; const getAllMeta = () => { const metadata: Record = {}; const metaTags = document.querySelectorAll('meta'); metaTags.forEach(tag => { const name = tag.getAttribute('name') || tag.getAttribute('property'); const content = tag.getAttribute('content'); if (name && content) { metadata[name] = content; } }); return metadata; }; const title = document.title || ''; const bodyText = document.body?.innerText || ''; const elementsWithMxId = document.querySelectorAll('[data-mx-id]'); elementsWithMxId.forEach(el => el.removeAttribute('data-mx-id')); const html = document.documentElement.outerHTML; const links = Array.from(document.querySelectorAll('a')).map(a => a.href); const allMetadata = getAllMeta(); return { title, description: getMeta('description'), text: bodyText, html: html, links: links, wordCount: bodyText.split(/\s+/).filter(w => w.length > 0).length, metadata: { ...allMetadata, title, language: document.documentElement.lang || '', favicon: (document.querySelector('link[rel="icon"], link[rel="shortcut icon"]') as 
HTMLLinkElement)?.href || '', statusCode: 200 } }; }); return { metadata: { ...pageData.metadata, url: url, sourceURL: url } as Record, html: pageData.html, text: pageData.text, links: pageData.links, wordCount: pageData.wordCount, scrapedAt: new Date().toISOString() }; }; const visitedUrls = new Set(); const crawlResults: any[] = []; interface CrawlQueueItem { url: string; depth: number; } const crawlQueue: CrawlQueueItem[] = []; const normalizedBaseUrl = normalizeUrl(baseUrl); visitedUrls.add(normalizedBaseUrl); crawlQueue.push({ url: baseUrl, depth: 0 }); this.log(`Starting breadth-first crawl with maxDepth: ${crawlConfig.maxDepth}, limit: ${crawlConfig.limit}`, Level.LOG); if (crawlConfig.useSitemap) { this.log('Fetching sitemap URLs...', Level.LOG); try { const sitemapUrl = `${parsedBase.protocol}//${parsedBase.host}/sitemap.xml`; const sitemapUrls = await page.evaluate((url) => { return new Promise((resolve) => { const xhr = new XMLHttpRequest(); xhr.open('GET', url, true); xhr.onload = function() { if (xhr.status === 200) { const text = xhr.responseText; const locMatches = text.match(/(.*?)<\/loc>/g) || []; const urls = locMatches.map(match => match.replace(/<\/?loc>/g, '')); resolve(urls); } else { resolve([]); } }; xhr.onerror = function() { resolve([]); }; xhr.send(); }); }, sitemapUrl); if (sitemapUrls.length > 0) { const nestedSitemaps = sitemapUrls.filter(url => url.endsWith('/sitemap') || url.endsWith('sitemap.xml') || url.includes('/sitemap/') ); const regularUrls = sitemapUrls.filter(url => !url.endsWith('/sitemap') && !url.endsWith('sitemap.xml') && !url.includes('/sitemap/') ); for (const sitemapPageUrl of regularUrls) { const normalized = normalizeUrl(sitemapPageUrl); if (!visitedUrls.has(normalized) && isUrlAllowedByConfig(sitemapPageUrl) && isUrlAllowedByRobots(sitemapPageUrl)) { visitedUrls.add(normalized); crawlQueue.push({ url: sitemapPageUrl, depth: 1 }); } } this.log(`Found ${regularUrls.length} regular URLs from main sitemap`, Level.LOG); 
for (const nestedUrl of nestedSitemaps.slice(0, 10)) { try { this.log(`Fetching nested sitemap: ${nestedUrl}`, Level.LOG); const nestedUrls = await page.evaluate((url) => { return new Promise((resolve) => { const xhr = new XMLHttpRequest(); xhr.open('GET', url, true); xhr.onload = function() { if (xhr.status === 200) { const text = xhr.responseText; const locMatches = text.match(/(.*?)<\/loc>/g) || []; const urls = locMatches.map(match => match.replace(/<\/?loc>/g, '')); resolve(urls); } else { resolve([]); } }; xhr.onerror = function() { resolve([]); }; xhr.send(); }); }, nestedUrl); for (const nestedPageUrl of nestedUrls) { const normalized = normalizeUrl(nestedPageUrl); if (!visitedUrls.has(normalized) && isUrlAllowedByConfig(nestedPageUrl) && isUrlAllowedByRobots(nestedPageUrl)) { visitedUrls.add(normalized); crawlQueue.push({ url: nestedPageUrl, depth: 1 }); } } this.log(`Found ${nestedUrls.length} URLs from nested sitemap ${nestedUrl}`, Level.LOG); } catch (error) { this.log(`Failed to fetch nested sitemap ${nestedUrl}: ${error.message}`, Level.WARN); } } this.log(`Total URLs queued from sitemaps: ${crawlQueue.length - 1}`, Level.LOG); } else { this.log('No URLs found in sitemap or sitemap not available', Level.WARN); } } catch (error) { this.log(`Sitemap fetch failed: ${error.message}`, Level.WARN); } } let processedCount = 0; while (crawlQueue.length > 0 && crawlResults.length < crawlConfig.limit) { if (this.isAborted) { this.log('Workflow aborted during crawl', Level.WARN); break; } const { url, depth } = crawlQueue.shift()!; processedCount++; this.log(`[${crawlResults.length + 1}/${crawlConfig.limit}] Crawling (depth ${depth}): ${url}`, Level.LOG); try { if (robotRules.crawlDelay && crawlResults.length > 0) { this.log(`Applying crawl delay: ${robotRules.crawlDelay}ms`, Level.LOG); await new Promise(resolve => setTimeout(resolve, robotRules.crawlDelay!)); } await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }).catch((err) => { throw new 
Error(`Navigation failed: ${err.message}`); }); await page.waitForLoadState('load', { timeout: 10000 }).catch(() => {}); const pageResult = await scrapePageContent(url); pageResult.metadata.depth = depth; crawlResults.push(pageResult); this.log(`✓ Scraped ${url} (${pageResult.wordCount} words, depth ${depth})`, Level.LOG); if (crawlConfig.followLinks && depth < crawlConfig.maxDepth) { const newLinks = await extractLinksFromPage(); let addedCount = 0; for (const link of newLinks) { const normalized = normalizeUrl(link); if (!visitedUrls.has(normalized) && isUrlAllowedByConfig(link) && isUrlAllowedByRobots(link)) { visitedUrls.add(normalized); crawlQueue.push({ url: link, depth: depth + 1 }); addedCount++; } } if (addedCount > 0) { this.log(`Added ${addedCount} new URLs to queue at depth ${depth + 1}`, Level.LOG); } } } catch (error) { this.log(`Failed to crawl ${url}: ${error.message}`, Level.WARN); crawlResults.push({ metadata: { url: url, sourceURL: url, depth: depth }, error: error.message, scrapedAt: new Date().toISOString() }); } } this.log(`Crawl completed: ${crawlResults.length} pages scraped (${processedCount} URLs processed, ${visitedUrls.size} URLs discovered)`, Level.LOG); const actionType = "crawl"; const actionName = "Crawl Results"; if (!this.serializableDataByType[actionType]) { this.serializableDataByType[actionType] = {}; } if (!this.serializableDataByType[actionType][actionName]) { this.serializableDataByType[actionType][actionName] = []; } this.serializableDataByType[actionType][actionName] = crawlResults; await this.options.serializableCallback({ scrapeList: this.serializableDataByType.scrapeList || {}, scrapeSchema: this.serializableDataByType.scrapeSchema || {}, crawl: this.serializableDataByType.crawl || {}, search: this.serializableDataByType.search || {} }); } catch (error) { this.log(`Crawl action failed: ${error.message}`, Level.ERROR); throw new Error(`Crawl execution error: ${error.message}`); } }, search: async (searchConfig: { query: 
string; limit: number; provider?: 'duckduckgo'; filters?: { timeRange?: 'day' | 'week' | 'month' | 'year'; }; mode: 'discover' | 'scrape'; }) => { if (this.isAborted) { this.log('Workflow aborted, stopping search', Level.WARN); return; } if (this.options.debugChannel?.setActionType) { this.options.debugChannel.setActionType('search'); } searchConfig.provider = 'duckduckgo'; this.log(`Performing DuckDuckGo search for: ${searchConfig.query}`, Level.LOG); try { let searchUrl = `https://duckduckgo.com/?q=${encodeURIComponent(searchConfig.query)}`; if (searchConfig.filters?.timeRange) { const timeMap: Record = { 'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y' }; searchUrl += `&df=${timeMap[searchConfig.filters.timeRange]}`; } const initialDelay = 500 + Math.random() * 1000; await new Promise(resolve => setTimeout(resolve, initialDelay)); await page.goto(searchUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }); await page.waitForLoadState('load', { timeout: 10000 }).catch(() => { this.log('Load state timeout, continuing anyway', Level.WARN); }); const pageLoadDelay = 2000 + Math.random() * 1500; await new Promise(resolve => setTimeout(resolve, pageLoadDelay)); let searchResults: any[] = []; let retryCount = 0; const maxRetries = 2; while (searchResults.length === 0 && retryCount <= maxRetries) { if (retryCount > 0) { this.log(`Retry attempt ${retryCount}/${maxRetries} for DuckDuckGo search...`, Level.LOG); const retryDelay = 1000 * Math.pow(2, retryCount) + Math.random() * 1000; await new Promise(resolve => setTimeout(resolve, retryDelay)); } this.log('Attempting to extract DuckDuckGo search results...', Level.LOG); await page.waitForSelector('[data-testid="result"], .result', { timeout: 5000 }).catch(() => { this.log('DuckDuckGo results not found on initial wait', Level.WARN); }); let currentResultCount = 0; const maxLoadAttempts = Math.ceil(searchConfig.limit / 10) * 2; let loadAttempts = 0; let noNewResultsCount = 0; while (currentResultCount < 
searchConfig.limit && loadAttempts < maxLoadAttempts && noNewResultsCount < 3) { const previousCount = currentResultCount; currentResultCount = await page.evaluate(() => { const selectors = [ '[data-testid="result"]', 'article[data-testid="result"]', 'li[data-layout="organic"]', '.result', 'article[data-testid]' ]; for (const selector of selectors) { const elements = document.querySelectorAll(selector); if (elements.length > 0) { return elements.length; } } return 0; }); if (currentResultCount >= searchConfig.limit) { this.log(`Reached desired result count: ${currentResultCount}`, Level.LOG); break; } if (currentResultCount === previousCount) { noNewResultsCount++; this.log(`No new results after load more (attempt ${noNewResultsCount}/3)`, Level.WARN); if (noNewResultsCount >= 3) break; } else { noNewResultsCount = 0; this.log(`Current results count: ${currentResultCount}/${searchConfig.limit}`, Level.LOG); } await page.evaluate(() => { window.scrollTo(0, document.body.scrollHeight); }); await new Promise(resolve => setTimeout(resolve, 800)); const loadMoreClicked = await page.evaluate(() => { const selectors = [ '#more-results', 'button:has-text("More results")', 'button:has-text("more results")', 'button[id*="more"]', 'button:has-text("Load more")' ]; for (const selector of selectors) { try { const button = document.querySelector(selector) as HTMLButtonElement; if (button && button.offsetParent !== null) { button.click(); console.log(`Clicked load more button with selector: ${selector}`); return true; } } catch (e) { continue; } } return false; }); if (loadMoreClicked) { this.log('Clicked "More results" button', Level.LOG); await new Promise(resolve => setTimeout(resolve, 1500 + Math.random() * 1000)); } else { this.log('No "More results" button found, results may be limited', Level.WARN); break; } loadAttempts++; } this.log(`Finished pagination. 
Total results available: ${currentResultCount}`, Level.LOG); searchResults = await page.evaluate((limit: number) => { const results: any[] = []; const cleanDescription = (text: string): string => { if (!text) return ''; let cleaned = text.replace(/^\d+\s+(second|minute|hour|day|week|month|year)s?\s+ago\s*/i, ''); cleaned = cleaned.replace(/^[A-Z][a-z]{2}\s+\d{1,2},?\s+\d{4}\s*[—\-]\s*/i, ''); cleaned = cleaned.replace(/^\d{4}-\d{2}-\d{2}\s*[—\-]\s*/i, ''); cleaned = cleaned.trim().replace(/\s+/g, ' '); return cleaned; }; const selectors = [ '[data-testid="result"]', 'article[data-testid="result"]', 'li[data-layout="organic"]', '.result', 'article[data-testid]' ]; let allElements: Element[] = []; for (const selector of selectors) { const elements = Array.from(document.querySelectorAll(selector)); if (elements.length > 0) { console.log(`Found ${elements.length} DDG elements with: ${selector}`); allElements = elements; break; } } for (let i = 0; i < Math.min(allElements.length, limit); i++) { const element = allElements[i]; const titleEl = element.querySelector('h2, [data-testid="result-title-a"], h3, [data-testid="result-title"]'); let linkEl = titleEl?.querySelector('a[href]') as HTMLAnchorElement; if (!linkEl) { linkEl = element.querySelector('a[href]') as HTMLAnchorElement; } if (!linkEl || !linkEl.href) continue; let actualUrl = linkEl.href; if (actualUrl.includes('uddg=')) { try { const urlParams = new URLSearchParams(actualUrl.split('?')[1]); const uddgUrl = urlParams.get('uddg'); if (uddgUrl) { actualUrl = decodeURIComponent(uddgUrl); } } catch (e) { console.log('Failed to parse uddg parameter:', e); } } if (actualUrl.includes('duckduckgo.com')) { console.log(`Skipping DDG internal URL: ${actualUrl}`); continue; } const descEl = element.querySelector('[data-result="snippet"], .result__snippet, [data-testid="result-snippet"]'); if (titleEl && titleEl.textContent && actualUrl) { const rawDescription = (descEl?.textContent || '').trim(); const cleanedDescription 
= cleanDescription(rawDescription); results.push({ url: actualUrl, title: titleEl.textContent.trim(), description: cleanedDescription, position: results.length + 1 }); } } console.log(`Extracted ${results.length} DuckDuckGo search results`); return results; }, searchConfig.limit); if (searchResults.length === 0) { this.log(`No DuckDuckGo results found (attempt ${retryCount + 1}/${maxRetries + 1})`, Level.WARN); retryCount++; } else { this.log(`Successfully extracted ${searchResults.length} results`, Level.LOG); break; } } this.log(`Search found ${searchResults.length} results`, Level.LOG); if (searchConfig.mode === 'discover') { const actionType = "search"; const actionName = "Search Results"; if (!this.serializableDataByType[actionType]) { this.serializableDataByType[actionType] = {}; } if (!this.serializableDataByType[actionType][actionName]) { this.serializableDataByType[actionType][actionName] = {}; } const searchData = { query: searchConfig.query, provider: searchConfig.provider, filters: searchConfig.filters || {}, resultsCount: searchResults.length, results: searchResults, searchedAt: new Date().toISOString() }; this.serializableDataByType[actionType][actionName] = searchData; await this.options.serializableCallback({ scrapeList: this.serializableDataByType.scrapeList || {}, scrapeSchema: this.serializableDataByType.scrapeSchema || {}, crawl: this.serializableDataByType.crawl || {}, search: this.serializableDataByType.search || {} }); this.log(`Search completed in discover mode with ${searchResults.length} results`, Level.LOG); return; } this.log(`Starting to scrape content from ${searchResults.length} search results...`, Level.LOG); const scrapedResults = []; for (let i = 0; i < searchResults.length; i++) { const result = searchResults[i]; try { this.log(`[${i + 1}/${searchResults.length}] Scraping: ${result.url}`, Level.LOG); await page.goto(result.url, { waitUntil: 'domcontentloaded', timeout: 30000 }).catch(() => { this.log(`Failed to navigate to 
${result.url}, skipping...`, Level.WARN); }); await page.waitForLoadState('load', { timeout: 10000 }).catch(() => {}); const pageData = await page.evaluate(() => { const getMeta = (name: string) => { const meta = document.querySelector(`meta[name="${name}"], meta[property="${name}"]`); return meta?.getAttribute('content') || ''; }; const getAllMeta = () => { const metadata: Record = {}; const metaTags = document.querySelectorAll('meta'); metaTags.forEach(tag => { const name = tag.getAttribute('name') || tag.getAttribute('property'); const content = tag.getAttribute('content'); if (name && content) { metadata[name] = content; } }); return metadata; }; const title = document.title || ''; const bodyText = document.body?.innerText || ''; const elementsWithMxId = document.querySelectorAll('[data-mx-id]'); elementsWithMxId.forEach(el => el.removeAttribute('data-mx-id')); const html = document.documentElement.outerHTML; const links = Array.from(document.querySelectorAll('a')).map(a => a.href); const allMetadata = getAllMeta(); return { title, description: getMeta('description'), text: bodyText, html: html, links: links, wordCount: bodyText.split(/\s+/).filter(w => w.length > 0).length, metadata: { ...allMetadata, title, language: document.documentElement.lang || '', favicon: (document.querySelector('link[rel="icon"], link[rel="shortcut icon"]') as HTMLLinkElement)?.href || '', statusCode: 200 } }; }); scrapedResults.push({ searchResult: { query: searchConfig.query, position: result.position, searchTitle: result.title, searchDescription: result.description, }, metadata: { ...pageData.metadata, url: result.url, sourceURL: result.url }, html: pageData.html, text: pageData.text, links: pageData.links, wordCount: pageData.wordCount, scrapedAt: new Date().toISOString() }); this.log(`✓ Scraped ${result.url} (${pageData.wordCount} words)`, Level.LOG); } catch (error) { this.log(`Failed to scrape ${result.url}: ${error.message}`, Level.WARN); scrapedResults.push({ searchResult: { 
query: searchConfig.query, position: result.position, searchTitle: result.title, searchDescription: result.description, }, url: result.url, error: error.message, scrapedAt: new Date().toISOString() }); } } this.log(`Successfully scraped ${scrapedResults.length} search results`, Level.LOG); const actionType = "search"; const actionName = "Search Results"; if (!this.serializableDataByType[actionType]) { this.serializableDataByType[actionType] = {}; } if (!this.serializableDataByType[actionType][actionName]) { this.serializableDataByType[actionType][actionName] = {}; } const searchData = { query: searchConfig.query, provider: searchConfig.provider, filters: searchConfig.filters || {}, mode: searchConfig.mode, resultsCount: scrapedResults.length, results: scrapedResults, searchedAt: new Date().toISOString() }; this.serializableDataByType[actionType][actionName] = searchData; await this.options.serializableCallback({ scrapeList: this.serializableDataByType.scrapeList || {}, scrapeSchema: this.serializableDataByType.scrapeSchema || {}, crawl: this.serializableDataByType.crawl || {}, search: this.serializableDataByType.search || {} }); } catch (error) { this.log(`Search action failed: ${error.message}`, Level.ERROR); throw new Error(`Search execution error: ${error.message}`); } }, flag: async () => new Promise((res) => { if (this.options.debugChannel?.setActionType) { this.options.debugChannel.setActionType('flag'); } this.emit('flag', page, res); }), }; const executeAction = async (invokee: any, methodName: string, args: any) => { console.log("Executing action:", methodName, args); if (methodName === 'press' || methodName === 'type') { // Extract only the first two arguments for these methods const limitedArgs = Array.isArray(args) ? args.slice(0, 2) : [args]; await (invokee[methodName])(...limitedArgs); return; } if (!args || Array.isArray(args)) { await (invokee[methodName])(...(args ?? 
[])); } else { await (invokee[methodName])(args); } }; for (const step of steps) { if (this.isAborted) { this.log('Workflow aborted during step execution', Level.WARN); return; } this.log(`Launching ${String(step.action)}`, Level.LOG); let stepName: string | null = null; try { const debug = this.options.debugChannel; if (debug?.setActionType) { debug.setActionType(String(step.action)); } stepName = (step as any)?.name || String(step.action); if (debug && typeof (debug as any).setActionName === "function") { (debug as any).setActionName(stepName); } } catch (err) { this.log(`Failed to set action name/type: ${(err as Error).message}`, Level.WARN); } if (step.action in wawActions) { // "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not) const params = !step.args || Array.isArray(step.args) ? step.args : [step.args]; if (step.action === 'screenshot') { await (wawActions.screenshot as any)(...(params ?? []), stepName ?? undefined); } else if (step.action === 'scrapeList' || step.action === 'scrapeSchema') { const actionName = (step as any).name || ""; await wawActions[step.action as CustomFunctions](...(params ?? []), actionName); } else { await wawActions[step.action as CustomFunctions](...(params ?? 
/**
 * Scrapes a list across multiple "pages" according to `config.pagination.type`
 * and returns every unique item collected.
 *
 * Supported pagination types (see the `switch` below):
 *  - `scrollDown`    – scroll towards the bottom until neither height nor results change
 *  - `scrollUp`      – scroll to the top until the top is reached or results stop changing
 *  - `clickNext`     – click a "next page" control and wait for navigation / content change
 *  - `clickLoadMore` – repeatedly click a "load more" control on the same page
 *  - anything else   – scrape the current page once
 *
 * After every scrape the accumulated results are stored under
 * `serializableDataByType.scrapeList[actionName]` and pushed through
 * `options.serializableCallback`.
 *
 * @param page Page to scrape.
 * @param config List selector, field definitions, optional item limit and pagination settings.
 * @param providedActionName Key under which results are stored; when blank, `List <n>` is generated.
 * @returns The collected items (possibly partial if the run was aborted or pagination failed).
 */
private async handlePagination(page: Page, config: {
  listSelector: string,
  fields: any,
  limit?: number,
  pagination: any
}, providedActionName: string = "") {
  if (this.isAborted) {
    this.log('Workflow aborted, stopping pagination', Level.WARN);
    return [];
  }

  const actionType = "scrapeList";
  let actionName = providedActionName || "";
  if (!actionName || actionName.trim() === "") {
    this.scrapeListCounter++;
    actionName = `List ${this.scrapeListCounter}`;
  }

  if (!this.serializableDataByType[actionType]) {
    this.serializableDataByType[actionType] = {};
  }
  if (!this.serializableDataByType[actionType][actionName]) {
    this.serializableDataByType[actionType][actionName] = [];
  }

  let allResults: Record<string, any>[] = [];
  let previousHeight = 0;
  // JSON-serialised items already collected; used to drop duplicates across pages
  let scrapedItems: Set<string> = new Set<string>();
  let visitedUrls: Set<string> = new Set<string>();
  const MAX_RETRIES = 3;
  const RETRY_DELAY = 1000;
  const MAX_UNCHANGED_RESULTS = 5;

  const debugLog = (message: string, ...args: any[]) => {
    console.log(`[Page ${visitedUrls.size}] [URL: ${page.url()}] ${message}`, ...args);
  };

  // Scrapes the currently loaded page, appends unseen items (respecting `limit`)
  // and reports the accumulated results through the serializable callback.
  const scrapeCurrentPage = async () => {
    if (this.isAborted) {
      debugLog("Workflow aborted, stopping scrapeCurrentPage");
      return;
    }

    const evaluationPromise = page.evaluate((cfg) => window.scrapeList(cfg), config);
    // Guard against a hanging in-page evaluation. The timer handle is kept so it can be
    // cleared once the race settles; otherwise every scraped page would leave a live
    // 10s timer behind.
    let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
    const timeoutPromise = new Promise<never>((_, reject) => {
      timeoutHandle = setTimeout(() => reject(new Error('Page evaluation timeout')), 10000);
    });

    let results;
    try {
      results = await Promise.race([evaluationPromise, timeoutPromise]);
    } catch (error) {
      debugLog(`Page evaluation failed: ${error.message}`);
      return;
    } finally {
      clearTimeout(timeoutHandle);
    }

    const newResults = results.filter(item => {
      const uniqueKey = JSON.stringify(item);
      if (scrapedItems.has(uniqueKey)) return false;
      scrapedItems.add(uniqueKey);
      return true;
    });

    let itemsToAdd = newResults;
    if (config.limit) {
      const remainingCapacity = config.limit - allResults.length;
      if (remainingCapacity <= 0) {
        itemsToAdd = [];
      } else if (newResults.length > remainingCapacity) {
        itemsToAdd = newResults.slice(0, remainingCapacity);
      }
    }

    allResults = allResults.concat(itemsToAdd);
    this.serializableDataByType[actionType][actionName] = [...allResults];

    await this.options.serializableCallback({
      scrapeList: this.serializableDataByType.scrapeList,
      scrapeSchema: this.serializableDataByType.scrapeSchema,
      crawl: this.serializableDataByType.crawl || {},
      search: this.serializableDataByType.search || {}
    });
  };

  // True once `limit` items were collected; also trims any overshoot.
  const checkLimit = () => {
    if (config.limit && allResults.length >= config.limit) {
      allResults = allResults.slice(0, config.limit);
      return true;
    }
    return false;
  };

  // Helper function to detect if a selector is XPath (heuristic based on typical XPath syntax)
  const isXPathSelector = (selector: string): boolean => {
    return selector.startsWith('//') ||
      selector.startsWith('/') ||
      selector.startsWith('./') ||
      selector.includes('contains(@') ||
      selector.includes('[count(') ||
      selector.includes('@class=') ||
      selector.includes('@id=') ||
      selector.includes(' and ') ||
      selector.includes(' or ');
  };

  // Helper function to wait for selector (CSS or XPath); resolves to null instead of throwing
  const waitForSelectorUniversal = async (selector: string, options: any = {}): Promise<ElementHandle | null> => {
    try {
      if (isXPathSelector(selector)) {
        // Use XPath locator
        const locator = page.locator(`xpath=${selector}`);
        await locator.waitFor({
          state: 'attached',
          timeout: options.timeout || 10000
        });
        return await locator.elementHandle();
      } else {
        // Use CSS selector
        return await page.waitForSelector(selector, {
          state: 'attached',
          timeout: options.timeout || 10000
        });
      }
    } catch (error) {
      return null;
    }
  };

  // Enhanced button finder with retry mechanism.
  // Tries each selector up to MAX_RETRIES times; selectors that never match are dropped
  // from `updatedSelectors` so later iterations do not retry them.
  const findWorkingButton = async (selectors: string[]): Promise<{
    button: ElementHandle | null,
    workingSelector: string | null,
    updatedSelectors: string[]
  }> => {
    const startTime = Date.now();
    const MAX_BUTTON_SEARCH_TIME = 15000;
    let updatedSelectors = [...selectors];

    for (let i = 0; i < selectors.length; i++) {
      if (Date.now() - startTime > MAX_BUTTON_SEARCH_TIME) {
        debugLog(`Button search timeout reached (${MAX_BUTTON_SEARCH_TIME}ms), aborting`);
        break;
      }

      const selector = selectors[i];
      let retryCount = 0;
      let selectorSuccess = false;

      while (retryCount < MAX_RETRIES && !selectorSuccess) {
        try {
          const button = await waitForSelectorUniversal(selector, { timeout: 2000 });

          if (button) {
            debugLog('Found working selector:', selector);
            return {
              button,
              workingSelector: selector,
              updatedSelectors
            };
          } else {
            retryCount++;
            debugLog(`Selector "${selector}" not found: attempt ${retryCount}/${MAX_RETRIES}`);

            if (retryCount < MAX_RETRIES) {
              await page.waitForTimeout(RETRY_DELAY);
            } else {
              debugLog(`Removing failed selector "${selector}" after ${MAX_RETRIES} attempts`);
              updatedSelectors = updatedSelectors.filter(s => s !== selector);
              // Ends the retry loop for this selector (it did not actually succeed)
              selectorSuccess = true;
            }
          }
        } catch (error) {
          retryCount++;
          debugLog(`Selector "${selector}" error: attempt ${retryCount}/${MAX_RETRIES} - ${error.message}`);

          if (retryCount < MAX_RETRIES) {
            await page.waitForTimeout(RETRY_DELAY);
          } else {
            debugLog(`Removing failed selector "${selector}" after ${MAX_RETRIES} attempts`);
            updatedSelectors = updatedSelectors.filter(s => s !== selector);
            selectorSuccess = true;
          }
        }
      }
    }

    return {
      button: null,
      workingSelector: null,
      updatedSelectors
    };
  };

  // Re-runs `operation` (with a delay) while it throws; resolves to false once retries are exhausted.
  const retryOperation = async (operation: () => Promise<boolean>, retryCount = 0): Promise<boolean> => {
    try {
      return await operation();
    } catch (error) {
      if (retryCount < MAX_RETRIES) {
        debugLog(`Retrying operation. Attempt ${retryCount + 1} of ${MAX_RETRIES}`);
        await page.waitForTimeout(RETRY_DELAY);
        return retryOperation(operation, retryCount + 1);
      }
      debugLog(`Operation failed after ${MAX_RETRIES} retries`);
      return false;
    }
  };

  // A missing selector is treated like an empty one, so this line cannot throw
  // before the try block below is entered.
  const rawSelector = typeof config.pagination.selector === 'string' ? config.pagination.selector : '';
  let availableSelectors: string[] = rawSelector.split(',');
  let unchangedResultCounter = 0;

  try {
    while (true) {
      if (this.isAborted) {
        this.log('Workflow aborted during pagination loop', Level.WARN);
        return allResults;
      }

      switch (config.pagination.type) {
        case 'scrollDown': {
          let previousResultCount = allResults.length;

          await scrapeCurrentPage();

          if (checkLimit()) {
            return allResults;
          }

          // Scroll in several viewport-sized steps
          const scrollIterations = 3;
          for (let i = 0; i < scrollIterations; i++) {
            await page.evaluate(() => {
              window.scrollBy(0, window.innerHeight * 0.8);
            });
            await page.waitForTimeout(500);
          }

          await page.waitForTimeout(2000);

          // Additionally bring the last list item into view
          try {
            await page.evaluate((listSelector) => {
              const isXPath = listSelector.startsWith('//') || listSelector.startsWith('/');
              let lastElement: Element | null = null;

              if (isXPath) {
                const result = document.evaluate(listSelector, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
                if (result.snapshotLength > 0) {
                  lastElement = result.snapshotItem(result.snapshotLength - 1) as Element;
                }
              } else {
                const elements = document.querySelectorAll(listSelector);
                if (elements.length > 0) {
                  lastElement = elements[elements.length - 1] as Element;
                }
              }

              if (lastElement) {
                lastElement.scrollIntoView({ behavior: 'smooth', block: 'end' });
              }
            }, config.listSelector);

            await page.waitForTimeout(1500);
          } catch (e) {
            // Best effort only: the height/result checks below decide whether to continue
          }

          const currentHeight = await page.evaluate(() => {
            return Math.max(
              document.body.scrollHeight,
              document.documentElement.scrollHeight
            );
          });
          const currentResultCount = allResults.length;

          if (currentResultCount === previousResultCount) {
            unchangedResultCounter++;
            if (unchangedResultCounter >= MAX_UNCHANGED_RESULTS) {
              return allResults;
            }
          } else {
            unchangedResultCounter = 0;
          }

          // Page did not grow since the previous iteration: nothing more to load
          if (currentHeight === previousHeight) {
            return allResults;
          }

          previousHeight = currentHeight;
          break;
        }

        case 'scrollUp': {
          let previousResultCount = allResults.length;

          await scrapeCurrentPage();

          if (checkLimit()) {
            return allResults;
          }

          await page.evaluate(() => window.scrollTo(0, 0));
          await page.waitForTimeout(2000);

          const currentTopHeight = await page.evaluate(() => document.documentElement.scrollTop);
          const currentResultCount = allResults.length;

          if (currentResultCount === previousResultCount) {
            unchangedResultCounter++;
            if (unchangedResultCounter >= MAX_UNCHANGED_RESULTS) {
              return allResults;
            }
          } else {
            unchangedResultCounter = 0;
          }

          if (currentTopHeight === 0) {
            return allResults;
          }

          previousHeight = currentTopHeight;
          break;
        }

        case 'clickNext': {
          const currentUrl = page.url();
          visitedUrls.add(currentUrl);

          await scrapeCurrentPage();
          if (checkLimit()) return allResults;

          const { button, workingSelector, updatedSelectors } = await findWorkingButton(availableSelectors);
          availableSelectors = updatedSelectors;

          if (!button || !workingSelector) {
            // Final retry for navigation when no selectors work
            const success = await retryOperation(async () => {
              try {
                await page.evaluate(() => window.history.forward());
                const newUrl = page.url();
                return !visitedUrls.has(newUrl);
              } catch {
                return false;
              }
            });

            if (!success) return allResults;
            break;
          }

          let retryCount = 0;
          let paginationSuccess = false;

          // Capture basic content signature before click - with XPath support
          const captureContentSignature = async () => {
            return await page.evaluate((listSelector) => {
              const isXPath = (selector: string) => {
                return selector.startsWith('//') || selector.startsWith('./') || selector.includes('::');
              };

              let items: NodeListOf<Element> | Element[] = [];

              if (isXPath(listSelector)) {
                try {
                  // Use XPath to find elements
                  const xpathResult = document.evaluate(
                    listSelector,
                    document,
                    null,
                    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
                    null
                  );

                  items = [];
                  for (let i = 0; i < xpathResult.snapshotLength; i++) {
                    const node = xpathResult.snapshotItem(i);
                    if (node && node.nodeType === Node.ELEMENT_NODE) {
                      items.push(node as Element);
                    }
                  }
                } catch (xpathError) {
                  console.warn('XPath evaluation failed, trying CSS selector as fallback:', xpathError);
                  // Fallback to CSS selector
                  try {
                    items = document.querySelectorAll(listSelector);
                  } catch (cssError) {
                    console.warn('CSS selector fallback also failed:', cssError);
                    items = [];
                  }
                }
              } else {
                try {
                  // Use CSS selector
                  items = document.querySelectorAll(listSelector);
                } catch (cssError) {
                  console.warn('CSS selector failed:', cssError);
                  items = [];
                }
              }

              return {
                url: window.location.href,
                itemCount: items.length,
                firstItems: Array.from(items).slice(0, 3).map(el => el.textContent || '').join('|')
              };
            }, config.listSelector);
          };

          const beforeSignature = await captureContentSignature();
          debugLog(`Before click: ${beforeSignature.itemCount} items`);

          while (retryCount < MAX_RETRIES && !paginationSuccess) {
            try {
              // Escalation order: click + navigation, dispatched click + navigation,
              // then plain click / dispatched click without waiting for navigation.
              try {
                await Promise.all([
                  page.waitForNavigation({
                    waitUntil: 'networkidle',
                    timeout: 15000
                  }).catch(e => {
                    throw e;
                  }),
                  page.locator(workingSelector).first().click()
                ]);
                debugLog("Navigation successful after regular click");
                await page.waitForTimeout(2000);
                paginationSuccess = true;
              } catch (navError) {
                debugLog("Regular click with navigation failed, trying dispatch event with navigation");
                try {
                  await Promise.all([
                    page.waitForNavigation({
                      waitUntil: 'networkidle',
                      timeout: 15000
                    }).catch(e => {
                      throw e;
                    }),
                    page.locator(workingSelector).first().dispatchEvent('click')
                  ]);
                  debugLog("Navigation successful after dispatch event");
                  await page.waitForTimeout(2000);
                  paginationSuccess = true;
                } catch (dispatchNavError) {
                  try {
                    await page.locator(workingSelector).first().click();
                    await page.waitForTimeout(2000);
                  } catch (clickError) {
                    await page.locator(workingSelector).first().dispatchEvent('click');
                    await page.waitForTimeout(2000);
                  }
                }
              }

              await page.waitForLoadState('networkidle', { timeout: 5000 }).catch(() => {});

              // No navigation event was observed: decide success by comparing URL / list content
              if (!paginationSuccess) {
                const newUrl = page.url();
                const afterSignature = await captureContentSignature();

                if (newUrl !== currentUrl) {
                  debugLog(`URL changed to ${newUrl}`);
                  visitedUrls.add(newUrl);
                  paginationSuccess = true;
                } else if (afterSignature.firstItems !== beforeSignature.firstItems) {
                  debugLog("Content changed without URL change");
                  paginationSuccess = true;
                } else if (afterSignature.itemCount !== beforeSignature.itemCount) {
                  debugLog(`Item count changed from ${beforeSignature.itemCount} to ${afterSignature.itemCount}`);
                  paginationSuccess = true;
                }
              }
            } catch (error) {
              debugLog(`Pagination attempt ${retryCount + 1} failed: ${error.message}`);
            }

            if (!paginationSuccess) {
              retryCount++;
              if (retryCount < MAX_RETRIES) {
                debugLog(`Retrying pagination - attempt ${retryCount + 1} of ${MAX_RETRIES}`);
                await page.waitForTimeout(RETRY_DELAY);
              }
            }
          }

          if (!paginationSuccess) {
            debugLog(`Pagination failed after ${MAX_RETRIES} attempts`);
            return allResults;
          }

          break;
        }

        case 'clickLoadMore': {
          await scrapeCurrentPage();
          if (checkLimit()) return allResults;

          let loadMoreCounter = 0;
          let previousResultCount = allResults.length;
          let noNewItemsCounter = 0;
          const MAX_NO_NEW_ITEMS = 5;

          // This inner loop only exits through `return`
          while (true) {
            if (this.isAborted) {
              this.log('Workflow aborted during pagination loop', Level.WARN);
              return allResults;
            }

            // Find working button with retry mechanism
            const { button: loadMoreButton, workingSelector, updatedSelectors } = await findWorkingButton(availableSelectors);
            availableSelectors = updatedSelectors;

            if (!workingSelector || !loadMoreButton) {
              debugLog('No working Load More selector found after retries');
              return allResults;
            }

            // Implement retry mechanism for clicking the button
            let retryCount = 0;
            let clickSuccess = false;

            while (retryCount < MAX_RETRIES && !clickSuccess) {
              try {
                try {
                  await loadMoreButton.click();
                  clickSuccess = true;
                } catch (error) {
                  debugLog(`Regular click failed on attempt ${retryCount + 1}. Trying DispatchEvent`);

                  // If regular click fails, try dispatchEvent
                  try {
                    await loadMoreButton.dispatchEvent('click');
                    clickSuccess = true;
                  } catch (dispatchError) {
                    debugLog(`DispatchEvent failed on attempt ${retryCount + 1}.`);
                    throw dispatchError; // Propagate error to trigger retry
                  }
                }

                if (clickSuccess) {
                  await page.waitForTimeout(1000);
                  loadMoreCounter++;
                  debugLog(`Successfully clicked Load More button (${loadMoreCounter} times)`);
                }
              } catch (error) {
                debugLog(`Click attempt ${retryCount + 1} failed completely.`);
                retryCount++;

                if (retryCount < MAX_RETRIES) {
                  debugLog(`Retrying click - attempt ${retryCount + 1} of ${MAX_RETRIES}`);
                  await page.waitForTimeout(RETRY_DELAY);
                }
              }
            }

            if (!clickSuccess) {
              debugLog(`Load More clicking failed after ${MAX_RETRIES} attempts`);
              return allResults;
            }

            // Wait for content to load and check scroll height
            await page.waitForTimeout(2000);
            await page.evaluate(() => {
              const scrollHeight = Math.max(
                document.body.scrollHeight,
                document.documentElement.scrollHeight
              );
              window.scrollTo(0, scrollHeight);
            });
            await page.waitForTimeout(2000);

            const currentHeight = await page.evaluate(() => {
              return Math.max(
                document.body.scrollHeight,
                document.documentElement.scrollHeight
              );
            });
            const heightChanged = currentHeight !== previousHeight;
            previousHeight = currentHeight;

            await scrapeCurrentPage();

            const currentResultCount = allResults.length;
            const newItemsAdded = currentResultCount > previousResultCount;

            if (!newItemsAdded) {
              noNewItemsCounter++;
              debugLog(`No new items added after click (${noNewItemsCounter}/${MAX_NO_NEW_ITEMS})`);

              if (noNewItemsCounter >= MAX_NO_NEW_ITEMS) {
                debugLog(`Stopping after ${MAX_NO_NEW_ITEMS} clicks with no new items`);
                return allResults;
              }
            } else {
              noNewItemsCounter = 0;
              previousResultCount = currentResultCount;
            }

            if (checkLimit()) return allResults;

            if (!heightChanged) {
              debugLog('No more items loaded after Load More');
              return allResults;
            }
          }
        }

        default: {
          // No (known) pagination: scrape the current page once
          await scrapeCurrentPage();
          return allResults;
        }
      }

      if (checkLimit()) break;
    }
  } catch (error) {
    // Any unexpected failure ends pagination but keeps what was collected so far
    debugLog(`Fatal error: ${error.message}`);
    return allResults;
  }

  return allResults;
}
/**
 * Executes the workflow on page `p`, one action at a time.
 *
 * A deep copy of `workflow` (with `>>` selectors stripped) is consumed from its END:
 * the last entry is executed via `carryOutSteps` and removed on success, until the
 * copy is empty. The where-clause matching (`getState` / `getMatchingActionId`) is
 * not used here; actions run strictly in reverse array order.
 *
 * Pages opened as popups are handed to the concurrency manager and get their own `runLoop`.
 * The popup listener is always removed when this loop exits, whatever the exit path.
 *
 * @param p Page to run the workflow on.
 * @param workflow Workflow to execute (left untouched; a copy is consumed).
 */
private async runLoop(p: Page, workflow: Workflow) {
  if (this.isAborted) {
    this.log('Workflow aborted in runLoop', Level.WARN);
    return;
  }

  let workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow));

  workflowCopy = this.removeSpecialSelectors(workflowCopy);

  // Only written to; retained for the currently disabled where-clause matching
  const usedActions: string[] = [];
  let lastAction = null;
  let repeatCount = 0;

  /**
   *  Enables the interpreter functionality for popup windows.
   * User-requested concurrency should be entirely managed by the concurrency manager,
   * e.g. via `enqueueLinks`.
   */
  const popupHandler = (popup: Page) => {
    this.concurrency.addJob(() => this.runLoop(popup, workflowCopy));
  };
  p.on('popup', popupHandler);

  let loopIterations = 0;
  const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker

  // Cleanup function to remove popup listener (safe to call more than once)
  const cleanup = () => {
    try {
      if (!p.isClosed()) {
        p.removeListener('popup', popupHandler);
      }
    } catch (cleanupError) {
      // Listener removal is best effort; the page may be going away
    }
  };

  // try/finally guarantees the popup listener is removed on EVERY exit path,
  // including the max-repeats / abort-before-action returns and unexpected throws.
  try {
    /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
    while (true) {
      if (this.isAborted) {
        this.log('Workflow aborted during step execution', Level.WARN);
        return;
      }

      // Circuit breaker to prevent infinite loops
      if (++loopIterations > MAX_LOOP_ITERATIONS) {
        this.log('Maximum loop iterations reached, terminating to prevent infinite loop', Level.ERROR);
        return;
      }

      // Checks whether the page was closed from outside,
      //  or the workflow execution has been stopped via `interpreter.stop()`
      if (p.isClosed() || !this.stopper) {
        return;
      }

      try {
        await p.waitForLoadState();
      } catch (e) {
        // Detach the listener before closing so no popup job is queued for a dying page
        cleanup();
        await p.close();
        return;
      }

      if (workflowCopy.length === 0) {
        this.log('All actions completed. Workflow finished.', Level.LOG);
        return;
      }

      // Always take the last remaining action
      const actionId = workflowCopy.length - 1;
      const action = workflowCopy[actionId];

      console.log("MATCHED ACTION:", action);
      console.log("MATCHED ACTION ID:", actionId);
      this.log(`Matched ${JSON.stringify(action?.where)}`, Level.LOG);

      if (!action) {
        return;
      }

      // action is matched
      if (this.options.debugChannel?.activeId) {
        this.options.debugChannel.activeId(actionId);
      }

      // A failed action stays in the copy and is picked again, which bumps `repeatCount`
      repeatCount = action === lastAction ? repeatCount + 1 : 0;

      console.log("REPEAT COUNT", repeatCount);
      if (this.options.maxRepeats && repeatCount > this.options.maxRepeats) {
        return;
      }
      lastAction = action;

      if (this.isAborted) {
        this.log('Workflow aborted before action execution', Level.WARN);
        return;
      }

      try {
        console.log("Carrying out:", action.what);
        await this.carryOutSteps(p, action.what);
        usedActions.push(action.id ?? 'undefined');

        workflowCopy.splice(actionId, 1);
        console.log(`Action with ID ${action.id} removed from the workflow copy.`);

        this.executedActions++;
        const percentage = Math.round((this.executedActions / this.totalActions) * 100);

        if (this.options.debugChannel?.progressUpdate) {
          this.options.debugChannel.progressUpdate(
            this.executedActions,
            this.totalActions,
            percentage
          );
        }

        // Reset loop iteration counter on successful action
        loopIterations = 0;
      } catch (e) {
        this.log(e, Level.ERROR);
        // Don't crash on individual action failures - continue with next iteration
        continue;
      }
    }
  } finally {
    cleanup();
  }
}
/**
 * Runs the interpreter's workflow on the given page.
 * \
 * Resolves after the playback is finished.
 * @param page Page to run the workflow on.
 * @param params Workflow specific, set of parameters
 *  for the `{$param: nameofparam}` fields.
 * @throws {Error} If this interpreter is already running a workflow.
 */
public async run(page: Page, params?: ParamType): Promise<void> {
  this.log('Starting the workflow.', Level.LOG);
  const context = page.context();

  page.setDefaultNavigationTimeout(100000);

  // Check proxy settings from context options
  // NOTE(review): `_options` is not part of the typed context API (hence the `any` cast) — confirm it stays available
  const contextOptions = (context as any)._options;
  const hasProxy = !!contextOptions?.proxy;

  this.log(`Proxy settings: ${hasProxy ? `Proxy is configured...` : 'No proxy configured...'}`);

  if (hasProxy) {
    if (contextOptions.proxy.username) {
      this.log(`Proxy authenticated...`);
    }
  }

  if (this.stopper) {
    throw new Error('This Interpreter is already running a workflow. To run another workflow, please, spawn another Interpreter.');
  }

  /**
   * `this.workflow` with the parameters initialized.
   */
  this.initializedWorkflow = Preprocessor.initWorkflow(this.workflow, params);

  // Progress is reported as executed / total top-level workflow entries
  this.totalActions = this.initializedWorkflow.length;
  this.executedActions = 0;

  if (this.options.debugChannel?.progressUpdate) {
    this.options.debugChannel.progressUpdate(0, this.totalActions, 0);
  }

  await this.ensureScriptsLoaded(page);

  // A non-null `stopper` marks the interpreter as running; `runLoop` exits once it is cleared
  this.stopper = () => {
    this.stopper = null;
  };

  try {
    this.concurrency.addJob(() => this.runLoop(page, this.initializedWorkflow!));

    await this.concurrency.waitForCompletion();
  } finally {
    // Always release the "running" marker, even if scheduling/awaiting the job throws;
    // otherwise every later run() would fail with "already running".
    this.stopper = null;
  }
}
*/
export default class Preprocessor {
  /**
   * Validates the structure of a workflow file against a Joi schema
   * (optional `meta`, required `workflow` array of where/what pairs).
   * @param {WorkflowFile} workflow The workflow file to validate.
   * @returns The `error` member of the Joi validation result
   *  (presumably `undefined` when the workflow is valid — see the Joi docs).
   */
  static validateWorkflow(workflow: WorkflowFile): any {
    const regex = Joi.object({
      $regex: Joi.string().required(),
    });

    // The schema links to itself by id so that $and/$or/$not can nest arbitrarily.
    const whereSchema = Joi.object({
      url: [Joi.string().uri(), regex],
      selectors: Joi.array().items(Joi.string()),
      cookies: Joi.object({}).pattern(Joi.string(), Joi.string()),
      $after: [Joi.string(), regex],
      $before: [Joi.string(), regex],
      $and: Joi.array().items(Joi.link('#whereSchema')),
      $or: Joi.array().items(Joi.link('#whereSchema')),
      $not: Joi.link('#whereSchema'),
    }).id('whereSchema');

    const schema = Joi.object({
      meta: Joi.object({
        name: Joi.string(),
        desc: Joi.string(),
      }),
      workflow: Joi.array().items(
        Joi.object({
          id: Joi.string(),
          where: whereSchema.required(),
          what: Joi.array().items({
            action: Joi.string().required(),
            args: Joi.array().items(Joi.any()),
            name: Joi.string(),
            actionId: Joi.string()
          }).required(),
        }),
      ).required(),
    });

    const { error } = schema.validate(workflow);

    return error;
  }

  /**
   * Extracts parameter names from the workflow.
   *
   * One entry is returned per `{$param: name}` occurrence, so a name used in
   * several places appears several times.
   * @param {WorkflowFile} workflow The given workflow
   * @returns {String[]} List of parameters' names.
   */
  static getParams(workflow: WorkflowFile): string[] {
    const getParamsRecurse = (object: any): string[] => {
      if (typeof object === 'object' && object !== null) {
        // Recursion base case
        if (object.$param) {
          return [object.$param];
        }

        // Recursion general case
        return (Object.values(object) as any[])
          .reduce((p: string[], v: any): string[] => [...p, ...getParamsRecurse(v)], []);
      }
      return [];
    };

    return getParamsRecurse(workflow.workflow);
  }

  /**
   * List all the selectors used in the given workflow (only literal "selector"
   * field in WHERE clauses so far)
   */
  // TODO : add recursive selector search (also in click/fill etc. events?)
  static extractSelectors(workflow: Workflow): SelectorArray {
    /**
     * Given a Where condition, this function extracts
     * all the existing selectors from it (recursively).
     */
    const selectorsFromCondition = (where: Where): SelectorArray => {
      // the `selectors` field is either on the top level
      let out = where.selectors ?? [];
      if (!Array.isArray(out)) {
        out = [out];
      }

      // or nested in the "operator" array
      operators.forEach((op) => {
        let condWhere = where[op];
        if (condWhere) {
          condWhere = Array.isArray(condWhere) ? condWhere : [condWhere];
          (condWhere).forEach((subWhere) => {
            out = [...out, ...selectorsFromCondition(subWhere)];
          });
        }
      });

      return out;
    };

    // Iterate through all the steps and extract the selectors from all of them.
    return workflow.reduce((p: SelectorArray, step) => [
      ...p,
      ...selectorsFromCondition(step.where).filter((x) => !p.includes(x)),
    ], []);
  }

  /**
   * Recursively crawl `object` and initializes params - replaces the `{$param : paramName}` objects
   * with the defined value. Afterwards replaces the `{$regex: source}` objects with `RegExp` instances.
   * @param {Workflow} workflow Workflow to initialize.
   * @param {ParamType} [params] Values for the workflow parameters. The set of keys must be
   *  exactly the set of parameter names used in the workflow.
   * @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched).
   * @throws {Error} If the provided parameter names do not match the names used in the workflow.
   */
  static initWorkflow(workflow: Workflow, params?: ParamType): Workflow {
    // `getParams` reports one entry per occurrence. Compare *unique* names, otherwise a
    // parameter referenced from two places could never match the keys of `params`.
    const paramNames = Array.from(new Set(this.getParams({ workflow }))).sort();
    const providedNames = Object.keys(params ?? {}).sort();

    if (providedNames.join(',') !== paramNames.join(',')) {
      throw new Error(`Provided parameters do not match the workflow parameters provided: ${providedNames.join(',')}, expected: ${paramNames.join(',')} `);
    }

    /**
     * A recursive method for initializing special `{key: value}` syntax objects in the workflow.
     * The object is modified in place.
     * @param object Workflow to initialize (or a part of it).
     * @param k key to look for ($regex, $param)
     * @param f function mutating the special `{}` syntax into
     * its true representation (RegExp...)
     * @returns Updated object
     */
    const initSpecialRecurse = (
      object: unknown,
      k: string,
      f: (value: string) => unknown,
    ): unknown => {
      if (!object || typeof object !== 'object') {
        return object;
      }

      const node = object as Record<string, any>;

      Object.keys(node).forEach((key) => {
        const childValue = node[key];
        if (!childValue || typeof childValue !== 'object') {
          return;
        }

        try {
          // A "special" object has exactly one key, and that key is `k`.
          const childKeys = Object.keys(childValue);
          if (childKeys.length === 1 && childValue[k]) {
            node[key] = f(childValue[k]);
          } else {
            initSpecialRecurse(childValue, k, f);
          }
        } catch (error) {
          // Best effort: a failing conversion (e.g. an invalid regular expression) is
          // reported and the original value is left in place.
          console.warn(`Error processing key "${key}" in initSpecialRecurse:`, error);
        }
      });

      return node;
    };

    // TODO: do better deep copy, this is hideous.
    let workflowCopy = JSON.parse(JSON.stringify(workflow));

    if (params) {
      workflowCopy = initSpecialRecurse(
        workflowCopy,
        '$param',
        (paramName) => {
          // Test for presence, not truthiness: 0, '' and false are valid parameter values.
          if (params !== undefined && Object.prototype.hasOwnProperty.call(params, paramName)) {
            return params[paramName];
          }
          throw new SyntaxError(`Unspecified parameter found ${paramName}.`);
        },
      );
    }

    workflowCopy = initSpecialRecurse(
      workflowCopy,
      '$regex',
      (regex) => new RegExp(regex),
    );

    return workflowCopy;
  }
}

================================================
FILE: maxun-core/src/types/logic.ts
================================================
export const unaryOperators = ['$not'] as const;
export const naryOperators = ['$and', '$or'] as const;

export const operators = [...unaryOperators, ...naryOperators] as const;
export const meta = ['$before', '$after'] as const;

================================================
FILE: maxun-core/src/types/workflow.ts
================================================
import { Page } from 'playwright-core';
import {
  naryOperators, unaryOperators, operators, meta,
} from './logic';

export type Operator = typeof operators[number];
export type UnaryOperator = typeof unaryOperators[number];
export type NAryOperator = typeof naryOperators[number];

export type Meta = typeof meta[number];

export type SelectorArray = string[];

type RegexableString = string | { '$regex':
string };

type BaseConditions = {
  'url': RegexableString,
  'cookies': Record,
  'selectors': SelectorArray, // (CSS/Playwright) selectors use their own logic, there is no reason (and several technical difficulties) to allow regular expression notation
} & Record;

export type Where =
  Partial<{ [key in NAryOperator]: Where[] }> & // either a logic operator (arity N)
  Partial<{ [key in UnaryOperator]: Where }> & // or an unary operator
  Partial; // or one of the base conditions

type MethodNames = {
  [K in keyof T]: T[K] extends Function ? K : never;
}[keyof T];

export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList' | 'scrapeListAuto' | 'crawl' | 'search';

export type What = {
  action: MethodNames | CustomFunctions,
  args?: any[],
  name?: string,
  actionId?: string
};

export type PageState = Partial;

export type ParamType = Record;

export type MetaData = {
  name?: string,
  desc?: string,
};

export interface WhereWhatPair {
  id?: string
  where: Where
  what: What[]
}

export type Workflow = WhereWhatPair[];

export type WorkflowFile = {
  meta?: MetaData,
  workflow: Workflow
};

================================================
FILE: maxun-core/src/utils/concurrency.ts
================================================
/**
 * Concurrency class for running concurrent tasks while managing a limited amount of resources.
 */
export default class Concurrency {
  /**
   * Maximum number of workers running in parallel. If set to `null`, there is no limit.
   */
  maxConcurrency: number = 1;

  /**
   * Number of currently active workers.
   */
  activeWorkers: number = 0;

  /**
   * Queue of jobs waiting to be completed.
   */
  private jobQueue: Function[] = [];

  /**
   * "Resolve" callbacks of the waitForCompletion() promises.
   */
  private waiting: Function[] = [];

  /**
   * Constructs a new instance of concurrency manager.
   * @param {number} maxConcurrency Maximum number of workers running in parallel.
   */
  constructor(maxConcurrency: number) {
    this.maxConcurrency = maxConcurrency;
  }

  /**
   * Takes a waiting job out of the queue and runs it.
   *
   * When the job settles (successfully or not) the same worker picks the next
   * job. When no job is left the worker retires, and if it was the last active
   * worker every pending `waitForCompletion()` promise is resolved.
   */
  private runNextJob(): void {
    // NOTE(review): pop() takes the most recently added job, so waiting jobs run newest-first.
    const job = this.jobQueue.pop();

    if (!job) {
      // console.debug("No waiting job found!");
      this.activeWorkers -= 1;
      if (this.activeWorkers === 0) {
        // console.debug("This concurrency manager is idle!");
        // Hand the resolvers over and forget them, so settled promises are not retained
        // (and re-invoked) on every later idle transition.
        const resolvers = this.waiting;
        this.waiting = [];
        resolvers.forEach((resolve) => resolve());
      }
      return;
    }

    // console.debug("Running a job...");
    let running: Promise<unknown>;
    try {
      running = Promise.resolve(job());
    } catch (error) {
      // A job that throws before returning its promise is treated like a rejected one,
      // so the worker bookkeeping below stays consistent.
      running = Promise.reject(error);
    }

    running.then(() => {
      // console.debug("Job finished, running the next waiting job...");
      this.runNextJob();
    }).catch((error) => {
      // `error` may be any rejected value, including null/undefined.
      console.error(`Job failed with error: ${error?.message ?? error}`);
      // Continue processing other jobs even if one fails
      this.runNextJob();
    });
  }

  /**
   * Pass a job (a time-demanding async function) to the concurrency manager. \
   * The time of the job's execution depends on the concurrency manager itself
   * (given a generous enough `maxConcurrency` value, it might be immediate,
   * but this is not guaranteed).
   * @param job Async function to be executed (job to be processed).
   */
  addJob(job: () => Promise): void {
    // console.debug("Adding a worker!");
    this.jobQueue.push(job);

    if (!this.maxConcurrency || this.activeWorkers < this.maxConcurrency) {
      // Count the worker before starting it: the job runs synchronously up to its first
      // `await` and may itself call addJob(), which must already see this worker as active.
      this.activeWorkers += 1;
      this.runNextJob();
    } else {
      // console.debug("No capacity to run a worker now, waiting!");
    }
  }

  /**
   * Waits until there is no running nor waiting job. \
   * If the concurrency manager is idle at the time of calling this function,
   * it waits until at least one job is completed (can be "presubscribed").
   * @returns Promise, resolved after there is no running/waiting worker.
   */
  waitForCompletion(): Promise {
    return new Promise((res) => {
      this.waiting.push(res);
    });
  }
}

================================================
FILE: maxun-core/src/utils/logger.ts
================================================
/*
* Logger class for more detailed and comprehensible logs (with colors and timestamps)
*/

/**
 * Log levels. The numeric values are interpolated into `\x1b[<value>m`
 * escape sequences by `logger`, i.e. they select the output color.
 */
export enum Level {
  DATE = 36,
  LOG = 0,
  WARN = 93,
  ERROR = 31,
  DEBUG = 95,
  RESET = 0,
}

/**
 * Writes one colored, timestamped log line.
 *
 * WARN and ERROR lines go to stderr, everything else to stdout. The whole line
 * (timestamp, color codes, text and trailing newline) is written to the same
 * stream, so redirecting one stream yields complete lines.
 * @param message Text to log, or an error whose `message` is logged.
 * @param level Severity of the message, defaults to `Level.LOG`.
 */
export default function logger(
  message: string | Error,
  level: (Level.LOG | Level.WARN | Level.ERROR | Level.DEBUG) = Level.LOG,
) {
  // Anything that is not a string is expected to be an Error (whatever its class name).
  const text = typeof message === 'string' ? message : message.message;
  const stream = (level === Level.ERROR || level === Level.WARN)
    ? process.stderr
    : process.stdout;

  stream.write(
    `\x1b[${Level.DATE}m[${(new Date()).toLocaleString()}]\x1b[0m `
    + `\x1b[${level}m${text}\x1b[${Level.RESET}m\n`,
  );
}

================================================
FILE: maxun-core/src/utils/utils.ts
================================================
/**
 * ESLint rule in case there is only one util function
 * (it still does not represent the "utils" file)
 */

/* eslint-disable import/prefer-default-export */

/**
 * Converts an array of scalars to an object with **items** of the array **for keys**.
*/
export function arrayToObject(array : any[]) {
  // Every key gets its own fresh empty array. Built in a single pass instead of
  // re-spreading the accumulator for each item (which copies it every time).
  const result: any = {};
  for (const key of array) {
    // defineProperty always creates a plain own data property, exactly like a computed
    // key in an object literal does (a plain assignment would special-case `__proto__`).
    Object.defineProperty(result, key, {
      value: [],
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  return result;
}

================================================
FILE: maxun-core/tsconfig.json
================================================
{
  "compilerOptions": {
    "outDir": "./build",
    "declaration": true,
    "allowJs": true,
    "target": "es6",
    "module": "commonjs",
    "esModuleInterop": true,
    "skipLibCheck": true
  },
  "include": ["src"]
}

================================================
FILE: nginx.conf
================================================
server {
    listen 80;
    server_name _;

    root /var/www/maxun;
    index index.html;

    # Serve the frontend
    location / {
        try_files $uri $uri/ /index.html;
    }

    # Proxy for backend
    location ~ ^/(auth|storage|record|workflow|robot|proxy|api-docs|api|webhook)(/|$) {
        proxy_pass http://localhost:8080; # change as per your setup
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
    }
}

================================================
FILE: package.json
================================================
{
  "name": "maxun",
  "version": "0.0.35",
  "author": "Maxun",
  "license": "AGPL-3.0-or-later",
  "dependencies": {
    "@anthropic-ai/sdk": "^0.71.2",
    "@cliqz/adblocker-playwright": "^1.30.0",
    "@emotion/react": "^11.9.0",
    "@emotion/styled": "^11.8.1",
    "@mui/icons-material": "^5.5.1",
    "@mui/lab": "^5.0.0-alpha.80",
    "@mui/material": "^5.6.2",
    "@react-oauth/google": "^0.12.1",
    "@tanstack/react-query": "^5.90.2",
    "@types/bcrypt": "^5.0.2",
    "@types/body-parser": "^1.19.5",
    "@types/csurf": "^1.11.5",
    "@types/express-session": "^1.18.1",
    "@types/jsonwebtoken": "^9.0.7",
    "@types/node": "22.7.9",
    "@types/react": "^18.0.5",
    "@types/react-dom": "^18.0.1",
    "@types/uuid": "^8.3.4",
    "airtable": "^0.12.2",
    "axios": "^1.9.0",
    "bcrypt": "^5.1.1",
    "body-parser":
"^1.20.3", "buffer": "^6.0.3", "cheerio": "^1.1.2", "connect-pg-simple": "^10.0.0", "cookie-parser": "^1.4.6", "cors": "^2.8.5", "cron-parser": "^4.9.0", "cross-fetch": "^4.0.0", "csurf": "^1.11.0", "dotenv": "^16.0.0", "express": "^4.17.2", "express-session": "^1.18.1", "fingerprint-generator": "^2.1.77", "fingerprint-injector": "^2.1.77", "fortawesome": "^0.0.1-security", "google-auth-library": "^9.14.1", "googleapis": "^144.0.0", "i18next": "^24.0.2", "i18next-browser-languagedetector": "^8.0.0", "i18next-http-backend": "^3.0.1", "idcac-playwright": "^0.1.3", "joi": "^17.6.0", "joplin-turndown-plugin-gfm": "^1.0.12", "jsonwebtoken": "^9.0.2", "jszip": "^3.10.1", "jwt-decode": "^4.0.0", "lodash": "^4.17.21", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", "maxun-core": "^0.0.32", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3", "pg": "^8.13.0", "pg-boss": "^10.1.6", "pkce-challenge": "^4.1.0", "playwright-core": "1.57.0", "posthog-node": "^4.2.1", "react": "^18.0.0", "react-dom": "^18.0.0", "react-i18next": "^15.1.3", "react-router-dom": "^6.26.1", "rrweb": "^2.0.0-alpha.4", "sequelize": "^6.37.3", "sequelize-typescript": "^2.1.6", "sharp": "^0.33.5", "socket.io": "^4.4.1", "socket.io-client": "^4.4.1", "styled-components": "^5.3.3", "swagger-jsdoc": "^6.2.8", "swagger-ui-express": "^5.0.1", "turndown": "^7.2.2", "typedoc": "^0.23.8", "typescript": "^5.0.0", "uuid": "^8.3.2", "uuidv4": "^6.2.12", "web-vitals": "^2.1.4", "winston": "^3.5.1" }, "scripts": { "start": "npm run build:server && concurrently -k \"npm run server\" \"npm run client\"", "server": "cross-env NODE_OPTIONS='--max-old-space-size=4096' node server/dist/server/src/server.js", "start:dev": "concurrently -k \"npm run server:dev\" \"npm run client\"", "server:dev": "cross-env NODE_OPTIONS='--max-old-space-size=2048' nodemon server/src/server.ts", "client": "vite", "build": "vite build", "build:server": "tsc -p server/tsconfig.json", "preview": "vite preview", 
"lint": "./node_modules/.bin/eslint .", "migrate": "sequelize-cli db:migrate", "migrate:undo": "sequelize-cli db:migrate:undo", "migrate:undo:all": "sequelize-cli db:migrate:undo:all", "seed": "sequelize-cli db:seed:all", "seed:undo:all": "sequelize-cli db:seed:undo:all", "migration:generate": "sequelize-cli migration:generate --name", "mcp:build": "tsc --project server/tsconfig.mcp.json" }, "eslintConfig": { "extends": [ "react-app" ] }, "devDependencies": { "@modelcontextprotocol/sdk": "^1.12.1", "@types/connect-pg-simple": "^7.0.3", "@types/cookie-parser": "^1.4.7", "@types/express": "^4.17.13", "@types/js-cookie": "^3.0.6", "@types/lodash": "^4.17.14", "@types/loglevel": "^1.6.3", "@types/node": "22.7.9", "@types/node-cron": "^3.0.11", "@types/node-fetch": "^2.6.12", "@types/styled-components": "^5.1.23", "@types/swagger-jsdoc": "^6.0.4", "@types/swagger-ui-express": "^4.1.6", "@types/turndown": "^5.0.6", "@vitejs/plugin-react": "^4.3.3", "ajv": "^8.8.2", "concurrently": "^7.0.0", "cross-env": "^7.0.3", "esbuild": "^0.25.10", "js-cookie": "^3.0.5", "nodemon": "^2.0.15", "sequelize-cli": "^6.6.2", "ts-node": "^10.4.0", "vite": "^5.4.10", "zod": "^3.25.62" } } ================================================ FILE: public/locales/de.json ================================================ { "login": { "title": "Willkommen zurück!", "email": "Geben Sie Ihre geschäftliche E-Mail-Adresse ein", "password": "Passwort", "button": "Einloggen", "loading": "Lädt", "register_prompt": "Noch keinen Account?", "register_link": "Registrieren", "welcome_notification": "Willkommen bei Maxun!", "validation": { "required_fields": "E-Mail und Passwort sind erforderlich", "password_length": "Das Passwort muss mindestens 6 Zeichen lang sein" }, "error": { "user_not_found": "Benutzer existiert nicht", "invalid_credentials": "Ungültige E-Mail oder Passwort", "server_error": "Anmeldung fehlgeschlagen. Bitte versuchen Sie es später erneut", "generic": "Ein Fehler ist aufgetreten. 
Bitte versuchen Sie es erneut" } }, "register": { "title": "Konto registrieren", "email": "Geben Sie Ihre geschäftliche E-Mail-Adresse ein", "password": "Passwort", "button": "Registrieren", "loading": "Lädt", "register_prompt": "Bereits ein Konto?", "login_link": "Einloggen", "welcome_notification": "Willkommen bei Maxun!", "validation": { "email_required": "E-Mail-Adresse ist erforderlich", "password_requirements": "Das Passwort muss mindestens 6 Zeichen lang sein" }, "error": { "user_exists": "Ein Benutzer mit dieser E-Mail existiert bereits", "creation_failed": "Konto konnte nicht erstellt werden", "server_error": "Serverfehler aufgetreten", "generic": "Registrierung fehlgeschlagen. Bitte versuchen Sie es erneut" } }, "recordingtable": { "run": "Ausführen", "name": "Name", "schedule": "Zeitplan", "integrate": "Integrieren", "settings": "Einstellungen", "options": "Optionen", "heading": "Meine Roboter", "new": "Roboter erstellen", "search_criteria": "Versuchen Sie, Ihre Suchkriterien anzupassen", "placeholder": { "title": "Alles bereit für den Start", "body": "Roboter, die Sie erstellen, werden hier angezeigt. Klicken Sie auf „Roboter erstellen“, um loszulegen!", "search": "Keine Roboter entsprechen Ihrer Suche" }, "modal": { "title": "Geben Sie die URL ein", "login_title": "Ist für diese Website eine Anmeldung erforderlich?", "label": "URL", "button": "Aufnahme starten" }, "retrain": "Neu trainieren", "edit": "Bearbeiten", "delete": "Löschen", "duplicate": "Duplizieren", "search": "Roboter suchen...", "warning_modal": { "title": "Aktiver Browser erkannt", "message": "Es läuft bereits eine Browser-Aufzeichnungssitzung. Möchten Sie sie verwerfen und eine neue Aufzeichnung erstellen?", "discard_and_create": "Verwerfen & Neu erstellen", "cancel": "Abbrechen" }, "notifications": { "delete_warning": "Der Roboter hat zugehörige Ausführungen. 
Löschen Sie zuerst die Ausführungen, um den Roboter zu löschen", "delete_success": "Roboter erfolgreich gelöscht", "auth_success": "Roboter erfolgreich authentifiziert", "browser_limit_warning": "Remote-Browser sind derzeit ausgelastet. Bitte warten Sie einige Minuten und versuchen Sie es erneut" } }, "mainmenu": { "recordings": "Roboter", "runs": "Ausführungen", "proxy": "Proxy", "apikey": "API-Schlüssel", "feedback": "Maxun Cloud beitreten", "apidocs": "Website zu API" }, "runstable": { "runs": "Alle Ausführungen", "runStatus": "Status", "runName": "Name", "name": "Name", "startedAt": "Gestartet am", "finishedAt": "Beendet am", "delete": "Löschen", "settings": "Einstellungen", "search": "Ausführungen suchen...", "sort_tooltip": "Zum Sortieren klicken", "placeholder": { "title": "Keine Durchläufe gefunden", "body": "Hier werden alle Ihre Roboter-Durchläufe angezeigt. Sobald ein Roboter aktiv ist, werden seine Durchläufe hier protokolliert.", "search": "Keine Durchläufe entsprechen Ihrer Suche" }, "notifications": { "no_runs": "Keine Ausführungen gefunden. Bitte versuchen Sie es erneut.", "delete_success": "Ausführung erfolgreich gelöscht" } }, "proxy": { "title": "Proxy-Konfiguration", "tab_standard": "Standard-Proxy", "tab_rotation": "Automatische Proxy-Rotation", "server_url": "Proxy-Server-URL", "server_url_helper": "Proxy für alle Roboter. HTTP- und SOCKS-Proxys werden unterstützt. Beispiel http://myproxy.com:3128 oder socks5://myproxy.com:3128. Kurzform myproxy.com:3128 wird als HTTP-Proxy behandelt.", "requires_auth": "Authentifizierung erforderlich?", "username": "Benutzername", "password": "Passwort", "add_proxy": "Proxy hinzufügen", "test_proxy": "Proxy testen", "remove_proxy": "Proxy entfernen", "table": { "proxy_url": "Proxy-URL", "requires_auth": "Authentifizierung erforderlich" }, "coming_soon": "Demnächst verfügbar - In Open Source (Basis-Rotation) & Cloud (Erweiterte Rotation). 
Wenn Sie die Infrastruktur nicht selbst verwalten möchten, tragen Sie sich in unsere Cloud-Warteliste ein.", "join_waitlist": "Maxun Cloud Warteliste beitreten", "alert": { "title": "Wenn Ihr Proxy einen Benutzernamen und ein Passwort erfordert, geben Sie diese immer separat von der Proxy-URL an.", "right_way": "Der richtige Weg", "wrong_way": "Der falsche Weg", "proxy_url": "Proxy-URL:", "username": "Benutzername:", "password": "Passwort:" }, "notifications": { "config_success": "Proxy-Konfiguration erfolgreich übermittelt", "config_error": "Fehler beim Übermitteln der Proxy-Konfiguration. Bitte erneut versuchen.", "test_success": "Proxy-Konfiguration funktioniert", "test_error": "Fehler beim Testen der Proxy-Konfiguration. Bitte erneut versuchen.", "fetch_success": "Proxy-Konfiguration erfolgreich abgerufen", "remove_success": "Proxy-Konfiguration erfolgreich entfernt", "remove_error": "Fehler beim Entfernen der Proxy-Konfiguration. Bitte erneut versuchen." } }, "apikey": { "title": "API-Schlüssel verwalten", "default_name": "Maxun API-Schlüssel", "table": { "name": "API-Schlüssel Name", "key": "API-Schlüssel", "actions": "Aktionen" }, "actions": { "copy": "Kopieren", "show": "Anzeigen", "hide": "Ausblenden", "delete": "Löschen" }, "no_key_message": "Sie haben noch keinen API-Schlüssel generiert.", "generate_button": "API-Schlüssel generieren", "notifications": { "fetch_error": "API-Schlüssel konnte nicht abgerufen werden - {{error}}", "generate_success": "API-Schlüssel erfolgreich generiert", "generate_error": "API-Schlüssel konnte nicht generiert werden - {{error}}", "delete_success": "API-Schlüssel erfolgreich gelöscht", "delete_error": "API-Schlüssel konnte nicht gelöscht werden - {{error}}", "copy_success": "API-Schlüssel erfolgreich kopiert" } }, "action_description": { "text": { "title": "Text erfassen", "description": "Fahren Sie über die Texte, die Sie extrahieren möchten, und klicken Sie, um sie auszuwählen" }, "screenshot": { "title": "Screenshot 
erfassen", "description": "Erfassen Sie ein Teil- oder Vollbildschirmfoto der aktuellen Seite." }, "list": { "title": "Liste erfassen", "description": "Fahren Sie über die Liste, die Sie extrahieren möchten. Nach der Auswahl können Sie über alle Texte in der ausgewählten Liste fahren. Klicken Sie zum Auswählen." }, "default": { "title": "Welche Daten möchten Sie extrahieren?", "description": "Ein Roboter kann eine oder mehrere Aktionen ausführen. Wählen Sie aus den unten aufgeführten Optionen." }, "list_stages": { "initial": "Wählen Sie die Liste aus, die Sie extrahieren möchten, zusammen mit den darin enthaltenen Texten", "pagination": "Wählen Sie aus, wie der Roboter den Rest der Liste erfassen kann", "limit": "Wählen Sie die Anzahl der zu extrahierenden Elemente", "complete": "Erfassung ist abgeschlossen" }, "actions": { "text": "Text erfassen", "list": "Liste erfassen", "screenshot": "Screenshot erfassen" } }, "right_panel": { "buttons": { "capture_list": "Liste erfassen", "capture_text": "Text erfassen", "capture_screenshot": "Screenshot erfassen", "confirm": "Bestätigen", "discard": "Verwerfen", "confirm_capture": "Erfassung bestätigen", "confirm_pagination": "Bestätigen", "confirm_limit": "Bestätigen", "confirm_reset": "Bestätigen", "finish_capture": "Erfassung abschließen", "back": "Zurück", "reset": "Starten Sie die Aufnahme neu", "finish": "Fertig", "cancel": "Abbrechen", "delete": "Löschen" }, "screenshot": { "capture_fullpage": "Vollständige Seite erfassen", "capture_visible": "Sichtbaren Bereich erfassen", "display_fullpage": "Vollständige Seite Screenshot", "display_visible": "Sichtbarer Bereich Screenshot" }, "pagination": { "title": "Wie können wir das nächste Listenelement auf der Seite finden?", "click_next": "Auf 'Weiter' klicken, um zur nächsten Seite zu navigieren", "click_load_more": "Auf 'Mehr laden' klicken, um weitere Elemente zu laden", "scroll_down": "Nach unten scrollen, um mehr Elemente zu laden", "scroll_up": "Nach oben scrollen, um 
mehr Elemente zu laden", "none": "Keine weiteren Elemente zu laden" }, "limit": { "title": "Wie viele Zeilen möchten Sie maximal extrahieren?", "custom": "Benutzerdefiniert", "enter_number": "Nummer eingeben" }, "fields": { "label": "Bezeichnung", "data": "Daten", "field_label": "Feldbezeichnung", "field_data": "Felddaten" }, "messages": { "list_selected": "Liste erfolgreich ausgewählt", "list_empty": "Liste ausgewählt. Bitte wählen Sie Felder innerhalb der Liste aus." }, "errors": { "select_pagination": "Bitte wählen Sie einen Paginierungstyp aus.", "select_pagination_element": "Bitte wählen Sie zuerst das Paginierungselement aus.", "select_limit": "Bitte wählen Sie ein Limit oder geben Sie ein benutzerdefiniertes Limit ein.", "invalid_limit": "Bitte geben Sie ein gültiges Limit ein.", "confirm_text_fields": "Bitte bestätigen Sie alle Textfelder", "unable_create_settings": "Listeneinstellungen können nicht erstellt werden. Stellen Sie sicher, dass Sie ein Feld für die Liste definiert haben.", "capture_text_discarded": "Texterfassung verworfen", "capture_list_discarded": "Listenerfassung verworfen", "label_required": "Beschriftung darf nicht leer sein", "duplicate_label": "Diese Beschriftung existiert bereits. 
Bitte verwenden Sie eine eindeutige Beschriftung.", "no_text_captured": "Bitte markieren und wählen Sie Textelemente aus, bevor Sie bestätigen.", "capture_list_first": "Bitte bewegen Sie die Maus über eine Liste und wählen Sie Textfelder darin aus", "confirm_all_list_fields": "Bitte bestätigen Sie alle erfassten Listenfelder, bevor Sie fortfahren" }, "tooltips": { "capture_list_first": "Bewegen Sie die Maus über eine Liste und wählen Sie Textfelder darin aus", "confirm_all_list_fields": "Bitte bestätigen Sie alle erfassten Listenfelder, bevor Sie fortfahren" } }, "save_recording": { "title": "Roboter speichern", "robot_name": "Roboter Name", "buttons": { "save": "Speichern", "confirm": "Bestätigen" }, "notifications": { "save_success": "Roboter erfolgreich gespeichert", "retrain_success": "Roboter erfolgreich neu trainiert", "save_error": "Fehler beim Speichern des Roboters" }, "errors": { "user_not_logged": "Benutzer nicht angemeldet. Aufnahme kann nicht gespeichert werden.", "exists_warning": "Ein Roboter mit diesem Namen existiert bereits, bitte bestätigen Sie das Überschreiben des Roboters.", "no_actions_performed": "Roboter kann nicht gespeichert werden. Bitte führen Sie mindestens eine Erfassungsaktion durch, bevor Sie speichern." }, "tooltips": { "saving": "Workflow wird optimiert und gespeichert" } }, "browser_recording": { "modal": { "confirm_discard": "Sind Sie sicher, dass Sie die Aufnahme verwerfen möchten?", "confirm_reset": "Sind Sie sicher, dass Sie zurücksetzen möchten?", "reset_warning": "Dadurch werden alle vorherigen Aufnahmen in der aktuellen Sitzung gelöscht. Die Aufnahmesitzung wird für dieselbe Website neu gestartet." 
}, "notifications": { "terminated": "Aktuelle Aufnahme wurde beendet", "environment_reset": "Browser-Umgebung wurde zurückgesetzt", "reset_successful": "Alle Aufnahmen erfolgreich zurückgesetzt und zum Ausgangszustand zurückgekehrt" } }, "interpretation_log": { "titles": { "output_preview": "Vorschau der Ausgabedaten", "screenshot": "Bildschirmfoto" }, "messages": { "additional_rows": "Weitere Datenzeilen werden nach Abschluss der Aufnahme extrahiert.", "successful_training": "Sie haben den Roboter erfolgreich für Aktionen trainiert! Klicken Sie auf die Schaltfläche unten, um eine Vorschau der Daten zu erhalten, die Ihr Roboter extrahieren wird.", "no_selection": "Sie haben noch nichts zur Extraktion ausgewählt. Sobald Sie dies tun, wird der Roboter hier eine Vorschau Ihrer Auswahl anzeigen." }, "data_sections": { "binary_received": "---------- Binäre Ausgabedaten empfangen ----------", "serializable_received": "---------- Serialisierbare Ausgabedaten empfangen ----------", "mimetype": "Medientyp: ", "image_below": "Bild wird unten angezeigt:", "separator": "--------------------------------------------------" }, "notifications": { "reset_success": "Vorschau erfolgreich zurückgesetzt" } }, "interpretation_buttons": { "buttons": { "preview": "Vorschau der Ausgabedaten anzeigen", "reset": "Zurücksetzen", "yes": "Ja", "no": "Nein" }, "messages": { "extracting": "Daten werden extrahiert...bitte warten", "restart_required": "Bitte starten Sie die Interpretation nach der Aktualisierung der Aufnahme neu", "run_finished": "Durchlauf beendet", "run_failed": "Start fehlgeschlagen" }, "modal": { "use_previous": "Möchten Sie Ihre vorherige Auswahl als Bedingung für diese Aktion verwenden?", "previous_action": "Ihre vorherige Aktion war: ", "element_text": "auf einem Element mit Text " }, "notifications": { "reset_success": "Ausgabevorschau erfolgreich zurückgesetzt" } }, "recording_page": { "loader": { "browser_startup": "Browser wird gestartet...Festhalten" } }, 
"integration_settings": { "title": "Integrationseinstellungen", "descriptions": { "authenticated_as": "Authentifiziert als: {{email}}" }, "buttons": { "submit": "Absenden", "remove_integration": "Integration entfernen" }, "google": { "title": "Mit Google Sheet integrieren", "descriptions": { "sync_info": "Wenn Sie diese Option aktivieren, werden bei jeder erfolgreichen Ausführung einer Aufgabe durch diesen Roboter die erfassten Daten an Ihr Google Sheet angehängt.", "authenticated_as": "Authentifiziert als: {{email}}" }, "alerts": { "success": { "title": "Google Sheet erfolgreich integriert.", "content": "Jedes Mal, wenn dieser Roboter einen erfolgreichen Lauf erstellt, werden seine erfassten Daten an Ihr Google Sheet {{sheetName}} angehängt. Sie können die Datenaktualisierungen überprüfen", "here": "hier", "note": "Hinweis:", "sync_limitation": "Die vor der Integration mit Google Sheets extrahierten Daten werden nicht im Google Sheet synchronisiert. Nur die nach der Integration extrahierten Daten werden synchronisiert." 
} }, "buttons": { "authenticate": "Mit Google authentifizieren", "fetch_sheets": "Google Tabellenkalkulationen abrufen", "remove_integration": "Integration entfernen", "submit": "Absenden" }, "fields": { "select_sheet": "Google Sheet auswählen", "selected_sheet": "Ausgewähltes Sheet: {{name}} (ID: {{id}})" }, "errors": { "auth_error": "Fehler bei der Authentifizierung mit Google", "fetch_error": "Fehler beim Abrufen der Tabellenkalkulationsdateien: {{message}}", "update_error": "Fehler beim Aktualisieren der Google Sheet-ID: {{message}}", "remove_error": "Fehler beim Entfernen der Google Sheets-Integration: {{message}}" }, "notifications": { "sheet_selected": "Google Sheet erfolgreich ausgewählt", "integration_removed": "Google Sheets-Integration erfolgreich entfernt" } }, "airtable": { "title": "Mit Airtable integrieren", "descriptions": { "sync_info": "Wenn Sie diese Option aktivieren, werden bei jeder erfolgreichen Ausführung einer Aufgabe durch diesen Roboter die erfassten Daten an Ihre Airtable angehängt.", "authenticated_as": "Erfolgreich bei Airtable authentifiziert. Sie können nun die Basis und die Tabelle auswählen, in die Sie integrieren möchten." }, "alerts": { "success": { "title": "Airtable Base erfolgreich integriert", "content": "Jedes Mal, wenn dieser Roboter einen erfolgreichen Lauf erstellt, werden seine erfassten Daten an Ihre {{baseName}}-Basis und Ihre {{tableName}}-Tabelle angehängt. Sie können Updates überprüfen", "here": "hier", "note": "Hinweis:", "sync_limitation": "Nur nach der Integration erfasste Daten werden mit Airtable synchronisiert." 
} }, "buttons": { "authenticate": "Mit Airtable verbinden", "fetch_bases": "Airtable Basen abrufen", "fetch_tables": "Airtable Tabellen abrufen", "remove_integration": "Integration entfernen", "submit": "Basis und Tabelle auswählen" }, "fields": { "select_base": "Airtable Basis auswählen", "select_table": "Airtable Tabelle auswählen", "selected_base": "Ausgewählte Basis: {{name}}", "selected_table": "Ausgewählte Tabelle: {{name}}" }, "errors": { "auth_error": "Fehler bei der Authentifizierung mit Airtable", "fetch_error": "Fehler beim Abrufen von Airtable-Basen: {{message}}", "fetch_tables_error": "Fehler beim Abrufen von Airtable-Tabellen: {{message}}", "update_error": "Fehler beim Aktualisieren der Airtable-Basis: {{message}}", "remove_error": "Fehler beim Entfernen der Airtable-Integration: {{message}}" }, "notifications": { "base_selected": "Airtable-Basis erfolgreich ausgewählt", "table_selected": "Airtable-Tabelle erfolgreich ausgewählt", "integration_removed": "Airtable-Integration erfolgreich entfernt" } } }, "robot_duplication": { "title": "Roboter duplizieren", "descriptions": { "purpose": "Die Roboter-Duplizierung ist nützlich, um Daten von Seiten mit der gleichen Struktur zu extrahieren.", "example": "Beispiel: Wenn Sie einen Roboter für {{url1}} erstellt haben, können Sie ihn duplizieren, um ähnliche Seiten wie {{url2}} zu durchsuchen, ohne einen Roboter von Grund auf neu zu trainieren.", "warning": "⚠️ Stellen Sie sicher, dass die neue Seite die gleiche Struktur wie die Originalseite hat." }, "fields": { "target_url": "Roboter Ziel-URL" }, "buttons": { "duplicate": "Roboter duplizieren", "cancel": "Abbrechen" }, "notifications": { "robot_not_found": "Roboterdetails konnten nicht gefunden werden. Bitte versuchen Sie es erneut.", "url_required": "Ziel-URL ist erforderlich.", "duplicate_success": "Roboter erfolgreich dupliziert.", "duplicate_error": "Fehler beim Aktualisieren der Ziel-URL. 
Bitte versuchen Sie es erneut.", "unknown_error": "Beim Aktualisieren der Ziel-URL ist ein Fehler aufgetreten." } }, "robot_settings": { "title": "Roboter-Einstellungen", "target_url": "Roboter-Ziel-URL", "robot_id": "Roboter-ID", "robot_limit": "Roboter-Limit", "created_by_user": "Erstellt von Benutzer", "created_at": "Erstellungsdatum des Roboters", "errors": { "robot_not_found": "Roboterdetails konnten nicht gefunden werden. Bitte versuchen Sie es erneut." }, "buttons": { "close": "Schließen" } }, "robot_edit": { "title": "Roboter bearbeiten", "change_name": "Roboternamen ändern", "robot_limit": "Roboter-Limit", "save": "Änderungen speichern", "cancel": "Abbrechen", "notifications": { "update_success": "Roboter erfolgreich aktualisiert.", "update_failed": "Aktualisierung des Roboters fehlgeschlagen. Bitte erneut versuchen.", "update_error": "Beim Aktualisieren des Roboters ist ein Fehler aufgetreten." } }, "schedule_settings": { "title": "Zeitplan-Einstellungen", "run_every": "Ausführen alle", "start_from": "Beginnen ab", "on_day": "An Tag", "at_around": "Um", "timezone": "Zeitzone", "buttons": { "delete_schedule": "Zeitplan löschen", "save_schedule": "Zeitplan speichern", "cancel": "Abbrechen" }, "labels": { "in_between": "Zwischen", "run_once_every": "Ausführen alle", "start_from_label": "Beginnen ab", "on_day_of_month": "Tag des Monats", "on_day": { "st": ".", "nd": ".", "rd": ".", "th": "." 
} } }, "main_page": { "notifications": { "interpretation_success": "Interpretation des Roboters {{name}} erfolgreich", "interpretation_failed": "Interpretation des Roboters {{name}} fehlgeschlagen", "run_started": "Roboter wird ausgeführt: {{name}}", "run_start_failed": "Fehler beim Ausführen des Roboters: {{name}}", "schedule_success": "Roboter {{name}} erfolgreich geplant", "schedule_failed": "Planen des Roboters {{name}} fehlgeschlagen", "abort_success": "Interpretation des Roboters {{name}} erfolgreich abgebrochen", "abort_failed": "Abbrechen der Interpretation des Roboters {{name}} fehlgeschlagen", "abort_initiated": "Interpretation des Roboters {{name}} wird abgebrochen" }, "menu": { "recordings": "Roboter", "runs": "Ausführungen", "proxy": "Proxy", "apikey": "API-Schlüssel" } }, "browser_window": { "attribute_modal": { "title": "Attribut auswählen", "notifications": { "list_select_success": "Liste erfolgreich ausgewählt. Wählen Sie die zu extrahierenden Textdaten.", "pagination_select_success": "Paginierungselement erfolgreich ausgewählt." 
} }, "attribute_options": { "anchor": { "text": "Text: {{text}}", "url": "URL: {{url}}" }, "image": { "alt_text": "Alt-Text: {{altText}}", "image_url": "Bild-URL: {{imageUrl}}" }, "default": { "text": "Text: {{text}}" } } }, "runs_table": { "run_type_chips": { "manual_run": "Manuelle Ausführung", "scheduled_run": "Geplante Ausführung", "api": "API", "unknown_run_type": "Unbekannter Ausführungstyp" }, "run_status_chips": { "success": "Erfolg", "running": "Läuft", "scheduled": "Geplant", "queued": "In Warteschlange", "failed": "Fehlgeschlagen", "aborted": "Abgebrochen" }, "run_settings_modal": { "title": "Ausführungseinstellungen", "labels": { "run_id": "Ausführungs-ID", "run_by_user": "Ausgeführt von Benutzer", "run_by_schedule": "Ausgeführt nach Zeitplan-ID", "run_by_api": "Ausgeführt durch API", "run_type": "Ausführungstyp" } } }, "run_content": { "tabs": { "output_data": "Ausgabedaten", "log": "Protokoll" }, "buttons": { "stop": "Stoppen" }, "loading": "Daten werden geladen...", "empty_output": "Keine Ausgabedaten verfügbar", "captured_data": { "title": "Erfasste Daten", "download_csv": "CSV herunterladen", "view_full": "Vollständige Daten anzeigen", "items": "Elemente", "schema_title": "Erfasste Texte", "list_title": "Erfasste Listen" }, "captured_screenshot": { "title": "Erfasste Screenshots", "download": "Herunterladen", "render_failed": "Fehler beim Rendern des Screenshots" } }, "navbar": { "project_name": "Maxun", "notifications": { "success": { "logout": "Erfolgreich abgemeldet" }, "errors": { "logout": { "unauthorized": "Sie sind nicht berechtigt, diese Aktion durchzuführen", "server": "Serverfehler beim Abmelden", "network": "Netzwerkfehler beim Abmelden", "unknown": "Ein unerwarteter Fehler ist beim Abmelden aufgetreten" } } }, "upgrade": { "button": "Upgrade", "modal": { "up_to_date": "🎉 Sie sind auf dem neuesten Stand!", "new_version_available": "Eine neue Version ist verfügbar: {{version}}. 
Aktualisieren Sie auf die neueste Version für Fehlerkorrekturen, Verbesserungen und neue Funktionen!", "view_updates": "Alle Updates anzeigen", "view_updates_link": "hier", "tabs": { "manual_setup": "Manuelles Setup-Upgrade", "docker_setup": "Docker Compose Setup-Upgrade" } } }, "menu_items": { "logout": "Abmelden", "discord": "Discord", "youtube": "YouTube", "twitter": "Twitter (X)", "language": "Sprache" }, "recording": { "discard": "Verwerfen" } }, "language_menu": { "en": "Englisch", "es": "Spanisch", "ja": "Japanisch", "zh": "Chinesisch", "de": "Deutsch", "tr": "Türkisch" } } ================================================ FILE: public/locales/en.json ================================================ { "login": { "title": "Welcome Back!", "email": "Enter Work Email", "password": "Password", "button": "Login", "loading": "Loading", "register_prompt": "Don't have an account?", "register_link": "Register", "welcome_notification": "Welcome to Maxun!", "validation": { "required_fields": "Email and password are required", "password_length": "Password must be at least 6 characters" }, "error": { "user_not_found": "User does not exist", "invalid_credentials": "Invalid email or password", "server_error": "Login failed. Please try again later", "generic": "An error occurred. Please try again" } }, "register": { "title": "Create An Account", "email": "Enter Work Email", "password": "Password", "button": "Register", "loading": "Loading", "register_prompt": "Already have an account?", "login_link": "Login", "welcome_notification": "Welcome to Maxun!", "validation": { "email_required": "Email is required", "password_requirements": "Password must be at least 6 characters" }, "error": { "user_exists": "User with this email already exists", "creation_failed": "Could not create account", "server_error": "Server error occurred", "generic": "Registration failed. 
Please try again" } }, "recordingtable":{ "run": "Run", "name": "Name", "schedule": "Schedule", "integrate": "Integrate", "settings": "Settings", "options": "Options", "heading":"My Robots", "new":"Create Robot", "search_criteria": "Try adjusting your search criteria", "placeholder": { "title": "You're All Set to Start", "body": "Robots you create will appear here. Click \"Create Robot\" to get started!", "search": "No robots match your search" }, "modal":{ "title":"Enter the URL", "login_title": "Does this website require logging in?", "label":"URL", "button":"Start Recording" }, "warning_modal":{ "title":"Active Browser Detected", "message": "There is already a browser recording session running. Would you like to discard it and create a new recording?", "discard_and_create":"Discard & Create New", "cancel":"Cancel" }, "retrain": "Retrain", "edit":"Edit", "delete":"Delete", "duplicate":"Duplicate", "search":"Search Robots...", "notifications": { "delete_warning": "The robot has associated runs. First delete runs to delete the robot", "delete_success": "Robot deleted successfully", "auth_success": "Robot successfully authenticated", "browser_limit_warning": "Remote browsers are currently busy. Please wait for a few minutes and try again" } }, "mainmenu":{ "recordings": "Robots", "runs": "Runs", "proxy": "Proxy", "apikey": "API Key", "feedback":"Join Maxun Cloud", "apidocs":"Website To API" }, "runstable":{ "runs":"All Runs", "runStatus":"Status", "runName":"Name", "name":"Name", "startedAt":"Started At", "finishedAt":"Finished At", "delete":"Delete", "settings":"Settings", "search":"Search Runs...", "sort_tooltip": "Click to sort", "placeholder": { "title": "No Runs Found", "body": "This is where all your robot runs will appear. Once a robot is active, its runs will be logged here.", "search":"No runs match your search" }, "notifications": { "no_runs": "No runs found. 
Please try again.", "delete_success": "Run deleted successfully" } }, "proxy": { "title": "Proxy Configuration", "tab_standard": "Standard Proxy", "tab_rotation": "Automatic Proxy Rotation", "server_url": "Proxy Server URL", "server_url_helper": "Proxy to be used for all robots. HTTP and SOCKS proxies are supported. Example http://myproxy.com:3128 or socks5://myproxy.com:3128. Short form myproxy.com:3128 is considered an HTTP proxy.", "requires_auth": "Requires Authentication?", "username": "Username", "password": "Password", "add_proxy": "Add Proxy", "test_proxy": "Test Proxy", "remove_proxy": "Remove Proxy", "table": { "proxy_url": "Proxy URL", "requires_auth": "Requires Authentication" }, "coming_soon": "Coming Soon - In Open Source (Basic Rotation) & Cloud (Advanced Rotation). If you don't want to manage the infrastructure, join our cloud waitlist to get early access.", "join_waitlist": "Join Maxun Cloud Waitlist", "alert": { "title": "If your proxy requires a username and password, always provide them separate from the proxy URL.", "right_way": "The right way", "wrong_way": "The wrong way", "proxy_url": "Proxy URL:", "username": "Username:", "password": "Password:" }, "notifications": { "config_success": "Proxy configuration submitted successfully", "config_error": "Failed to submit proxy configuration. Try again.", "test_success": "Proxy configuration is working", "test_error": "Failed to test proxy configuration. Try again.", "fetch_success": "Proxy configuration fetched successfully", "remove_success": "Proxy configuration removed successfully", "remove_error": "Failed to remove proxy configuration. Try again." 
} }, "apikey": { "title": "Manage Your API Key", "default_name": "Maxun API Key", "table": { "name": "API Key Name", "key": "API Key", "actions": "Actions" }, "actions": { "copy": "Copy", "show": "Show", "hide": "Hide", "delete": "Delete" }, "no_key_message": "You haven't generated an API key yet.", "generate_button": "Generate API Key", "notifications": { "fetch_error": "Failed to fetch API Key - {{error}}", "generate_success": "Generated API Key successfully", "generate_error": "Failed to generate API Key - {{error}}", "delete_success": "API Key deleted successfully", "delete_error": "Failed to delete API Key - {{error}}", "copy_success": "Copied API Key successfully" } }, "action_description": { "text": { "title": "Capture Text", "description": "Hover over the texts you want to extract and click to select them" }, "screenshot": { "title": "Capture Screenshot", "description": "Capture a partial or full page screenshot of the current page." }, "list": { "title": "Capture List", "description": "Hover over the list you want to extract. Once selected, you can hover over all texts inside the list you selected. Click to select them." }, "default": { "title": "What data do you want to extract?", "description": "A robot can perform one or multiple actions. Choose from the options listed below." 
}, "list_stages": { "initial": "Select the list you want to extract along with the texts inside it", "pagination": "Select how the robot can capture the rest of the list", "limit": "Choose the number of items to extract", "complete": "Capture is complete" }, "actions": { "text": "Capture Text", "list": "Capture List", "screenshot": "Capture Screenshot" } }, "right_panel": { "buttons": { "capture_list": "Capture List", "capture_text": "Capture Text", "capture_screenshot": "Capture Screenshot", "confirm": "Confirm", "discard": "Discard", "confirm_capture": "Confirm Capture", "confirm_pagination": "Confirm", "confirm_limit": "Confirm", "confirm_reset": "Confirm", "finish_capture": "Finish Capture", "back": "Back", "reset": "Restart Recording", "finish": "Finish", "cancel": "Cancel", "delete": "Delete" }, "screenshot": { "capture_fullpage": "Capture Fullpage", "capture_visible": "Capture Visible Part", "display_fullpage": "Take Fullpage Screenshot", "display_visible": "Take Visible Part Screenshot" }, "pagination": { "title": "How can we find the next list item on the page?", "click_next": "Click on next to navigate to the next page", "click_load_more": "Click on load more to load more items", "scroll_down": "Scroll down to load more items", "scroll_up": "Scroll up to load more items", "none": "No more items to load" }, "limit": { "title": "What is the maximum number of rows you want to extract?", "custom": "Custom", "enter_number": "Enter number" }, "fields": { "label": "Label", "data": "Data", "field_label": "Field Label", "field_data": "Field Data" }, "messages": { "list_selected": "List Selected Successfully", "list_empty": "List selected. Please select fields inside the list." 
}, "errors": { "select_pagination": "Please select a pagination type.", "select_pagination_element": "Please select the pagination element first.", "select_limit": "Please select a limit or enter a custom limit.", "invalid_limit": "Please enter a valid limit.", "confirm_text_fields": "Please confirm all text fields", "unable_create_settings": "Unable to create list settings. Make sure you have defined a field for the list.", "capture_text_discarded": "Capture Text Discarded", "capture_list_discarded": "Capture List Discarded", "label_required": "Label cannot be empty", "duplicate_label": "This label already exists. Please use a unique label.", "no_text_captured": "Please highlight and select text elements before confirming.", "capture_list_first": "Please hover over a list and select text fields inside it first", "confirm_all_list_fields": "Please confirm all captured list fields before proceeding" }, "tooltips": { "capture_list_first": "Hover over a list and select text fields inside it first", "confirm_all_list_fields": "Please confirm all captured list fields before proceeding" } }, "save_recording": { "title": "Save Robot", "robot_name": "Name", "buttons": { "save": "Save", "confirm": "Confirm" }, "notifications": { "save_success": "Robot saved successfully", "retrain_success": "Robot retrained successfully", "save_error": "Error saving robot" }, "errors": { "user_not_logged": "User not logged in. Cannot save recording.", "exists_warning": "A robot with this name already exists. Please confirm that you want to overwrite it.", "no_actions_performed": "Cannot save robot. Please perform at least one capture action before saving." }, "tooltips": { "saving": "Optimizing and saving the workflow" } }, "browser_recording": { "modal": { "confirm_discard": "Are you sure you want to discard the recording?", "confirm_reset": "Are you sure you want to restart the recording?", "reset_warning": "This will clear all previous captures in the current session. 
The recording session will restart for the same website." }, "notifications": { "terminated": "Current Recording was terminated", "environment_reset": "Browser environment has been reset", "reset_successful": "Successfully reset all captures and returned to initial state" } }, "interpretation_log": { "titles": { "output_preview": "Output Data Preview", "screenshot": "Screenshot" }, "messages": { "additional_rows": "Additional rows of data will be extracted once you finish recording.", "successful_training": "You've successfully trained the robot to perform actions! Click on the button below to get a preview of the data your robot will extract.", "no_selection": "It looks like you have not selected anything for extraction yet. Once you do, the robot will show a preview of your selections here." }, "data_sections": { "binary_received": "---------- Binary output data received ----------", "serializable_received": "---------- Serializable output data received ----------", "mimetype": "mimetype: ", "image_below": "Image is rendered below:", "separator": "--------------------------------------------------" }, "notifications": { "reset_success": "Output Preview reset successfully" } }, "interpretation_buttons": { "buttons": { "preview": "Get Preview of Output Data", "reset": "Reset", "yes": "Yes", "no": "No" }, "messages": { "extracting": "Extracting data...please wait", "restart_required": "Please restart the interpretation after updating the recording", "run_finished": "Run finished", "run_failed": "Run failed to start" }, "modal": { "use_previous": "Do you want to use your previous selection as a condition for performing this action?", "previous_action": "Your previous action was: ", "element_text": "on an element with text " }, "notifications": { "reset_success": "Output Preview reset successfully" } }, "recording_page": { "loader": { "browser_startup": "Spinning up a browser...Hold tight" } }, "integration_settings": { "title": "Integration Settings", "descriptions": 
{ "authenticated_as": "Authenticated as: {{email}}" }, "buttons": { "submit": "Submit", "remove_integration": "Remove Integration" }, "google": { "title": "Integrate with Google Sheet", "descriptions": { "sync_info": "If you enable this option, every time this robot runs a task successfully, its captured data will be appended to your Google Sheet.", "authenticated_as": "Authenticated as: {{email}}" }, "alerts": { "success": { "title": "Google Sheet Integrated Successfully.", "content": "Every time this robot creates a successful run, its captured data is appended to your {{sheetName}} Google Sheet. You can check the data updates", "here": "here", "note": "Note:", "sync_limitation": "The data extracted before integrating with Google Sheets will not be synced in the Google Sheet. Only the data extracted after the integration will be synced." } }, "buttons": { "authenticate": "Authenticate with Google", "fetch_sheets": "Fetch Google Spreadsheets", "remove_integration": "Remove Integration", "submit": "Submit" }, "fields": { "select_sheet": "Select Google Sheet", "selected_sheet": "Selected Sheet: {{name}} (ID: {{id}})" }, "errors": { "auth_error": "Error authenticating with Google", "fetch_error": "Error fetching spreadsheet files: {{message}}", "update_error": "Error updating Google Sheet ID: {{message}}", "remove_error": "Error removing Google Sheets integration: {{message}}" }, "notifications": { "sheet_selected": "Google Sheet selected successfully", "integration_removed": "Google Sheets integration removed successfully" } }, "airtable": { "title": "Integrate with Airtable", "descriptions": { "sync_info": "If you enable this option, every time this robot runs a task successfully, its captured data will be appended to your Airtable Base.", "authenticated_as": "Authenticated with Airtable successfully. You can now select the base and table to integrate with." 
}, "alerts": { "success": { "title": "Airtable Base Integrated Successfully", "content": "Every time this robot creates a successful run, its captured data is appended to your {{baseName}} Base and {{tableName}} Table. You can check updates", "here": "here", "note": "Note:", "sync_limitation": "Only data captured after integration will be synced to Airtable." } }, "buttons": { "authenticate": "Connect with Airtable", "fetch_bases": "Fetch Airtable Bases", "fetch_tables": "Fetch Airtable Tables", "remove_integration": "Remove Integration", "submit": "Select Base and Table" }, "fields": { "select_base": "Select Airtable Base", "select_table": "Select Airtable Table", "selected_base": "Selected Base: {{name}}", "selected_table": "Selected Table: {{name}}" }, "errors": { "auth_error": "Error authenticating with Airtable", "fetch_error": "Error fetching Airtable bases: {{message}}", "fetch_tables_error": "Error fetching Airtable tables: {{message}}", "update_error": "Error updating Airtable base: {{message}}", "remove_error": "Error removing Airtable integration: {{message}}" }, "notifications": { "base_selected": "Airtable base selected successfully", "table_selected": "Airtable table selected successfully", "integration_removed": "Airtable integration removed successfully" } } }, "robot_duplication": { "title": "Duplicate Robot", "descriptions": { "purpose": "Robot duplication is useful to extract data from pages with the same structure.", "example": "Example: If you've created a robot for <0>{{url1}}</0>, you can duplicate it to scrape similar pages like <1>{{url2}}</1> without training a robot from scratch.", "warning": "⚠️ Ensure the new page has the same structure as the original page." }, "fields": { "target_url": "Target URL" }, "buttons": { "duplicate": "Duplicate Robot", "cancel": "Cancel" }, "notifications": { "robot_not_found": "Could not find robot details. 
Please try again.", "url_required": "Target URL is required.", "duplicate_success": "Robot duplicated successfully.", "duplicate_error": "Failed to update the Target URL. Please try again.", "unknown_error": "An error occurred while updating the Target URL." } }, "robot_settings": { "title": "Robot Settings", "target_url": "Target URL", "robot_id": "Robot ID", "robot_limit": "Robot Limit", "created_by_user": "Created By User", "created_at": "Robot Created At", "errors": { "robot_not_found": "Could not find robot details. Please try again." }, "buttons": { "close": "Close" } }, "robot_edit": { "title": "Edit Robot", "change_name": "Name", "robot_limit": "Robot Limit", "save": "Save Changes", "cancel": "Cancel", "notifications": { "update_success": "Robot updated successfully.", "update_failed": "Failed to update the robot. Please try again.", "update_error": "An error occurred while updating the robot." } }, "schedule_settings": { "title": "Schedule Settings", "run_every": "Run every", "start_from": "Start From", "on_day": "On day", "at_around": "At around", "timezone": "Timezone", "buttons": { "delete_schedule": "Delete Schedule", "save_schedule": "Save Schedule", "cancel": "Cancel" }, "labels": { "in_between": "In Between", "run_once_every": "Run once every", "start_from_label": "Start From", "on_day_of_month": "On Day of the Month", "on_day": { "st": "st", "nd": "nd", "rd": "rd", "th": "th" } } }, "main_page": { "notifications": { "interpretation_success": "Interpretation of robot {{name}} succeeded", "interpretation_failed": "Failed to interpret robot {{name}}", "run_started": "Running robot: {{name}}", "run_start_failed": "Failed to run robot: {{name}}", "schedule_success": "Robot {{name}} scheduled successfully", "schedule_failed": "Failed to schedule robot {{name}}", "abort_success": "Interpretation of robot {{name}} aborted successfully", "abort_failed": "Failed to abort the interpretation of robot {{name}}", "abort_initiated": "Aborting the interpretation 
of robot {{name}}" }, "menu": { "recordings": "Robots", "runs": "Runs", "proxy": "Proxy", "apikey": "API Key" } }, "browser_window": { "attribute_modal": { "title": "Select Attribute", "notifications": { "list_select_success": "List has been successfully selected. Please select the text data to extract.", "pagination_select_success": "Pagination element has been successfully selected." } }, "attribute_options": { "anchor": { "text": "Text: {{text}}", "url": "URL: {{url}}" }, "image": { "alt_text": "Alt Text: {{altText}}", "image_url": "Image URL: {{imageUrl}}" }, "default": { "text": "Text: {{text}}" } } }, "runs_table": { "run_type_chips": { "manual_run": "Manual", "scheduled_run": "Scheduled", "api": "API", "sdk": "SDK", "unknown_run_type": "Unknown Run Type" }, "run_status_chips": { "success": "Success", "running": "Running", "scheduled": "Scheduled", "queued": "Queued", "failed": "Failed", "aborted": "Aborted" }, "run_settings_modal": { "title": "Run Settings", "labels": { "run_id": "Run ID", "run_by_user": "Run by User", "run_by_schedule": "Run by Schedule ID", "run_by_api": "Run by API", "run_type": "Run Type" } } }, "run_content": { "tabs": { "output_data": "Output Data", "log": "Log" }, "buttons": { "stop": "Stop" }, "loading": "Extracting data...", "empty_output": "No output data available", "captured_data": { "title": "Captured Data", "download_csv": "Download CSV", "view_full": "View Full Data", "items": "items", "schema_title": "Captured Texts", "list_title": "Captured Lists" }, "captured_screenshot": { "title": "Captured Screenshots", "download": "Download", "render_failed": "Failed to render screenshot" } }, "navbar": { "project_name": "Maxun", "notifications": { "success": { "logout": "Logged out successfully" }, "errors": { "logout": { "unauthorized": "You are not authorized to perform this action", "server": "Server error occurred during logout", "network": "Network error while logging out", "unknown": "An unexpected error occurred during logout" } 
} }, "upgrade": { "button": "Upgrade", "modal": { "up_to_date": "🎉 You're up to date!", "new_version_available": "A new version is available: {{version}}. Upgrade to the latest version for bug fixes, enhancements and new features!", "view_updates": "View all the new updates", "view_updates_link": "here", "tabs": { "manual_setup": "Manual Setup Upgrade", "docker_setup": "Docker Compose Setup Upgrade" } } }, "menu_items": { "logout": "Logout", "discord": "Discord", "youtube": "YouTube", "twitter": "Twitter (X)", "language": "Language" }, "recording": { "discard": "Discard" } }, "language_menu": { "en": "English", "es": "Spanish", "ja": "Japanese", "zh": "Chinese", "de": "German", "tr": "Turkish" } } ================================================ FILE: public/locales/es.json ================================================ { "login": { "title": "¡Bienvenido de nuevo!", "email": "Introducir correo electrónico de trabajo", "password": "Contraseña", "button": "Iniciar sesión", "loading": "Cargando", "register_prompt": "¿No tienes una cuenta?", "register_link": "Registrarse", "welcome_notification": "¡Bienvenido a Maxun!", "validation": { "required_fields": "El correo electrónico y la contraseña son obligatorios", "password_length": "La contraseña debe tener al menos 6 caracteres" }, "error": { "user_not_found": "El usuario no existe", "invalid_credentials": "Correo electrónico o contraseña inválidos", "server_error": "Error al iniciar sesión. Por favor, inténtelo de nuevo más tarde", "generic": "Se produjo un error. 
Por favor, inténtelo de nuevo" } }, "register": { "title": "Crear cuenta", "email": "Introducir correo electrónico de trabajo", "password": "Contraseña", "button": "Registrarse", "loading": "Cargando", "register_prompt": "¿Ya tienes una cuenta?", "login_link": "Iniciar sesión", "welcome_notification": "¡Bienvenido a Maxun!", "validation": { "email_required": "El correo electrónico es obligatorio", "password_requirements": "La contraseña debe tener al menos 6 caracteres" }, "error": { "user_exists": "Ya existe un usuario con este correo electrónico", "creation_failed": "No se pudo crear la cuenta", "server_error": "Ocurrió un error en el servidor", "generic": "Error en el registro. Por favor, inténtelo de nuevo" } }, "recordingtable": { "run": "Ejecutar", "name": "Nombre", "schedule": "Programar", "integrate": "Integrar", "settings": "Ajustes", "options": "Opciones", "heading": "Mis Robots", "new": "Crear Robot", "search_criteria": "Intente ajustar sus criterios de búsqueda", "placeholder": { "title": "Todo listo para empezar", "body": "Los robots que cree aparecerán aquí. ¡Haga clic en \"Crear robot\" para comenzar!", "search": "Ningún robot coincide con su búsqueda" }, "modal": { "title": "Ingresa la URL", "login_title": "¿Este sitio web requiere iniciar sesión?", "label": "URL", "button": "Comenzar grabación" }, "retrain": "Reentrenar", "edit": "Editar", "delete": "Eliminar", "duplicate": "Duplicar", "search": "Buscar robots...", "warning_modal": { "title": "Navegador Activo Detectado", "message": "Ya hay una sesión de grabación del navegador en ejecución. ¿Le gustaría descartarla y crear una nueva grabación?", "discard_and_create": "Descartar y Crear Nueva", "cancel": "Cancelar" }, "notifications": { "delete_warning": "El robot tiene ejecuciones asociadas. 
Primero elimine las ejecuciones para eliminar el robot", "delete_success": "Robot eliminado exitosamente", "auth_success": "Robot autenticado exitosamente", "browser_limit_warning": "Los navegadores remotos están ocupados actualmente. Por favor, espere unos minutos e inténtelo de nuevo" } }, "mainmenu": { "recordings": "Robots", "runs": "Ejecuciones", "proxy": "Proxy", "apikey": "Clave API", "feedback": "Unirse a Maxun Cloud", "apidocs": "Sitio Web a API" }, "runstable": { "runs": "Todas las ejecuciones", "runStatus": "Estado", "runName": "Nombre", "name": "Nombre", "startedAt": "Iniciado el", "finishedAt": "Finalizado el", "delete": "Eliminar", "settings": "Ajustes", "search": "Buscar ejecuciones...", "sort_tooltip": "Haga clic para ordenar", "placeholder": { "title": "No se encontraron ejecuciones", "body": "Aquí aparecerán todas las ejecuciones de sus robots. Una vez que un robot esté activo, sus ejecuciones se registrarán aquí.", "search": "Ninguna ejecución coincide con su búsqueda" }, "notifications": { "no_runs": "No se encontraron ejecuciones. Por favor, inténtelo de nuevo.", "delete_success": "Ejecución eliminada con éxito" } }, "proxy": { "title": "Configuración del Proxy", "tab_standard": "Proxy Estándar", "tab_rotation": "Rotación Automática de Proxy", "server_url": "URL del Servidor Proxy", "server_url_helper": "Proxy para usar en todos los robots. Se admiten proxies HTTP y SOCKS. Ejemplo http://myproxy.com:3128 o socks5://myproxy.com:3128. La forma corta myproxy.com:3128 se considera un proxy HTTP.", "requires_auth": "¿Requiere Autenticación?", "username": "Usuario", "password": "Contraseña", "add_proxy": "Agregar Proxy", "test_proxy": "Probar Proxy", "remove_proxy": "Eliminar Proxy", "table": { "proxy_url": "URL del Proxy", "requires_auth": "Requiere Autenticación" }, "coming_soon": "Próximamente - En Open Source (Rotación Básica) y Cloud (Rotación Avanzada). 
Si no desea administrar la infraestructura, únase a nuestra lista de espera en la nube para obtener acceso anticipado.", "join_waitlist": "Unirse a la Lista de Espera de Maxun Cloud", "alert": { "title": "Si su proxy requiere un nombre de usuario y contraseña, proporcione siempre estos datos por separado de la URL del proxy.", "right_way": "La forma correcta", "wrong_way": "La forma incorrecta", "proxy_url": "URL del Proxy:", "username": "Usuario:", "password": "Contraseña:" }, "notifications": { "config_success": "Configuración del proxy enviada con éxito", "config_error": "Error al enviar la configuración del proxy. Inténtelo de nuevo.", "test_success": "La configuración del proxy funciona correctamente", "test_error": "Error al probar la configuración del proxy. Inténtelo de nuevo.", "fetch_success": "Configuración del proxy recuperada con éxito", "remove_success": "Configuración del proxy eliminada con éxito", "remove_error": "Error al eliminar la configuración del proxy. Inténtelo de nuevo." 
} }, "apikey": { "title": "Gestionar tu Clave API", "default_name": "Clave API de Maxun", "table": { "name": "Nombre de la Clave API", "key": "Clave API", "actions": "Acciones" }, "actions": { "copy": "Copiar", "show": "Mostrar", "hide": "Ocultar", "delete": "Eliminar" }, "no_key_message": "Aún no has generado una clave API.", "generate_button": "Generar Clave API", "notifications": { "fetch_error": "Error al obtener la clave API - {{error}}", "generate_success": "Clave API generada con éxito", "generate_error": "Error al generar la clave API - {{error}}", "delete_success": "Clave API eliminada con éxito", "delete_error": "Error al eliminar la clave API - {{error}}", "copy_success": "Clave API copiada con éxito" } }, "action_description": { "text": { "title": "Capturar Texto", "description": "Pase el cursor sobre los textos que desea extraer y haga clic para seleccionarlos" }, "screenshot": { "title": "Capturar Pantalla", "description": "Capture una captura de pantalla parcial o completa de la página actual." }, "list": { "title": "Capturar Lista", "description": "Pase el cursor sobre la lista que desea extraer. Una vez seleccionada, puede pasar el cursor sobre todos los textos dentro de la lista seleccionada. Haga clic para seleccionarlos." }, "default": { "title": "¿Qué datos desea extraer?", "description": "Un robot puede realizar una o varias acciones. Elija entre las opciones que aparecen a continuación." 
}, "list_stages": { "initial": "Seleccione la lista que desea extraer junto con los textos que contiene", "pagination": "Seleccione cómo puede el robot capturar el resto de la lista", "limit": "Elija el número de elementos a extraer", "complete": "Captura completada" }, "actions": { "text": "Capturar Texto", "list": "Capturar Lista", "screenshot": "Capturar Pantalla" } }, "right_panel": { "buttons": { "capture_list": "Capturar Lista", "capture_text": "Capturar Texto", "capture_screenshot": "Capturar Pantalla", "confirm": "Confirmar", "discard": "Descartar", "confirm_capture": "Confirmar Captura", "confirm_pagination": "Confirmar", "confirm_limit": "Confirmar", "confirm_reset": "Confirmar", "finish_capture": "Finalizar Captura", "back": "Atrás", "reset": "Reiniciar", "finish": "Finalizar", "cancel": "Cancelar", "delete": "Eliminar" }, "screenshot": { "capture_fullpage": "Capturar Página Completa", "capture_visible": "Capturar Parte Visible", "display_fullpage": "Capturar Screenshot de Página Completa", "display_visible": "Capturar Screenshot de Parte Visible" }, "pagination": { "title": "¿Cómo podemos encontrar el siguiente elemento de la lista en la página?", "click_next": "Hacer clic en siguiente para navegar a la siguiente página", "click_load_more": "Hacer clic en cargar más para cargar más elementos", "scroll_down": "Desplazarse hacia abajo para cargar más elementos", "scroll_up": "Desplazarse hacia arriba para cargar más elementos", "none": "No hay más elementos para cargar" }, "limit": { "title": "¿Cuál es el número máximo de filas que desea extraer?", "custom": "Personalizado", "enter_number": "Ingrese número" }, "fields": { "label": "Etiqueta", "data": "Datos", "field_label": "Etiqueta del Campo", "field_data": "Datos del Campo" }, "messages": { "list_selected": "Lista seleccionada exitosamente", "list_empty": "Lista seleccionada. Seleccione los campos dentro de la lista." 
}, "errors": { "select_pagination": "Por favor seleccione un tipo de paginación.", "select_pagination_element": "Por favor seleccione primero el elemento de paginación.", "select_limit": "Por favor seleccione un límite o ingrese un límite personalizado.", "invalid_limit": "Por favor ingrese un límite válido.", "confirm_text_fields": "Por favor confirme todos los campos de texto", "unable_create_settings": "No se pueden crear las configuraciones de la lista. Asegúrese de haber definido un campo para la lista.", "capture_text_discarded": "Captura de texto descartada", "capture_list_discarded": "Captura de lista descartada", "label_required": "La etiqueta no puede estar vacía", "duplicate_label": "Esta etiqueta ya existe. Por favor use una etiqueta única.", "no_text_captured": "Por favor resalte y seleccione elementos de texto antes de confirmar.", "capture_list_first": "Por favor posicione el cursor sobre una lista y seleccione campos de texto dentro de ella primero", "confirm_all_list_fields": "Por favor confirme todos los campos de lista capturados antes de continuar" }, "tooltips": { "capture_list_first": "Posicione el cursor sobre una lista y seleccione campos de texto dentro de ella primero", "confirm_all_list_fields": "Por favor confirme todos los campos de lista capturados antes de continuar" } }, "save_recording": { "title": "Guardar Robot", "robot_name": "Nombre del Robot", "buttons": { "save": "Guardar", "confirm": "Confirmar" }, "notifications": { "save_success": "Robot guardado correctamente", "retrain_success": "Robot reentrenado correctamente", "save_error": "Error al guardar el robot" }, "errors": { "user_not_logged": "Usuario no conectado. No se puede guardar la grabación.", "exists_warning": "Ya existe un robot con este nombre, por favor confirme la sobrescritura del robot.", "no_actions_performed": "No se puede guardar el robot. Por favor realice al menos una acción de captura antes de guardar." 
}, "tooltips": { "saving": "Optimizando y guardando el flujo de trabajo" } }, "browser_recording": { "modal": { "confirm_discard": "¿Está seguro de que desea descartar la grabación?", "confirm_reset": "¿Está seguro de que desea reiniciar?", "reset_warning": "Esto borrará todas las capturas anteriores de la sesión actual. La sesión de grabación se reiniciará para el mismo sitio web." }, "notifications": { "terminated": "La grabación actual fue terminada", "environment_reset": "El entorno del navegador ha sido reiniciado", "reset_successful": "Se reiniciaron correctamente todas las capturas y se volvió al estado inicial" } }, "interpretation_buttons": { "buttons": { "preview": "Obtener Vista Previa de Datos de Salida", "reset": "Restablecer", "yes": "Sí", "no": "No" }, "messages": { "extracting": "Extrayendo datos...espere", "restart_required": "Por favor, reinicie la interpretación después de actualizar la grabación", "run_finished": "Ejecución finalizada", "run_failed": "Error al iniciar la ejecución" }, "modal": { "use_previous": "¿Desea usar su selección anterior como condición para realizar esta acción?", "previous_action": "Su acción anterior fue: ", "element_text": "en un elemento con texto " }, "notifications": { "reset_success": "Vista previa restablecida correctamente" } }, "interpretation_log": { "titles": { "output_preview": "Vista Previa de Datos de Salida", "screenshot": "Captura de pantalla" }, "messages": { "additional_rows": "Se extraerán filas adicionales de datos una vez que termine la grabación.", "successful_training": "¡Has entrenado exitosamente al robot para realizar acciones! Haz clic en el botón de abajo para obtener una vista previa de los datos que tu robot extraerá.", "no_selection": "Parece que aún no has seleccionado nada para extraer. Una vez que lo hagas, el robot mostrará una vista previa de tus selecciones aquí." 
}, "data_sections": { "binary_received": "---------- Datos binarios de salida recibidos ----------", "serializable_received": "---------- Datos serializables de salida recibidos ----------", "mimetype": "tipo MIME: ", "image_below": "La imagen se muestra a continuación:", "separator": "--------------------------------------------------" }, "notifications": { "reset_success": "Vista previa restablecida correctamente" } }, "recording_page": { "loader": { "browser_startup": "Iniciando el navegador... Por favor, espere" } }, "integration_settings": { "title": "Ajustes de Integración", "descriptions": { "authenticated_as": "Autenticado como: {{email}}" }, "buttons": { "submit": "Enviar", "remove_integration": "Eliminar Integración" }, "google": { "title": "Integrar con Google Sheet", "descriptions": { "sync_info": "Si habilita esta opción, cada vez que este robot ejecute una tarea correctamente, sus datos capturados se añadirán a su Google Sheet.", "authenticated_as": "Autenticado como: {{email}}" }, "alerts": { "success": { "title": "Google Sheet Integrado Correctamente.", "content": "Cada vez que este robot crea una ejecución exitosa, sus datos capturados se añaden a su Google Sheet {{sheetName}}. Puede comprobar las actualizaciones de datos", "here": "aquí", "note": "Nota:", "sync_limitation": "Los datos extraídos antes de la integración con Google Sheets no se sincronizarán en la Google Sheet. Solo los datos extraídos después de la integración se sincronizarán." 
} }, "buttons": { "authenticate": "Autenticar con Google", "fetch_sheets": "Obtener Hojas de Cálculo de Google", "remove_integration": "Eliminar Integración", "submit": "Enviar" }, "fields": { "select_sheet": "Seleccionar Google Sheet", "selected_sheet": "Hoja Seleccionada: {{name}} (ID: {{id}})" }, "errors": { "auth_error": "Error al autenticar con Google", "fetch_error": "Error al obtener archivos de hojas de cálculo: {{message}}", "update_error": "Error al actualizar ID de Google Sheet: {{message}}", "remove_error": "Error al eliminar la integración de Google Sheets: {{message}}" }, "notifications": { "sheet_selected": "Google Sheet seleccionado correctamente", "integration_removed": "Integración de Google Sheets eliminada correctamente" } }, "airtable": { "title": "Integrar con Airtable", "descriptions": { "sync_info": "Si habilita esta opción, cada vez que este robot ejecute una tarea correctamente, sus datos capturados se añadirán a su Airtable.", "authenticated_as": "Autenticado con Airtable exitosamente. Ahora puede seleccionar la base y la tabla con las que desea integrar." }, "alerts": { "success": { "title": "Base Airtable integrada con éxito", "content": "Cada vez que este robot crea una ejecución exitosa, los datos capturados se agregan a su base {{baseName}} y a su tabla {{tableName}}. Puedes consultar actualizaciones", "here": "aquí", "note": "Nota:", "sync_limitation": "Solo los datos capturados después de la integración se sincronizarán con Airtable." 
} }, "buttons": { "authenticate": "Conectar con Airtable", "fetch_bases": "Obtener Bases de Airtable", "fetch_tables": "Obtener Tablas de Airtable", "remove_integration": "Eliminar Integración", "submit": "Seleccionar Base y Tabla" }, "fields": { "select_base": "Seleccionar Base de Airtable", "select_table": "Seleccionar Tabla de Airtable", "selected_base": "Base Seleccionada: {{name}}", "selected_table": "Tabla Seleccionada: {{name}}" }, "errors": { "auth_error": "Error al autenticar con Airtable", "fetch_error": "Error al obtener bases de Airtable: {{message}}", "fetch_tables_error": "Error al obtener tablas de Airtable: {{message}}", "update_error": "Error al actualizar base de Airtable: {{message}}", "remove_error": "Error al eliminar la integración de Airtable: {{message}}" }, "notifications": { "base_selected": "Base de Airtable seleccionada correctamente", "table_selected": "Tabla de Airtable seleccionada correctamente", "integration_removed": "Integración de Airtable eliminada correctamente" } } }, "robot_duplication": { "title": "Duplicar Robot", "descriptions": { "purpose": "La duplicación de robots es útil para extraer datos de páginas con la misma estructura.", "example": "Ejemplo: Si has creado un robot para {{url1}}, puedes duplicarlo para extraer páginas similares como {{url2}} sin tener que entrenar un robot desde cero.", "warning": "⚠️ Asegúrate de que la nueva página tenga la misma estructura que la página original." }, "fields": { "target_url": "URL Destino del Robot" }, "buttons": { "duplicate": "Duplicar Robot", "cancel": "Cancelar" }, "notifications": { "robot_not_found": "No se pudieron encontrar los detalles del robot. Por favor, inténtalo de nuevo.", "url_required": "Se requiere la URL de destino.", "duplicate_success": "Robot duplicado con éxito.", "duplicate_error": "Error al actualizar la URL de destino. Por favor, inténtalo de nuevo.", "unknown_error": "Ocurrió un error al actualizar la URL de destino." 
} }, "robot_settings": { "title": "Configuración del Robot", "target_url": "URL de Destino del Robot", "robot_id": "ID del Robot", "robot_limit": "Límite del Robot", "created_by_user": "Creado por Usuario", "created_at": "Fecha de Creación del Robot", "errors": { "robot_not_found": "No se pudieron encontrar los detalles del robot. Inténtelo de nuevo." }, "buttons": { "close": "Cerrar" } }, "robot_edit": { "title": "Editar Robot", "change_name": "Cambiar Nombre del Robot", "robot_limit": "Límite del Robot", "save": "Guardar Cambios", "cancel": "Cancelar", "notifications": { "update_success": "Robot actualizado exitosamente.", "update_failed": "Error al actualizar el robot. Intente de nuevo.", "update_error": "Ocurrió un error al actualizar el robot." } }, "schedule_settings": { "title": "Configuración de Programación", "run_every": "Ejecutar cada", "start_from": "Iniciar desde", "on_day": "En día", "at_around": "Alrededor de", "timezone": "Zona horaria", "buttons": { "delete_schedule": "Eliminar Programación", "save_schedule": "Guardar Programación", "cancel": "Cancelar" }, "labels": { "in_between": "Entre", "run_once_every": "Ejecutar cada", "start_from_label": "Iniciar desde", "on_day_of_month": "Día del mes", "on_day": { "st": "º", "nd": "º", "rd": "º", "th": "º" } } }, "main_page": { "notifications": { "interpretation_success": "Interpretación del robot {{name}} completada con éxito", "interpretation_failed": "Error al interpretar el robot {{name}}", "run_started": "Ejecutando robot: {{name}}", "run_start_failed": "Error al ejecutar el robot: {{name}}", "schedule_success": "Robot {{name}} programado exitosamente", "schedule_failed": "Error al programar el robot {{name}}", "abort_success": "Interpretación del robot {{name}} abortada exitosamente", "abort_failed": "Error al abortar la interpretación del robot {{name}}", "abort_initiated": "Cancelando la interpretación del robot {{name}}" }, "menu": { "recordings": "Robots", "runs": "Ejecuciones", "proxy": "Proxy", 
"apikey": "Clave API" } }, "browser_window": { "attribute_modal": { "title": "Seleccionar Atributo", "notifications": { "list_select_success": "Lista seleccionada correctamente. Seleccione los datos de texto para extracción.", "pagination_select_success": "Elemento de paginación seleccionado correctamente." } }, "attribute_options": { "anchor": { "text": "Texto: {{text}}", "url": "URL: {{url}}" }, "image": { "alt_text": "Texto Alt: {{altText}}", "image_url": "URL de Imagen: {{imageUrl}}" }, "default": { "text": "Texto: {{text}}" } } }, "runs_table": { "run_type_chips": { "manual_run": "Ejecución Manual", "scheduled_run": "Ejecución Programada", "api": "API", "unknown_run_type": "Tipo de Ejecución Desconocido" }, "run_status_chips": { "success": "Éxito", "running": "Ejecutando", "scheduled": "Programado", "queued": "En cola", "failed": "Fallido", "aborted": "Abortado" }, "run_settings_modal": { "title": "Configuración de Ejecución", "labels": { "run_id": "ID de Ejecución", "run_by_user": "Ejecutado por Usuario", "run_by_schedule": "Ejecutado por ID de Programación", "run_by_api": "Ejecutado por API", "run_type": "Tipo de Ejecución" } } }, "run_content": { "tabs": { "output_data": "Datos de Salida", "log": "Registro" }, "buttons": { "stop": "Detener" }, "loading": "Cargando datos...", "empty_output": "No hay datos de salida disponibles", "captured_data": { "title": "Datos capturados", "download_csv": "Descargar CSV", "view_full": "Ver datos completos", "items": "elementos", "schema_title": "Textos capturados", "list_title": "Listas capturadas" }, "captured_screenshot": { "title": "Capturas de pantalla", "download": "Descargar", "render_failed": "Error al renderizar la captura de pantalla" } }, "navbar": { "project_name": "Maxun", "notifications": { "success": { "logout": "Sesión cerrada exitosamente" }, "errors": { "logout": { "unauthorized": "No estás autorizado para realizar esta acción", "server": "Error del servidor durante el cierre de sesión", "network": "Error 
de red al cerrar sesión", "unknown": "Ocurrió un error inesperado al cerrar sesión" } } }, "upgrade": { "button": "Actualizar", "modal": { "up_to_date": "¡Estás actualizado!", "new_version_available": "Hay una nueva versión disponible: {{version}}. ¡Actualice a la última versión para correcciones de errores, mejoras y nuevas características!", "view_updates": "Ver todas las actualizaciones", "view_updates_link": "aquí", "tabs": { "manual_setup": "Actualización de Configuración Manual", "docker_setup": "Actualización de Configuración Docker Compose" } } }, "menu_items": { "logout": "Cerrar sesión", "discord": "Discord", "youtube": "YouTube", "twitter": "Twitter (X)", "language": "Idioma" }, "recording": { "discard": "Descartar" } }, "language_menu": { "en": "Inglés", "es": "Español", "ja": "Japonés", "zh": "Chino", "de": "Alemán", "tr": "Turco" } } ================================================ FILE: public/locales/ja.json ================================================ { "login": { "title": "お帰りなさい!", "email": "勤務先メールアドレスを入力", "password": "パスワード", "button": "ログイン", "loading": "読み込み中", "register_prompt": "アカウントをお持ちでないですか?", "register_link": "登録する", "welcome_notification": "Maxunへようこそ!", "validation": { "required_fields": "メールアドレスとパスワードは必須です", "password_length": "パスワードは6文字以上である必要があります" }, "error": { "user_not_found": "ユーザーが存在しません", "invalid_credentials": "メールアドレスまたはパスワードが無効です", "server_error": "ログインに失敗しました。後でもう一度お試しください", "generic": "エラーが発生しました。もう一度お試しください" } }, "register": { "title": "アカウントを登録する", "email": "勤務先メールアドレスを入力", "password": "パスワード", "button": "登録する", "loading": "読み込み中", "register_prompt": "既にアカウントをお持ちですか?", "login_link": "ログイン", "welcome_notification": "Maxunへようこそ!", "validation": { "email_required": "メールアドレスは必須です", "password_requirements": "パスワードは6文字以上である必要があります" }, "error": { "user_exists": "このメールアドレスは既に登録されています", "creation_failed": "アカウントを作成できませんでした", "server_error": "サーバーエラーが発生しました", "generic": "登録に失敗しました。もう一度お試しください" } }, "recordingtable": { 
"run": "実行", "name": "名前", "schedule": "スケジュール", "integrate": "統合", "settings": "設定", "options": "オプション", "heading": "私のロボット", "new": "ロボットを作成", "search_criteria": "検索条件を調整してみてください", "placeholder": { "title": "始める準備ができました", "body": "作成したロボットはここに表示されます。「ロボットを作成」をクリックして始めましょう!", "search": "検索に一致するロボットはありません" }, "modal": { "title": "URLを入力してください", "login_title": "このサイトはログインが必要ですか?", "label": "URL", "button": "録画を開始" }, "retrain": "再学習", "edit": "編集", "delete": "削除", "duplicate": "複製", "search": "ロボットを検索...", "warning_modal": { "title": "アクティブなブラウザが検出されました", "message": "既にブラウザ録画セッションが実行されています。破棄して新しい録画を作成しますか?", "discard_and_create": "破棄して新規作成", "cancel": "キャンセル" }, "notifications": { "delete_warning": "ロボットには関連する実行があります。ロボットを削除するには、まず実行を削除してください", "delete_success": "ロボットが正常に削除されました", "auth_success": "ロボットの認証に成功しました", "browser_limit_warning": "リモートブラウザは現在ビジー状態です。数分お待ちいただいてから再度お試しください" } }, "mainmenu": { "recordings": "ロボット", "runs": "実行", "proxy": "プロキシ", "apikey": "APIキー", "feedback": "Maxunクラウドに参加する", "apidocs": "WebサイトからAPI" }, "runstable": { "runs": "すべての実行", "runStatus": "ステータス", "runName": "名前", "name": "名前", "startedAt": "開始日時", "finishedAt": "終了日時", "delete": "削除", "settings": "設定", "search": "実行を検索...", "sort_tooltip": "クリックして並べ替え", "placeholder": { "title": "実行が見つかりません", "body": "すべてのロボットの実行はここに表示されます。ロボットがアクティブになると、その実行はここに記録されます。", "search": "検索に一致する実行はありません" }, "notifications": { "no_runs": "実行が見つかりません。もう一度お試しください。", "delete_success": "実行が正常に削除されました" } }, "proxy": { "title": "プロキシ設定", "tab_standard": "標準プロキシ", "tab_rotation": "自動プロキシローテーション", "server_url": "プロキシサーバーURL", "server_url_helper": "すべてのロボットで使用するプロキシ。HTTPとSOCKSプロキシがサポートされています。例:http://myproxy.com:3128 または socks5://myproxy.com:3128。短縮形 myproxy.com:3128 はHTTPプロキシとして扱われます。", "requires_auth": "認証が必要ですか?", "username": "ユーザー名", "password": "パスワード", "add_proxy": "プロキシを追加", "test_proxy": "プロキシをテスト", "remove_proxy": "プロキシを削除", "table": { "proxy_url": "プロキシURL", "requires_auth": "認証が必要" }, "coming_soon": 
"近日公開 - オープンソース(基本ローテーション)とクラウド(高度なローテーション)。インフラストラクチャを管理したくない場合は、クラウドの待機リストに参加して早期アクセスを取得してください。", "join_waitlist": "Maxun Cloud待機リストに参加", "alert": { "title": "プロキシにユーザー名とパスワードが必要な場合は、必ずプロキシURLとは別に指定してください。", "right_way": "正しい方法", "wrong_way": "間違った方法", "proxy_url": "プロキシURL:", "username": "ユーザー名:", "password": "パスワード:" }, "notifications": { "config_success": "プロキシ設定が正常に送信されました", "config_error": "プロキシ設定の送信に失敗しました。もう一度お試しください。", "test_success": "プロキシ設定は正常に動作しています", "test_error": "プロキシ設定のテストに失敗しました。もう一度お試しください。", "fetch_success": "プロキシ設定の取得に成功しました", "remove_success": "プロキシ設定が正常に削除されました", "remove_error": "プロキシ設定の削除に失敗しました。もう一度お試しください。" } }, "apikey": { "title": "APIキーの管理", "default_name": "Maxun APIキー", "table": { "name": "APIキー名", "key": "APIキー", "actions": "アクション" }, "actions": { "copy": "コピー", "show": "表示", "hide": "非表示", "delete": "削除" }, "no_key_message": "APIキーはまだ生成されていません。", "generate_button": "APIキーを生成", "notifications": { "fetch_error": "APIキーの取得に失敗しました - {{error}}", "generate_success": "APIキーの生成に成功しました", "generate_error": "APIキーの生成に失敗しました - {{error}}", "delete_success": "APIキーの削除に成功しました", "delete_error": "APIキーの削除に失敗しました - {{error}}", "copy_success": "APIキーのコピーに成功しました" } }, "action_description": { "text": { "title": "テキストを取得", "description": "抽出したいテキストにカーソルを合わせ、クリックして選択してください" }, "screenshot": { "title": "スクリーンショットを取得", "description": "現在のページの部分的または全体のスクリーンショットを取得します。" }, "list": { "title": "リストを取得", "description": "抽出したいリストにカーソルを合わせてください。選択後、選択したリスト内のすべてのテキストにカーソルを合わせることができます。クリックして選択してください。" }, "default": { "title": "どのデータを抽出しますか?", "description": "ロボットは1つまたは複数のアクションを実行できます。以下のオプションから選択してください。" }, "list_stages": { "initial": "抽出したいリストとその中のテキストを選択してください", "pagination": "ロボットがリストの残りをどのように取得するか選択してください", "limit": "抽出するアイテムの数を選択してください", "complete": "取得が完了しました" }, "actions": { "text": "テキストを取得", "list": "リストを取得", "screenshot": "スクリーンショットを取得" } }, "right_panel": { "buttons": { "capture_list": "リストを取得", "capture_text": "テキストを取得", "capture_screenshot": 
"スクリーンショットを取得", "confirm": "確認", "discard": "破棄", "confirm_capture": "取得を確認", "confirm_pagination": "確認", "confirm_limit": "確認", "confirm_reset": "確認", "finish_capture": "取得を完了", "back": "戻る", "reset": "リセット", "finish": "完了", "cancel": "キャンセル", "delete": "削除" }, "screenshot": { "capture_fullpage": "フルページを取得", "capture_visible": "表示部分を取得", "display_fullpage": "フルページスクリーンショットを撮影", "display_visible": "表示部分のスクリーンショットを撮影" }, "pagination": { "title": "次のリスト項目をページ上でどのように見つけますか?", "click_next": "次へをクリックして次のページへ移動", "click_load_more": "もっと読み込むをクリックして項目を追加", "scroll_down": "下にスクロールして項目を追加", "scroll_up": "上にスクロールして項目を追加", "none": "これ以上読み込む項目はありません" }, "limit": { "title": "抽出する最大行数はいくつですか?", "custom": "カスタム", "enter_number": "数値を入力" }, "fields": { "label": "ラベル", "data": "データ", "field_label": "フィールドラベル", "field_data": "フィールドデータ" }, "messages": { "list_selected": "リストが正常に選択されました", "list_empty": "リストが選択されました。リスト内のフィールドを選択してください。" }, "errors": { "select_pagination": "ページネーションタイプを選択してください。", "select_pagination_element": "まずページネーション要素を選択してください。", "select_limit": "制限を選択するかカスタム制限を入力してください。", "invalid_limit": "有効な制限を入力してください。", "confirm_text_fields": "すべてのテキストフィールドを確認してください", "unable_create_settings": "リスト設定を作成できません。リストのフィールドを定義したことを確認してください。", "capture_text_discarded": "テキスト取得が破棄されました", "capture_list_discarded": "リスト取得が破棄されました", "label_required": "ラベルは空にできません", "duplicate_label": "このラベルは既に存在します。固有のラベルを使用してください。", "no_text_captured": "確認する前にテキスト要素をハイライトして選択してください。", "capture_list_first": "まずリストの上にカーソルを置き、その中のテキストフィールドを選択してください", "confirm_all_list_fields": "続行する前にすべてのキャプチャされたリストフィールドを確認してください" }, "tooltips": { "capture_list_first": "リストの上にカーソルを置き、その中のテキストフィールドを選択してください", "confirm_all_list_fields": "すべてのキャプチャされたリストフィールドを確認してください" } }, "save_recording": { "title": "ロボットを保存", "robot_name": "ロボット名", "buttons": { "save": "保存", "confirm": "確認" }, "notifications": { "save_success": "ロボットの保存に成功しました", "retrain_success": "ロボットの再トレーニングに成功しました", "save_error": "ロボットの保存中にエラーが発生しました" }, "errors": { 
"user_not_logged": "ユーザーがログインしていません。録画を保存できません。", "exists_warning": "この名前のロボットは既に存在します。ロボットの上書きを確認してください。", "no_actions_performed": "ロボットを保存できません。保存する前に少なくとも1つのキャプチャアクションを実行してください。" }, "tooltips": { "saving": "ワークフローを最適化して保存中" } }, "browser_recording": { "modal": { "confirm_discard": "録画を破棄してもよろしいですか?", "confirm_reset": "リセットしてもよろしいですか?", "reset_warning": "これにより、現在のセッションの以前のキャプチャがすべてクリアされます。同じ Web サイトの記録セッションが再開されます。" }, "notifications": { "terminated": "現在の録画は終了しました", "environment_reset": "ブラウザー環境がリセットされました", "reset_successful": "すべてのキャプチャーを正常にリセットし、初期状態に戻りました" } }, "interpretation_log": { "titles": { "output_preview": "出力データプレビュー", "screenshot": "スクリーンショット" }, "messages": { "additional_rows": "記録が完了すると、追加のデータ行が抽出されます。", "successful_training": "ロボットのアクショントレーニングが成功しました!下のボタンをクリックすると、ロボットが抽出するデータのプレビューが表示されます。", "no_selection": "まだ抽出対象が選択されていません。選択すると、ロボットがここで選択内容のプレビューを表示します。" }, "data_sections": { "binary_received": "---------- バイナリ出力データを受信 ----------", "serializable_received": "---------- シリアライズ可能な出力データを受信 ----------", "mimetype": "MIMEタイプ: ", "image_below": "画像は以下に表示されます:", "separator": "--------------------------------------------------" }, "notifications": { "reset_success": "出力プレビューが正常にリセットされました" } }, "interpretation_buttons": { "buttons": { "preview": "出力データのプレビューを取得", "reset": "リセット", "yes": "はい", "no": "いいえ" }, "messages": { "extracting": "データを抽出しています...お待ちください", "restart_required": "録画を更新した後、解釈を再起動してください", "run_finished": "実行完了", "run_failed": "実行の開始に失敗しました" }, "modal": { "use_previous": "この操作の条件として前回の選択を使用しますか?", "previous_action": "前回の操作: ", "element_text": "テキスト要素 " }, "notifications": { "reset_success": "出力プレビューが正常にリセットされました" } }, "recording_page": { "loader": { "browser_startup": "ブラウザを起動中...しばらくお待ちください" } }, "integration_settings": { "title": "連携設定", "descriptions": { "authenticated_as": "認証済み: {{email}}" }, "buttons": { "submit": "送信", "remove_integration": "連携を解除" }, "google": { "title": "Google シートと連携", "descriptions": { "sync_info": 
"このオプションを有効にすると、このロボットがタスクを正常に実行するたびに、取得したデータがGoogle シートに追加されます。", "authenticated_as": "認証済み: {{email}}" }, "alerts": { "success": { "title": "Google シートの連携に成功しました。", "content": "このロボットが正常に実行されるたびに、取得したデータが{{sheetName}}という名前のGoogle シートに追加されます。データの更新を確認できます", "here": "こちら", "note": "注意:", "sync_limitation": "Google シートとの連携前に抽出されたデータはGoogle シートに同期されません。連携後に抽出されたデータのみが同期されます。" } }, "buttons": { "authenticate": "Googleで認証", "fetch_sheets": "Google スプレッドシートを取得", "remove_integration": "連携を解除", "submit": "送信" }, "fields": { "select_sheet": "Google シートを選択", "selected_sheet": "選択したシート: {{name}} (ID: {{id}})" }, "errors": { "auth_error": "Googleでの認証エラー", "fetch_error": "スプレッドシートファイルの取得エラー: {{message}}", "update_error": "Google シートIDの更新エラー: {{message}}", "remove_error": "Google シート連携の解除エラー: {{message}}" }, "notifications": { "sheet_selected": "Google シートが正常に選択されました", "integration_removed": "Google シートの連携が正常に解除されました" } }, "airtable": { "title": "Airtableと連携", "descriptions": { "sync_info": "このオプションを有効にすると、このロボットがタスクを正常に実行するたびに、取得したデータがAirtableに追加されます。", "authenticated_as": "Airtableで認証に成功しました。統合するベースとテーブルを選択できるようになりました。" }, "alerts": { "success": { "title": "Airtable ベースの統合に成功", "content": "このロボットが正常な実行を作成するたびに、キャプチャされたデータが {{baseName}} ベースと {{tableName}} テーブルに追加されます。更新情報を確認できます", "here": "こちら", "note": "注意:", "sync_limitation": "連携後に取得されたデータのみがAirtableに同期されます。" } }, "buttons": { "authenticate": "Airtableと接続", "fetch_bases": "Airtableベースを取得", "fetch_tables": "Airtableテーブルを取得", "remove_integration": "連携を解除", "submit": "ベースとテーブルを選択" }, "fields": { "select_base": "Airtableベースを選択", "select_table": "Airtableテーブルを選択", "selected_base": "選択したベース: {{name}}", "selected_table": "選択したテーブル: {{name}}" }, "errors": { "auth_error": "Airtableでの認証エラー", "fetch_error": "Airtableベースの取得エラー: {{message}}", "fetch_tables_error": "Airtableテーブルの取得エラー: {{message}}", "update_error": "Airtableベースの更新エラー: {{message}}", "remove_error": "Airtable連携の解除エラー: {{message}}" }, "notifications": { "base_selected": 
"Airtableベースが正常に選択されました", "table_selected": "Airtableテーブルが正常に選択されました", "integration_removed": "Airtable連携が正常に解除されました" } } }, "robot_duplication": { "title": "ロボットを複製", "descriptions": { "purpose": "ロボットの複製は、同じ構造のページからデータを抽出する際に便利です。", "example": "例:{{url1}}用のロボットを作成した場合、ロボットを一から作り直すことなく、{{url2}}のような類似のページをスクレイピングするために複製できます。", "warning": "⚠️ 新しいページが元のページと同じ構造であることを確認してください。" }, "fields": { "target_url": "ロボットのターゲットURL" }, "buttons": { "duplicate": "ロボットを複製", "cancel": "キャンセル" }, "notifications": { "robot_not_found": "ロボットの詳細が見つかりません。もう一度お試しください。", "url_required": "ターゲットURLが必要です。", "duplicate_success": "ロボットが正常に複製されました。", "duplicate_error": "ターゲットURLの更新に失敗しました。もう一度お試しください。", "unknown_error": "ターゲットURLの更新中にエラーが発生しました。" } }, "robot_settings": { "title": "ロボット設定", "target_url": "ロボットのターゲットURL", "robot_id": "ロボットID", "robot_limit": "ロボットの制限", "created_by_user": "作成したユーザー", "created_at": "作成日時", "errors": { "robot_not_found": "ロボットの詳細が見つかりませんでした。もう一度試してください。" }, "buttons": { "close": "閉じる" } }, "robot_edit": { "title": "ロボットを編集", "change_name": "ロボット名の変更", "robot_limit": "ロボットの制限", "save": "変更を保存", "cancel": "キャンセル", "notifications": { "update_success": "ロボットが正常に更新されました。", "update_failed": "ロボットの更新に失敗しました。もう一度試してください。", "update_error": "ロボットの更新中にエラーが発生しました。" } }, "schedule_settings": { "title": "スケジュール設定", "run_every": "実行間隔", "start_from": "開始日", "on_day": "日付", "at_around": "時刻", "timezone": "タイムゾーン", "buttons": { "delete_schedule": "スケジュールを削除", "save_schedule": "スケジュールを保存", "cancel": "キャンセル" }, "labels": { "in_between": "間隔", "run_once_every": "実行間隔", "start_from_label": "開始日", "on_day_of_month": "月の日付", "on_day": { "st": "日", "nd": "日", "rd": "日", "th": "日" } } }, "main_page": { "notifications": { "interpretation_success": "ロボット{{name}}の解釈に成功しました", "interpretation_failed": "ロボット{{name}}の解釈に失敗しました", "run_started": "ロボット{{name}}を実行中", "run_start_failed": "ロボット{{name}}の実行に失敗しました", "schedule_success": "ロボット{{name}}のスケジュールが正常に設定されました", "schedule_failed": 
"ロボット{{name}}のスケジュール設定に失敗しました", "abort_success": "ロボット{{name}}の解釈を中止しました", "abort_failed": "ロボット{{name}}の解釈中止に失敗しました", "abort_initiated": "ロボット {{name}} の解釈を中止しています" }, "menu": { "recordings": "ロボット", "runs": "実行", "proxy": "プロキシ", "apikey": "APIキー" } }, "browser_window": { "attribute_modal": { "title": "属性を選択", "notifications": { "list_select_success": "リストが正常に選択されました。抽出するテキストデータを選択してください。", "pagination_select_success": "ページネーション要素が正常に選択されました。" } }, "attribute_options": { "anchor": { "text": "テキスト: {{text}}", "url": "URL: {{url}}" }, "image": { "alt_text": "代替テキスト: {{altText}}", "image_url": "画像URL: {{imageUrl}}" }, "default": { "text": "テキスト: {{text}}" } } }, "runs_table": { "run_type_chips": { "manual_run": "手動実行", "scheduled_run": "スケジュール実行", "api": "API", "unknown_run_type": "不明な実行タイプ" }, "run_status_chips": { "success": "成功", "running": "実行中", "scheduled": "スケジュール済み", "queued": "キューに入れました", "failed": "失敗", "aborted": "中止されました" }, "run_settings_modal": { "title": "実行設定", "labels": { "run_id": "実行ID", "run_by_user": "ユーザーによる実行", "run_by_schedule": "スケジュールによる実行", "run_by_api": "APIによる実行", "run_type": "実行タイプ" } } }, "run_content": { "tabs": { "output_data": "出力データ", "log": "ログ" }, "buttons": { "stop": "停止" }, "loading": "データを読み込み中...", "empty_output": "出力データがありません", "captured_data": { "title": "キャプチャしたデータ", "download_csv": "CSVをダウンロード", "view_full": "完全なデータを表示", "items": "アイテム", "schema_title": "キャプチャしたテキスト", "list_title": "キャプチャしたリスト" }, "captured_screenshot": { "title": "キャプチャしたスクリーンショット", "download": "ダウンロード", "render_failed": "スクリーンショットのレンダリングに失敗しました" } }, "navbar": { "project_name": "Maxun", "notifications": { "success": { "logout": "ログアウトに成功しました" }, "errors": { "logout": { "unauthorized": "この操作を実行する権限がありません", "server": "ログアウト中にサーバーエラーが発生しました", "network": "ログアウト中にネットワークエラーが発生しました", "unknown": "ログアウト中に予期せぬエラーが発生しました" } } }, "upgrade": { "button": "アップグレード", "modal": { "up_to_date": "最新版です!", "new_version_available": "新しいバージョンが利用可能です: 
{{version}}。バグ修正、機能強化のために最新版にアップグレードしてください。", "view_updates": "すべての更新を", "view_updates_link": "こちら", "tabs": { "manual_setup": "手動セットアップ", "docker_setup": "Docker Composeセットアップ" } } }, "menu_items": { "logout": "ログアウト", "discord": "Discord", "youtube": "YouTube", "twitter": "Twitter (X)", "language": "言語" }, "recording": { "discard": "破棄" } }, "language_menu": { "en": "英語", "es": "スペイン語", "ja": "日本語", "zh": "中国語", "de": "ドイツ語", "tr": "トルコ語" } } ================================================ FILE: public/locales/tr.json ================================================ { "login": { "title": "Tekrar Hoş Geldiniz!", "email": "İş E‑postası Girin", "password": "Şifre", "button": "Giriş Yap", "loading": "Yükleniyor", "register_prompt": "Hesabınız yok mu?", "register_link": "Kaydol", "welcome_notification": "Maxun’a Hoş Geldiniz!", "validation": { "required_fields": "E‑posta ve şifre zorunludur", "password_length": "Şifre en az 6 karakter olmalıdır" }, "error": { "user_not_found": "Kullanıcı mevcut değil", "invalid_credentials": "Geçersiz e‑posta veya şifre", "server_error": "Giriş başarısız. Lütfen daha sonra tekrar deneyin", "generic": "Bir hata oluştu. Lütfen tekrar deneyin" } }, "register": { "title": "Hesap Oluştur", "email": "İş E‑postası Girin", "password": "Şifre", "button": "Kaydol", "loading": "Yükleniyor", "register_prompt": "Zaten hesabınız var mı?", "login_link": "Giriş Yap", "welcome_notification": "Maxun’a Hoş Geldiniz!", "validation": { "email_required": "E‑posta zorunludur", "password_requirements": "Şifre en az 6 karakter olmalıdır" }, "error": { "user_exists": "Bu e‑posta ile kullanıcı zaten mevcut", "creation_failed": "Hesap oluşturulamadı", "server_error": "Sunucu hatası oluştu", "generic": "Kayıt başarısız. 
Lütfen tekrar deneyin" } }, "recordingtable": { "run": "Çalıştır", "name": "Ad", "schedule": "Zamanlama", "integrate": "Entegre Et", "settings": "Ayarlar", "options": "Seçenekler", "heading": "Robotlarım", "new": "Robot Oluştur", "search_criteria": "Arama kriterlerinizi değiştirmeyi deneyin", "placeholder": { "title": "Başlamaya Hazırsınız", "body": "Oluşturduğunuz robotlar burada görünecektir. Başlamak için \"Robot Oluştur\"a tıklayın!", "search": "Aramanızla eşleşen robot yok" }, "modal": { "title": "URL’yi Girin", "login_title": "Bu web sitesine giriş gerekiyor mu?", "label": "URL", "button": "Kaydı Başlat" }, "warning_modal": { "title": "Aktif Tarayıcı Tespit Edildi", "message": "Hâlihazırda çalışan bir tarayıcı kayıt oturumu var. İptal edip yeni bir kayıt başlatmak ister misiniz?", "discard_and_create": "İptal Et ve Yenisi Oluştur", "cancel": "İptal" }, "retrain": "Yeniden Eğit", "edit": "Düzenle", "delete": "Sil", "duplicate": "Çoğalt", "search": "Robot Ara...", "notifications": { "delete_warning": "Robota bağlı çalıştırmalar var. Robotu silmeden önce çalıştırmaları silin", "delete_success": "Robot başarıyla silindi", "auth_success": "Robot başarıyla kimlik doğrulandı", "browser_limit_warning": "Uzak tarayıcılar şu anda meşgul. Lütfen birkaç dakika bekleyip tekrar deneyin" } }, "mainmenu": { "recordings": "Robotlar", "runs": "Çalıştırmalar", "proxy": "Proxy", "apikey": "API Anahtarı", "feedback": "Maxun Cloud’a Katıl", "apidocs": "Web Siteyi API’ye Dönüştür" }, "runstable": { "runs": "Tüm Çalıştırmalar", "runStatus": "Durum", "runName": "Ad", "name": "Ad", "startedAt": "Başlama", "finishedAt": "Bitiş", "delete": "Sil", "settings": "Ayarlar", "search": "Çalıştırma Ara...", "sort_tooltip": "Sıralamak için tıkla", "placeholder": { "title": "Çalıştırma Bulunamadı", "body": "Tüm robot çalıştırmalarınız burada görünecektir. 
Bir robot aktif olduğunda, çalıştırmaları buraya kaydedilecektir.", "search": "Aramanızla eşleşen çalıştırma yok" }, "notifications": { "no_runs": "Çalıştırma bulunamadı. Lütfen tekrar deneyin.", "delete_success": "Çalıştırma başarıyla silindi" } }, "proxy": { "title": "Proxy Yapılandırması", "tab_standard": "Standart Proxy", "tab_rotation": "Otomatik Proxy Döndürme", "server_url": "Proxy Sunucu URL’si", "server_url_helper": "Tüm robotlar için kullanılacak proxy. HTTP ve SOCKS desteklenir. Örnek http://myproxy.com:3128 veya socks5://myproxy.com:3128. Kısa biçim myproxy.com:3128 HTTP proxy kabul edilir.", "requires_auth": "Kimlik Doğrulama Gerekli mi?", "username": "Kullanıcı Adı", "password": "Şifre", "add_proxy": "Proxy Ekle", "test_proxy": "Proxy’yi Test Et", "remove_proxy": "Proxy’yi Kaldır", "table": { "proxy_url": "Proxy URL", "requires_auth": "Kimlik Doğrulama Gerekli" }, "coming_soon": "Yakında — Açık Kaynak (Temel Döndürme) & Cloud (Gelişmiş Döndürme). Altyapı yönetmek istemiyorsanız, erken erişim için cloud bekleme listemize katılın.", "join_waitlist": "Maxun Cloud Bekleme Listesine Katıl", "alert": { "title": "Proxy’niz kullanıcı adı ve şifre gerektiriyorsa, bunları her zaman URL’den ayrı girin.", "right_way": "Doğru yol", "wrong_way": "Yanlış yol", "proxy_url": "Proxy URL:", "username": "Kullanıcı Adı:", "password": "Şifre:" }, "notifications": { "config_success": "Proxy yapılandırması başarıyla gönderildi", "config_error": "Proxy yapılandırması gönderilemedi. Tekrar deneyin.", "test_success": "Proxy yapılandırması çalışıyor", "test_error": "Proxy testi başarısız. Tekrar deneyin.", "fetch_success": "Proxy yapılandırması alındı", "remove_success": "Proxy yapılandırması kaldırıldı", "remove_error": "Proxy kaldırma başarısız. Tekrar deneyin." 
} }, "apikey": { "title": "API Anahtarını Yönet", "default_name": "Maxun API Anahtarı", "table": { "name": "API Anahtar Adı", "key": "API Anahtarı", "actions": "Eylemler" }, "actions": { "copy": "Kopyala", "show": "Göster", "hide": "Gizle", "delete": "Sil" }, "no_key_message": "Henüz bir API anahtarı oluşturmadınız.", "generate_button": "API Anahtarı Oluştur", "notifications": { "fetch_error": "API Anahtarı alınamadı - {{error}}", "generate_success": "API anahtarı oluşturuldu", "generate_error": "API anahtarı oluşturulamadı - {{error}}", "delete_success": "API anahtarı silindi", "delete_error": "API anahtarı silinemedi - {{error}}", "copy_success": "API anahtarı kopyalandı" } }, "action_description": { "text": { "title": "Metin Yakala", "description": "Çıkarmak istediğiniz metinlerin üzerine gelin ve tıklayarak seçin" }, "screenshot": { "title": "Ekran Görüntüsü Yakala", "description": "Sayfanın tamamının veya bir bölümünün ekran görüntüsünü alın" }, "list": { "title": "Liste Yakala", "description": "Çıkarmak istediğiniz listenin üzerine gelin. Seçtikten sonra, listenin içindeki tüm metinleri seçebilirsiniz." }, "default": { "title": "Hangi verileri çıkarmak istiyorsunuz?", "description": "Bir robot bir veya birden fazla işlem gerçekleştirebilir. Aşağıdaki seçeneklerden seçim yapın." 
}, "list_stages": { "initial": "Listeyi ve içindeki metinleri seçin", "pagination": "Robotun listenin geri kalanını nasıl yakalayacağını seçin", "limit": "Çıkarılacak öğe sayısını seçin", "complete": "Yakalama tamamlandı" }, "actions": { "text": "Metin Yakala", "list": "Liste Yakala", "screenshot": "Ekran Görüntüsü Yakala" } }, "right_panel": { "buttons": { "capture_list": "Liste Yakala", "capture_text": "Metin Yakala", "capture_screenshot": "Ekran Görüntüsü Yakala", "confirm": "Onayla", "discard": "İptal", "confirm_capture": "Yakalamayı Onayla", "confirm_pagination": "Onayla", "confirm_limit": "Onayla", "confirm_reset": "Onayla", "finish_capture": "Yakalamayı Bitir", "back": "Geri", "reset": "Kaydı Yeniden Başlat", "finish": "Bitir", "cancel": "İptal", "delete": "Sil" }, "screenshot": { "capture_fullpage": "Tam Sayfa Yakala", "capture_visible": "Görünen Kısmı Yakala", "display_fullpage": "Tam Sayfa Görüntü Al", "display_visible": "Görünen Kısmın Görüntüsünü Al" }, "pagination": { "title": "Sayfada sonraki liste öğesini nasıl bulalım?", "click_next": "Sonraki sayfaya gitmek için ‘sonraki’yi tıkla", "click_load_more": "Daha fazla yüklemek için ‘daha fazla yükle’yi tıkla", "scroll_down": "Daha fazla öğe için aşağı kaydır", "scroll_up": "Daha fazla öğe için yukarı kaydır", "none": "Yüklenecek başka öğe yok" }, "limit": { "title": "En fazla kaç satır çıkarmak istiyorsunuz?", "custom": "Özel", "enter_number": "Sayı gir" }, "fields": { "label": "Etiket", "data": "Veri", "field_label": "Alan Etiketi", "field_data": "Alan Verisi" }, "messages": { "list_selected": "Liste başarıyla seçildi", "list_empty": "Liste seçildi. Lütfen listenin içindeki alanları seçin." 
}, "errors": { "select_pagination": "Lütfen bir sayfalama tipi seçin.", "select_pagination_element": "Lütfen önce sayfalama öğesini seçin.", "select_limit": "Lütfen bir limit seçin veya özel limit girin.", "invalid_limit": "Geçerli bir limit girin.", "confirm_text_fields": "Lütfen tüm metin alanlarını onaylayın", "unable_create_settings": "Liste ayarları oluşturulamadı. Bir alan tanımladığınızdan emin olun.", "capture_text_discarded": "Metin Yakalama İptal Edildi", "capture_list_discarded": "Liste Yakalama İptal Edildi", "label_required": "Etiket boş olamaz", "no_text_captured": "Henüz metin yakalanmadı. Lütfen önce metin öğeleri seçin.", "duplicate_label": "Bu etiket zaten mevcut. Lütfen benzersiz bir etiket kullanın.", "capture_list_first": "Lütfen onaylamadan önce bir liste yakalayın ve alanlar seçin.", "confirm_all_list_fields": "Lütfen devam etmeden önce tüm liste alanlarını onaylayın." }, "tooltips": { "confirm_all_list_fields": "Lütfen bir sonraki adıma geçmeden önce tüm liste alanlarını onaylayın" } }, "save_recording": { "title": "Robotu Kaydet", "robot_name": "Robot Adı", "buttons": { "save": "Kaydet", "confirm": "Onayla" }, "notifications": { "save_success": "Robot kaydedildi", "retrain_success": "Robot yeniden eğitildi", "save_error": "Robot kaydedilirken hata" }, "errors": { "user_not_logged": "Kullanıcı girişi yok. Kaydedilemedi.", "exists_warning": "Bu isimde robot zaten var; üzerine yazmayı onaylayın.", "no_actions_performed": "Robot kaydedilemez. Lütfen kaydetmeden önce en az bir yakalama eylemi gerçekleştirin." }, "tooltips": { "saving": "Akış optimize ediliyor ve kaydediliyor" } }, "browser_recording": { "modal": { "confirm_discard": "Kaydı iptal etmek istediğinize emin misiniz?", "confirm_reset": "Kaydı yeniden başlatmak istediğinize emin misiniz?", "reset_warning": "Bu işlem, mevcut oturumdaki tüm yakalamaları temizler ve aynı site için kaydı yeniden başlatır." 
}, "notifications": { "terminated": "Kayıt sonlandırıldı", "environment_reset": "Tarayıcı ortamı sıfırlandı", "reset_successful": "Yakalamalar sıfırlandı ve başlangıç durumuna dönüldü" } }, "interpretation_log": { "titles": { "output_preview": "Çıktı Verisi Önizlemesi", "screenshot": "Ekran Görüntüsü" }, "messages": { "additional_rows": "Kaydı bitirdiğinizde ek satırlar çıkarılacak.", "successful_training": "Robotu başarıyla eğittiniz! Çıkaracağı verilerin önizlemesi için aşağıdaki butona tıklayın.", "no_selection": "Henüz seçim yapmadınız. Seçim yaptığınızda önizleme burada görünecek." }, "data_sections": { "binary_received": "---------- İkili çıktı verisi alındı ----------", "serializable_received": "---------- Serileştirilebilir çıktı verisi alındı ----------", "mimetype": "mimetype: ", "image_below": "Görüntü aşağıda:", "separator": "--------------------------------------------------" }, "notifications": { "reset_success": "Önizleme sıfırlandı" } }, "interpretation_buttons": { "buttons": { "preview": "Çıktı Önizle", "reset": "Sıfırla", "yes": "Evet", "no": "Hayır" }, "messages": { "extracting": "Veri çıkarılıyor... lütfen bekleyin", "restart_required": "Kaydı güncelledikten sonra yorumlamayı yeniden başlatın", "run_finished": "Çalıştırma tamamlandı", "run_failed": "Çalıştırma başlatılamadı" }, "modal": { "use_previous": "Bu işlem için önceki seçiminizi koşul olarak kullanmak ister misiniz?", "previous_action": "Önceki işleminiz:", "element_text": " metnine sahip öğe" }, "notifications": { "reset_success": "Önizleme başarıyla sıfırlandı" } }, "recording_page": { "loader": { "browser_startup": "Tarayıcı başlatılıyor... 
Lütfen bekleyin" } }, "integration_settings": { "title": "Entegrasyon Ayarları", "descriptions": { "authenticated_as": "Kimlik doğrulandı: {{email}}" }, "buttons": { "submit": "Gönder", "remove_integration": "Entegrasyonu Kaldır" }, "google": { "title": "Google Sheet ile Entegrasyon", "descriptions": { "sync_info": "Bu seçenek etkinse robot başarılı olduğunda veriler Google Sheet’e eklenir.", "authenticated_as": "Kimlik doğrulandı: {{email}}" }, "alerts": { "success": { "title": "Google Sheet entegrasyonu başarılı", "content": "Robot her başarılı çalıştırmada veriyi {{sheetName}} sayfanıza ekler. Kontrol etmek için", "here": "buraya", "note": "Not:", "sync_limitation": "Entegrasyon öncesi veriler senkronize edilmez." } }, "buttons": { "authenticate": "Google ile Giriş Yap", "fetch_sheets": "E‑Tabloları Getir", "remove_integration": "Entegrasyonu Kaldır", "submit": "Gönder" }, "fields": { "select_sheet": "Google Sheet Seç", "selected_sheet": "Seçilen Sheet: {{name}} (ID: {{id}})" }, "errors": { "auth_error": "Google kimlik doğrulama hatası", "fetch_error": "E‑tablo alma hatası: {{message}}", "update_error": "Sheet ID güncelleme hatası: {{message}}", "remove_error": "Google Sheets entegrasyon kaldırma hatası: {{message}}" }, "notifications": { "sheet_selected": "Google Sheet seçildi", "integration_removed": "Google Sheets entegrasyonu kaldırıldı" } }, "airtable": { "title": "Airtable ile Entegrasyon", "descriptions": { "sync_info": "Bu seçenek etkinse robot başarılı olduğunda veriler Airtable Base’e eklenir.", "authenticated_as": "Airtable kimlik doğrulandı. Base ve tablo seçebilirsiniz." }, "alerts": { "success": { "title": "Airtable entegrasyonu başarılı", "content": "Robot her başarılı çalıştırmada veriyi {{baseName}} > {{tableName}}’e ekler. Kontrol etmek için", "here": "buraya", "note": "Not:", "sync_limitation": "Yalnızca entegrasyon sonrası veriler senkronize edilir." 
} }, "buttons": { "authenticate": "Airtable’a Bağlan", "fetch_bases": "Base’leri Getir", "fetch_tables": "Tabloları Getir", "remove_integration": "Entegrasyonu Kaldır", "submit": "Base ve Tablo Seç" }, "fields": { "select_base": "Airtable Base Seç", "select_table": "Airtable Tablo Seç", "selected_base": "Seçilen Base: {{name}}", "selected_table": "Seçilen Tablo: {{name}}" }, "errors": { "auth_error": "Airtable kimlik doğrulama hatası", "fetch_error": "Base alma hatası: {{message}}", "fetch_tables_error": "Tablo alma hatası: {{message}}", "update_error": "Base güncelleme hatası: {{message}}", "remove_error": "Airtable entegrasyon kaldırma hatası: {{message}}" }, "notifications": { "base_selected": "Base seçildi", "table_selected": "Tablo seçildi", "integration_removed": "Airtable entegrasyonu kaldırıldı" } } }, "robot_duplication": { "title": "Robotu Çoğalt", "descriptions": { "purpose": "Aynı yapıya sahip sayfalarda veri toplamak için kullanılır.", "example": "Örnek: {{url1}} için robot oluşturduysanız, benzer {{url2}} sayfaları için çoğaltabilirsiniz.", "warning": "⚠️ Yeni sayfanın yapısının aynı olduğundan emin olun." }, "fields": { "target_url": "Robot Hedef URL" }, "buttons": { "duplicate": "Robotu Çoğalt", "cancel": "İptal" }, "notifications": { "robot_not_found": "Robot bulunamadı. Tekrar deneyin.", "url_required": "Hedef URL gerekli.", "duplicate_success": "Robot çoğaltıldı", "duplicate_error": "Hedef URL güncellenemedi. Tekrar deneyin.", "unknown_error": "Hedef URL güncellenirken hata oluştu" } }, "robot_settings": { "title": "Robot Ayarları", "target_url": "Robot Hedef URL", "robot_id": "Robot ID", "robot_limit": "Robot Limiti", "created_by_user": "Oluşturan", "created_at": "Oluşturulma", "errors": { "robot_not_found": "Robot bulunamadı. Tekrar deneyin." 
}, "buttons": { "close": "Kapat" } }, "robot_edit": { "title": "Robotu Düzenle", "change_name": "Robot Adı", "robot_limit": "Robot Limiti", "save": "Değişiklikleri Kaydet", "cancel": "İptal", "notifications": { "update_success": "Robot güncellendi", "update_failed": "Robot güncellenemedi. Tekrar deneyin.", "update_error": "Güncelleme sırasında hata" } }, "schedule_settings": { "title": "Zamanlama Ayarları", "run_every": "Çalıştırma aralığı", "start_from": "Başlangıç", "on_day": "Gününde", "at_around": "Saat civarı", "timezone": "Zaman Dilimi", "buttons": { "delete_schedule": "Zamanlamayı Sil", "save_schedule": "Zamanlamayı Kaydet", "cancel": "İptal" }, "labels": { "in_between": "Arasında", "run_once_every": "Her", "start_from_label": "Başlangıç", "on_day_of_month": "Ayın Günü", "on_day": { "st": ".", "nd": ".", "rd": ".", "th": "." } } }, "main_page": { "notifications": { "interpretation_success": "Robot {{name}} yorumlandı", "interpretation_failed": "Robot {{name}} yorumlanamadı", "run_started": "Robot çalıştırılıyor: {{name}}", "run_start_failed": "Robot çalıştırılamadı: {{name}}", "schedule_success": "Robot {{name}} zamanlandı", "schedule_failed": "Robot {{name}} zamanlanamadı", "abort_success": "Robot {{name}} yorumlaması iptal edildi", "abort_failed": "Robot {{name}} yorumlaması iptal edilemedi", "abort_initiated": "Robot {{name}} yorumu iptal ediliyor" }, "menu": { "recordings": "Robotlar", "runs": "Çalıştırmalar", "proxy": "Proxy", "apikey": "API Anahtarı" } }, "browser_window": { "attribute_modal": { "title": "Öznitelik Seç", "notifications": { "list_select_success": "Liste seçildi. 
Çıkarılacak verileri seçin.", "pagination_select_success": "Sayfalama öğesi seçildi" } }, "attribute_options": { "anchor": { "text": "Metin: {{text}}", "url": "URL: {{url}}" }, "image": { "alt_text": "Alternatif Metin: {{altText}}", "image_url": "Görsel URL: {{imageUrl}}" }, "default": { "text": "Metin: {{text}}" } } }, "runs_table": { "run_type_chips": { "manual_run": "Manuel", "scheduled_run": "Zamanlanmış", "api": "API", "unknown_run_type": "Bilinmeyen" }, "run_status_chips": { "success": "Başarılı", "running": "Çalışıyor", "scheduled": "Zamanlandı", "queued": "Kuyrukta", "failed": "Başarısız", "aborted": "İptal" }, "run_settings_modal": { "title": "Çalıştırma Ayarları", "labels": { "run_id": "Çalıştırma ID", "run_by_user": "Kullanıcı", "run_by_schedule": "Zamanlama ID", "run_by_api": "API", "run_type": "Tür" } } }, "run_content": { "tabs": { "output_data": "Çıktı Verisi", "log": "Kayıt" }, "buttons": { "stop": "Durdur" }, "loading": "Veriler yükleniyor...", "empty_output": "Çıktı verisi yok", "captured_data": { "title": "Yakalanan Veriler", "download_csv": "CSV İndir", "view_full": "Tam Veriyi Gör", "items": "öğe", "schema_title": "Yakalanan Metinler", "list_title": "Yakalanan Listeler" }, "captured_screenshot": { "title": "Yakalanan Görüntüler", "download": "İndir", "render_failed": "Görüntü render edilemedi" } }, "navbar": { "project_name": "Maxun", "notifications": { "success": { "logout": "Çıkış yapıldı" }, "errors": { "logout": { "unauthorized": "Bu işlemi yapmaya yetkiniz yok", "server": "Çıkış sırasında sunucu hatası", "network": "Çıkış sırasında ağ hatası", "unknown": "Bilinmeyen hata oluştu" } } }, "upgrade": { "button": "Yükselt", "modal": { "up_to_date": "🎉 Güncelsiniz!", "new_version_available": "Yeni sürüm mevcut: {{version}}. 
Güncelleyerek yeni özelliklere erişin!", "view_updates": "Tüm güncellemeleri görüntüle", "view_updates_link": "buradan", "tabs": { "manual_setup": "Manuel Kurulum", "docker_setup": "Docker Kurulumu" } } }, "menu_items": { "logout": "Çıkış Yap", "discord": "Discord", "youtube": "YouTube", "twitter": "Twitter (X)", "language": "Dil" }, "recording": { "discard": "İptal" } }, "language_menu": { "en": "İngilizce", "es": "İspanyolca", "ja": "Japonca", "zh": "Çince", "de": "Almanca", "tr": "Türkçe" } } ================================================ FILE: public/locales/zh.json ================================================ { "login": { "title": "欢迎回来!", "email": "输入工作电子邮箱", "password": "密码", "button": "登录", "loading": "加载中", "register_prompt": "还没有账号?", "register_link": "注册", "welcome_notification": "欢迎使用 Maxun!", "validation": { "required_fields": "邮箱和密码为必填项", "password_length": "密码必须至少6个字符" }, "error": { "user_not_found": "用户不存在", "invalid_credentials": "邮箱或密码无效", "server_error": "登录失败,请稍后重试", "generic": "发生错误,请重试" } }, "register": { "title": "注册账号", "email": "输入工作电子邮箱", "password": "密码", "button": "注册", "loading": "加载中", "register_prompt": "已有账号?", "login_link": "登录", "welcome_notification": "欢迎使用 Maxun!", "validation": { "email_required": "邮箱是必填项", "password_requirements": "密码必须至少6个字符" }, "error": { "user_exists": "该邮箱已被注册", "creation_failed": "无法创建账户", "server_error": "服务器错误", "generic": "注册失败,请重试" } }, "recordingtable": { "run": "运行", "name": "名称", "schedule": "计划", "integrate": "集成", "settings": "设置", "options": "选项", "heading": "我的机器人", "new": "创建机器人", "search_criteria": "请尝试调整您的搜索条件", "placeholder": { "title": "一切就绪,可以开始了", "body": "您创建的机器人将显示在这里。点击“创建机器人”即可开始!", "search": "没有与您搜索匹配的机器人" }, "modal": { "title": "输入URL", "login_title": "此网站需要登录吗?", "label": "URL", "button": "开始录制" }, "retrain": "重新训练", "edit": "编辑", "delete": "删除", "duplicate": "复制", "search": "搜索机器人...", "warning_modal": { "title": "检测到活跃浏览器", "message": "已经有一个浏览器录制会话正在运行。您想要放弃它并创建新的录制吗?", 
"discard_and_create": "放弃并创建新的", "cancel": "取消" }, "notifications": { "delete_warning": "该机器人有关联的运行记录。请先删除运行记录才能删除机器人", "delete_success": "机器人删除成功", "auth_success": "机器人认证成功", "browser_limit_warning": "远程浏览器当前繁忙。请稍等几分钟后重试" } }, "mainmenu": { "recordings": "机器人", "runs": "运行记录", "proxy": "代理", "apikey": "API密钥", "feedback": "加入 Maxun Cloud", "apidocs": "网站转API" }, "runstable": { "runs": "所有运行记录", "runStatus": "状态", "runName": "名称", "name": "名称", "startedAt": "开始时间", "finishedAt": "结束时间", "delete": "删除", "settings": "设置", "search": "搜索运行记录...", "sort_tooltip": "点击排序", "placeholder": { "title": "未找到运行记录", "body": "您所有的机器人运行记录都将显示在此处。一旦机器人被激活,其运行记录将在这里记下。", "search": "没有与您搜索匹配的运行记录" }, "notifications": { "no_runs": "未找到运行记录。请重试。", "delete_success": "运行记录删除成功" } }, "proxy": { "title": "代理设置", "tab_standard": "标准代理", "tab_rotation": "自动代理轮换", "server_url": "代理服务器URL", "server_url_helper": "用于所有机器人的代理。支持HTTP和SOCKS代理。示例 http://myproxy.com:3128 或 socks5://myproxy.com:3128。简短形式 myproxy.com:3128 被视为HTTP代理。", "requires_auth": "需要认证?", "username": "用户名", "password": "密码", "add_proxy": "添加代理", "test_proxy": "测试代理", "remove_proxy": "删除代理", "table": { "proxy_url": "代理URL", "requires_auth": "需要认证" }, "coming_soon": "即将推出 - 开源版(基础轮换)和云版(高级轮换)。如果您不想管理基础设施,请加入我们的云服务等候名单以获得早期访问权限。", "join_waitlist": "加入Maxun Cloud等候名单", "alert": { "title": "如果您的代理需要用户名和密码,请务必将它们与代理URL分开提供。", "right_way": "正确方式", "wrong_way": "错误方式", "proxy_url": "代理URL:", "username": "用户名:", "password": "密码:" }, "notifications": { "config_success": "代理配置提交成功", "config_error": "提交代理配置失败。请重试。", "test_success": "代理配置运行正常", "test_error": "测试代理配置失败。请重试。", "fetch_success": "成功获取代理配置", "remove_success": "成功删除代理配置", "remove_error": "删除代理配置失败。请重试。" } }, "apikey": { "title": "管理API密钥", "default_name": "Maxun API密钥", "table": { "name": "API密钥名称", "key": "API密钥", "actions": "操作" }, "actions": { "copy": "复制", "show": "显示", "hide": "隐藏", "delete": "删除" }, "no_key_message": "您还未生成API密钥。", "generate_button": "生成API密钥", 
"notifications": { "fetch_error": "获取API密钥失败 - {{error}}", "generate_success": "成功生成API密钥", "generate_error": "生成API密钥失败 - {{error}}", "delete_success": "成功删除API密钥", "delete_error": "删除API密钥失败 - {{error}}", "copy_success": "成功复制API密钥" } }, "action_description": { "text": { "title": "捕获文本", "description": "将鼠标悬停在要提取的文本上并点击选择" }, "screenshot": { "title": "捕获截图", "description": "捕获当前页面的部分或全部截图。" }, "list": { "title": "捕获列表", "description": "将鼠标悬停在要提取的列表上。选择后,您可以将鼠标悬停在所选列表中的所有文本上。点击选择它们。" }, "default": { "title": "您想提取什么数据?", "description": "机器人可以执行一个或多个动作。请从以下选项中选择。" }, "list_stages": { "initial": "选择要提取的列表及其中的文本", "pagination": "选择机器人如何捕获列表的其余部分", "limit": "选择要提取的项目数量", "complete": "捕获完成" }, "actions": { "text": "捕获文本", "list": "捕获列表", "screenshot": "捕获截图" } }, "right_panel": { "buttons": { "capture_list": "捕获列表", "capture_text": "捕获文本", "capture_screenshot": "捕获截图", "confirm": "确认", "discard": "放弃", "confirm_capture": "确认捕获", "confirm_pagination": "确认", "confirm_limit": "确认", "confirm_reset": "确认", "finish_capture": "完成捕获", "back": "返回", "reset": "重置", "finish": "完成", "cancel": "取消", "delete": "删除" }, "screenshot": { "capture_fullpage": "捕获整页", "capture_visible": "捕获可见部分", "display_fullpage": "获取整页截图", "display_visible": "获取可见部分截图" }, "pagination": { "title": "如何在页面上找到下一个列表项?", "click_next": "点击下一页导航到下一页", "click_load_more": "点击加载更多来加载更多项目", "scroll_down": "向下滚动加载更多项目", "scroll_up": "向上滚动加载更多项目", "none": "没有更多项目可加载" }, "limit": { "title": "您想要提取的最大行数是多少?", "custom": "自定义", "enter_number": "输入数字" }, "fields": { "label": "标签", "data": "数据", "field_label": "字段标签", "field_data": "字段数据" }, "messages": { "list_selected": "列表选择成功", "list_empty": "已选择列表。请选择列表内的字段。" }, "errors": { "select_pagination": "请选择分页类型。", "select_pagination_element": "请先选择分页元素。", "select_limit": "请选择限制或输入自定义限制。", "invalid_limit": "请输入有效的限制。", "confirm_text_fields": "请确认所有文本字段", "unable_create_settings": "无法创建列表设置。请确保您已为列表定义了字段。", "capture_text_discarded": "文本捕获已放弃", "capture_list_discarded": 
"列表捕获已放弃", "label_required": "标签不能为空", "duplicate_label": "此标签已存在。请使用唯一的标签。", "no_text_captured": "请在确认之前先高亮并选择文本元素。", "capture_list_first": "请先将鼠标悬停在列表上并选择其中的文本字段", "confirm_all_list_fields": "请在继续之前确认所有已捕获的列表字段" }, "tooltips": { "capture_list_first": "将鼠标悬停在列表上并选择其中的文本字段", "confirm_all_list_fields": "请确认所有已捕获的列表字段" } }, "save_recording": { "title": "保存机器人", "robot_name": "机器人名称", "buttons": { "save": "保存", "confirm": "确认" }, "notifications": { "save_success": "机器人保存成功", "retrain_success": "机器人重新训练成功", "save_error": "保存机器人时出错" }, "errors": { "user_not_logged": "用户未登录。无法保存录制。", "exists_warning": "已存在同名机器人,请确认是否覆盖机器人。", "no_actions_performed": "无法保存机器人。请在保存之前至少执行一次捕获操作。" }, "tooltips": { "saving": "正在优化并保存工作流程" } }, "browser_recording": { "modal": { "confirm_discard": "您确定要放弃此录制吗?", "confirm_reset": "您确定要重置吗?", "reset_warning": "这将清除当前会话中的所有先前捕获。录制会话将针对同一网站重新启动。" }, "notifications": { "terminated": "当前录制已终止", "environment_reset": "浏览器环境已重置", "reset_successful": "已成功重置所有捕获并返回初始状态" } }, "interpretation_log": { "titles": { "output_preview": "输出数据预览", "screenshot": "截图" }, "messages": { "additional_rows": "完成录制后将提取更多数据行。", "successful_training": "您已成功训练机器人执行操作!点击下方按钮预览机器人将提取的数据。", "no_selection": "看起来您还没有选择要提取的内容。选择后,机器人将在此处显示您的选择预览。" }, "data_sections": { "binary_received": "---------- 已接收二进制输出数据 ----------", "serializable_received": "---------- 已接收可序列化输出数据 ----------", "mimetype": "MIME类型:", "image_below": "图片显示如下:", "separator": "--------------------------------------------------" }, "notifications": { "reset_success": "输出预览已成功重置" } }, "interpretation_buttons": { "buttons": { "preview": "获取输出数据预览", "reset": "重置", "yes": "是", "no": "否" }, "messages": { "extracting": "正在提取数据...请等待", "restart_required": "更新录制后请重新启动解释", "run_finished": "运行完成", "run_failed": "运行启动失败" }, "modal": { "use_previous": "您要将之前的选择用作执行此操作的条件吗?", "previous_action": "您之前的操作是:", "element_text": "在文本元素上 " }, "notifications": { "reset_success": "输出预览已成功重置" } }, "recording_page": { "loader": { 
"browser_startup": "正在启动浏览器...抓紧" } }, "integration_settings": { "title": "集成设置", "descriptions": { "authenticated_as": "已认证为:{{email}}" }, "buttons": { "submit": "提交", "remove_integration": "移除集成" }, "google": { "title": "与Google表格集成", "descriptions": { "sync_info": "如果启用此选项,每次机器人成功运行任务时,其捕获的数据将附加到您的Google表格中。", "authenticated_as": "已认证为:{{email}}" }, "alerts": { "success": { "title": "Google表格集成成功。", "content": "每次此机器人创建成功运行时,其捕获的数据将附加到您的{{sheetName}} Google表格中。您可以检查数据更新", "here": "在这里", "note": "注意:", "sync_limitation": "在与Google表格集成之前提取的数据将不会在Google表格中同步。只有集成后提取的数据才会同步。" } }, "buttons": { "authenticate": "使用Google认证", "fetch_sheets": "获取Google电子表格", "remove_integration": "移除集成", "submit": "提交" }, "fields": { "select_sheet": "选择Google表格", "selected_sheet": "已选择表格:{{name}}(ID:{{id}})" }, "errors": { "auth_error": "使用Google认证时出错", "fetch_error": "获取电子表格文件时出错:{{message}}", "update_error": "更新Google表格ID时出错:{{message}}", "remove_error": "移除Google表格集成时出错:{{message}}" }, "notifications": { "sheet_selected": "Google表格选择成功", "integration_removed": "Google表格集成已成功移除" } }, "airtable": { "title": "与Airtable集成", "descriptions": { "sync_info": "如果启用此选项,每次机器人成功运行任务时,其捕获的数据将附加到您的Airtable中。", "authenticated_as": "已成功通过 Airtable 进行身份验证。您现在可以选择要集成的底座和桌子。" }, "alerts": { "success": { "title": "Airtable 基地成功集成", "content": "每次此机器人成功运行时,其捕获的数据都会附加到您的 {{baseName}} 基础和 {{tableName}} 表中。您可以检查更新情况", "here": "在这里", "note": "注意:", "sync_limitation": "只有集成后捕获的数据才会同步到Airtable。" } }, "buttons": { "authenticate": "连接Airtable", "fetch_bases": "获取Airtable基础", "fetch_tables": "获取Airtable表格", "remove_integration": "移除集成", "submit": "选择基础和表格" }, "fields": { "select_base": "选择Airtable基础", "select_table": "选择Airtable表格", "selected_base": "已选择基础:{{name}}", "selected_table": "已选择表格:{{name}}" }, "errors": { "auth_error": "使用Airtable认证时出错", "fetch_error": "获取Airtable基础时出错:{{message}}", "fetch_tables_error": "获取Airtable表格时出错:{{message}}", "update_error": "更新Airtable基础时出错:{{message}}", "remove_error": 
"移除Airtable集成时出错:{{message}}" }, "notifications": { "base_selected": "Airtable基础选择成功", "table_selected": "Airtable表格选择成功", "integration_removed": "Airtable集成已成功移除" } } }, "robot_duplication": { "title": "复制机器人", "descriptions": { "purpose": "机器人复制功能用于从具有相同结构的页面提取数据。", "example": "示例:如果您已经为{{url1}}创建了机器人,您可以复制它来抓取类似的页面(如{{url2}}),而无需从头开始训练机器人。", "warning": "⚠️ 确保新页面与原始页面具有相同的结构。" }, "fields": { "target_url": "机器人目标URL" }, "buttons": { "duplicate": "复制机器人", "cancel": "取消" }, "notifications": { "robot_not_found": "找不到机器人详细信息。请重试。", "url_required": "需要目标URL。", "duplicate_success": "机器人复制成功。", "duplicate_error": "更新目标URL失败。请重试。", "unknown_error": "更新目标URL时发生错误。" } }, "robot_settings": { "title": "机器人设置", "target_url": "机器人目标URL", "robot_id": "机器人ID", "robot_limit": "机器人限制", "created_by_user": "由用户创建", "created_at": "机器人创建时间", "errors": { "robot_not_found": "无法找到机器人详细信息。请重试。" }, "buttons": { "close": "关闭" } }, "robot_edit": { "title": "编辑机器人", "change_name": "更改机器人名称", "robot_limit": "机器人限制", "save": "保存更改", "cancel": "取消", "notifications": { "update_success": "机器人更新成功。", "update_failed": "无法更新机器人。请重试。", "update_error": "更新机器人时发生错误。" } }, "schedule_settings": { "title": "计划设置", "run_every": "每次运行", "start_from": "开始于", "on_day": "在日", "at_around": "大约在", "timezone": "时区", "buttons": { "delete_schedule": "删除计划", "save_schedule": "保存计划", "cancel": "取消" }, "labels": { "in_between": "之间", "run_once_every": "每次运行", "start_from_label": "开始于", "on_day_of_month": "月份日期", "on_day": { "st": "日", "nd": "日", "rd": "日", "th": "日" } } }, "main_page": { "notifications": { "interpretation_success": "机器人{{name}}解释成功", "interpretation_failed": "机器人{{name}}解释失败", "run_started": "正在运行机器人:{{name}}", "run_start_failed": "机器人运行失败:{{name}}", "schedule_success": "机器人{{name}}调度成功", "schedule_failed": "机器人{{name}}调度失败", "abort_success": "成功中止机器人{{name}}的解释", "abort_failed": "中止机器人{{name}}的解释失败", "abort_initiated": "正在中止机器人 {{name}} 的解释" }, "menu": { "recordings": "机器人", "runs": "运行", "proxy": 
"代理", "apikey": "API密钥" } }, "browser_window": { "attribute_modal": { "title": "选择属性", "notifications": { "list_select_success": "列表选择成功。选择要提取的文本数据。", "pagination_select_success": "分页元素选择成功。" } }, "attribute_options": { "anchor": { "text": "文本: {{text}}", "url": "URL: {{url}}" }, "image": { "alt_text": "替代文本: {{altText}}", "image_url": "图像URL: {{imageUrl}}" }, "default": { "text": "文本: {{text}}" } } }, "runs_table": { "run_type_chips": { "manual_run": "手动运行", "scheduled_run": "计划运行", "api": "API", "unknown_run_type": "未知运行类型" }, "run_status_chips": { "success": "成功", "running": "运行中", "scheduled": "已计划", "queued": "排队", "failed": "失败", "aborted": "已中止" }, "run_settings_modal": { "title": "运行设置", "labels": { "run_id": "运行ID", "run_by_user": "由用户运行", "run_by_schedule": "按计划ID运行", "run_by_api": "由API运行", "run_type": "运行类型" } } }, "run_content": { "tabs": { "output_data": "输出数据", "log": "日志" }, "buttons": { "stop": "停止" }, "loading": "加载数据中...", "empty_output": "没有可用的输出数据", "captured_data": { "title": "已捕获的数据", "download_csv": "下载CSV", "view_full": "查看完整数据", "items": "项目", "schema_title": "已捕获的文本", "list_title": "已捕获的列表" }, "captured_screenshot": { "title": "已捕获的截图", "download": "下载", "render_failed": "渲染截图失败" } }, "navbar": { "project_name": "Maxun", "notifications": { "success": { "logout": "退出登录成功" }, "errors": { "logout": { "unauthorized": "您没有执行此操作的权限", "server": "退出登录时发生服务器错误", "network": "退出登录时发生网络错误", "unknown": "退出登录时发生未知错误" } } }, "upgrade": { "button": "升级", "modal": { "up_to_date": "🎉 您已是最新版本!", "new_version_available": "新版本已可用:{{version}}。升级到最新版本以获取错误修复、增强和新功能!", "view_updates": "查看所有新更新", "view_updates_link": "此处", "tabs": { "manual_setup": "手动设置升级", "docker_setup": "Docker Compose设置升级" } } }, "menu_items": { "logout": "退出登录", "discord": "Discord", "youtube": "YouTube", "twitter": "Twitter (X)", "language": "语言" }, "recording": { "discard": "丢弃" } }, "language_menu": { "en": "英语", "es": "西班牙语", "ja": "日语", "zh": "中文", "de": "德语", "tr": "土耳其语" } } 
================================================ FILE: server/.gitignore ================================================ # dependencies /node_modules # misc .DS_Store .env.local .env.development.local .env.test.local .env.production.local .env /.idea /server/logs /build /dist package-lock.json ================================================ FILE: server/config/config.json ================================================ { "development": { "username": "postgres", "password": "postgres", "database": "maxun", "host": "localhost", "port": 5432, "dialect": "postgres" }, "test": { "username": "postgres", "password": "postgres", "database": "maxun_test", "host": "localhost", "port": 5432, "dialect": "postgres" }, "production": { "username": "postgres", "password": "postgres", "database": "maxun_prod", "host": "localhost", "port": 5432, "dialect": "postgres" } } ================================================ FILE: server/docker-entrypoint.sh ================================================ #!/bin/bash set -e # Function to wait for PostgreSQL wait_for_postgres() { echo "Waiting for PostgreSQL at $DB_HOST:$DB_PORT..." max_retries=30 retries=0 while ! nc -z $DB_HOST $DB_PORT; do retries=$((retries+1)) if [ $retries -eq $max_retries ]; then echo "Error: PostgreSQL not available after $max_retries attempts. Continuing anyway..." break fi echo "PostgreSQL not available yet (attempt $retries/$max_retries), retrying..." sleep 2 done if [ $retries -lt $max_retries ]; then echo "PostgreSQL is ready!" 
/**
 * Builds the public summary of a robot from a raw robot record.
 *
 * The summary exposes the robot id, name, creation time as epoch
 * milliseconds, and a single required `originUrl` input parameter.
 * The parameter's default is `recording_meta.url` when set, otherwise the
 * `where.url` of the last workflow entry, otherwise an empty string.
 *
 * @param recordingData Raw robot record; `recording_meta` and `recording`
 *   must both be present or a TypeError is thrown.
 * @returns The summary object sent back by the robots endpoints.
 */
const formatRecording = (recordingData: any) => {
  const { recording_meta: meta, recording } = recordingData;
  const steps = recording.workflow || [];
  const lastStep = steps[steps.length - 1];
  const originUrl = meta.url || lastStep?.where?.url || '';

  return {
    id: meta.id,
    name: meta.name,
    createdAt: new Date(meta.createdAt).getTime(),
    inputParameters: [
      {
        type: "string",
        name: "originUrl",
        label: "Origin URL",
        required: true,
        defaultValue: originUrl,
      },
    ],
  };
};
// GET /robots/:id — returns the formatted summary of a single robot.
// Responds 404 when no robot carries the requested recording_meta.id and
// 500 when the lookup or formatting fails unexpectedly. Previously every
// failure, including database errors, was reported as 404 because the
// "not found" case was only detected through a TypeError thrown while
// formatting a null record.
router.get("/robots/:id", requireAPIKey, async (req: Request, res: Response) => {
  try {
    const robot = await Robot.findOne({
      where: { 'recording_meta.id': req.params.id },
      raw: true,
    });

    // Explicit not-found check instead of relying on the formatter throwing.
    if (!robot) {
      return res.status(404).json({
        statusCode: 404,
        messageCode: "not_found",
        message: `Robot with ID "${req.params.id}" not found.`,
      });
    }

    const formattedRecording = formatRecordingById(robot);

    res.status(200).json({
      statusCode: 200,
      messageCode: "success",
      robot: formattedRecording,
    });
  } catch (error) {
    console.error("Error fetching robot:", error);
    res.status(500).json({
      statusCode: 500,
      messageCode: "error",
      message: "Failed to retrieve robot",
    });
  }
});
/**
 * Converts a run record into the response shape used by the runs endpoints.
 *
 * Identity and timing fields are copied straight from the run. The `data`
 * section is filled from `run.serializableOutput`: object-valued
 * `scrapeSchema`, `scrapeList`, `crawl` and `search` entries are passed
 * through (anything else becomes `{}`), while `markdown` and `html` expose
 * only the `content` of the first array element (or `''`). `screenshots`
 * collects every truthy value of `run.binaryOutput`.
 *
 * @param run Run record (plain object); a null/undefined run throws a TypeError.
 * @returns The formatted run.
 */
function formatRunResponse(run: any) {
  const output = run.serializableOutput || {};

  // Pass objects through untouched; fall back to a fresh empty object.
  const objectOrEmpty = (value: any) =>
    value && typeof value === 'object' ? value : {};

  // Only the first entry of an array-shaped output is exposed.
  const firstContent = (value: any) =>
    Array.isArray(value) ? value[0]?.content || '' : '';

  const screenshots: any[] = [];
  if (run.binaryOutput) {
    for (const entry of Object.values(run.binaryOutput)) {
      if (entry) {
        screenshots.push(entry);
      }
    }
  }

  return {
    id: run.id,
    status: run.status,
    name: run.name,
    robotId: run.robotMetaId,
    startedAt: run.startedAt,
    finishedAt: run.finishedAt,
    runId: run.runId,
    runByUserId: run.runByUserId,
    runByScheduleId: run.runByScheduleId,
    runByAPI: run.runByAPI,
    runBySDK: run.runBySDK,
    data: {
      textData: objectOrEmpty(output.scrapeSchema),
      listData: objectOrEmpty(output.scrapeList),
      crawlData: objectOrEmpty(output.crawl),
      searchData: objectOrEmpty(output.search),
      markdown: firstContent(output.markdown),
      html: firstContent(output.html),
    },
    screenshots,
  };
}
// GET /robots/:id/runs/:runId — returns one run of a robot.
// Responds 404 when no run matches both the run id and the robot id, and
// 500 when the lookup or formatting fails unexpectedly. Previously the
// "not found" case was only detected because formatting a null run threw,
// and genuine database errors were also reported as 404.
router.get("/robots/:id/runs/:runId", requireAPIKey, async (req: Request, res: Response) => {
  try {
    const run = await Run.findOne({
      where: {
        runId: req.params.runId,
        robotMetaId: req.params.id,
      },
      raw: true,
    });

    // Explicit not-found check instead of relying on the formatter throwing.
    if (!run) {
      return res.status(404).json({
        statusCode: 404,
        messageCode: "not_found",
        message: `Run with id "${req.params.runId}" for robot with id "${req.params.id}" not found.`,
      });
    }

    res.status(200).json({
      statusCode: 200,
      messageCode: "success",
      run: formatRunResponse(run),
    });
  } catch (error) {
    console.error("Error fetching run:", error);
    res.status(500).json({
      statusCode: 500,
      messageCode: "error",
      message: "Failed to retrieve run",
    });
  }
});
/**
 * Settles with the outcome of `promise`, or rejects if it takes too long.
 *
 * The timer is cancelled as soon as `promise` settles, so a finished
 * operation does not leave a pending timeout behind.
 *
 * @param promise The operation to guard.
 * @param timeoutMs Maximum time to wait, in milliseconds.
 * @param operation Human-readable label used in the timeout error message.
 * @returns A promise resolving with the value of `promise`.
 * @throws Error `"<operation> timed out after <timeoutMs>ms"` when the timer
 *   fires first; otherwise whatever `promise` rejects with.
 */
function withTimeout<T>(promise: Promise<T>, timeoutMs: number, operation: string): Promise<T> {
  return new Promise<T>((resolve, reject) => {
    const timer = setTimeout(
      () => reject(new Error(`${operation} timed out after ${timeoutMs}ms`)),
      timeoutMs
    );

    promise.then(
      (value) => {
        clearTimeout(timer);
        resolve(value);
      },
      (reason) => {
        clearTimeout(timer);
        reject(reason);
      }
    );
  });
}
/**
 * Returns a deep copy of `workflow` in which every pair's `what` list
 * starts with a `{ action: 'flag', args: ['generated'] }` action.
 *
 * The copy is made through a JSON round-trip, so the input is never mutated.
 *
 * @param workflow Workflow file whose pairs should be flagged.
 * @returns The flagged copy.
 */
function AddGeneratedFlags(workflow: WorkflowFile) {
  const flagged = JSON.parse(JSON.stringify(workflow));

  for (const pair of flagged.workflow) {
    pair.what.unshift({ action: 'flag', args: ['generated'] });
  }

  return flagged;
}
= await browser.getCurrentPage(); if (!currentPage) { throw new Error('Could not create a new page'); } if (recording.recording_meta.type === 'scrape') { logger.log('info', `Executing scrape robot for API run ${id}`); let formats = recording.recording_meta.formats || ['markdown']; if (requestedFormats && Array.isArray(requestedFormats) && requestedFormats.length > 0) { formats = requestedFormats.filter((f): f is 'markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage' => ['markdown', 'html', 'screenshot-visible', 'screenshot-fullpage'].includes(f) ); } await run.update({ status: 'running', log: `Converting page to: ${formats.join(', ')}` }); try { const url = recording.recording_meta.url; if (!url) { throw new Error('No URL specified for markdown robot'); } let markdown = ''; let html = ''; const serializableOutput: any = {}; const binaryOutput: any = {}; const SCRAPE_TIMEOUT = 120000; if (formats.includes('markdown')) { try { const markdownPromise = convertPageToMarkdown(url, currentPage); const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error(`Markdown conversion timed out after ${SCRAPE_TIMEOUT / 1000}s`)), SCRAPE_TIMEOUT); }); markdown = await Promise.race([markdownPromise, timeoutPromise]); if (markdown && markdown.trim().length > 0) { serializableOutput.markdown = [{ content: markdown }]; } } catch (error: any) { logger.log('warn', `Markdown conversion failed for API run ${plainRun.runId}: ${error.message}`); } } if (formats.includes('html')) { try { const htmlPromise = convertPageToHTML(url, currentPage); const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error(`HTML conversion timed out after ${SCRAPE_TIMEOUT / 1000}s`)), SCRAPE_TIMEOUT); }); html = await Promise.race([htmlPromise, timeoutPromise]); if (html && html.trim().length > 0) { serializableOutput.html = [{ content: html }]; } } catch (error: any) { logger.log('warn', `HTML conversion failed for API run ${plainRun.runId}: 
${error.message}`); } } if (formats.includes("screenshot-visible")) { try { const screenshotPromise = convertPageToScreenshot(url, currentPage, false); const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT / 1000}s`)), SCRAPE_TIMEOUT); }); const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]); if (screenshotBuffer && screenshotBuffer.length > 0) { binaryOutput['screenshot-visible'] = { data: screenshotBuffer.toString('base64'), mimeType: 'image/png' }; } } catch (error: any) { logger.log('warn', `Screenshot-visible conversion failed for API run ${plainRun.runId}: ${error.message}`); } } if (formats.includes("screenshot-fullpage")) { try { const screenshotPromise = convertPageToScreenshot(url, currentPage, true); const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT / 1000}s`)), SCRAPE_TIMEOUT); }); const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]); if (screenshotBuffer && screenshotBuffer.length > 0) { binaryOutput['screenshot-fullpage'] = { data: screenshotBuffer.toString('base64'), mimeType: 'image/png' }; } } catch (error: any) { logger.log('warn', `Screenshot-fullpage conversion failed for API run ${plainRun.runId}: ${error.message}`); } } await run.update({ status: 'success', finishedAt: new Date().toLocaleString(), log: `${formats.join(', ')} conversion completed successfully`, serializableOutput, binaryOutput, }); let uploadedBinaryOutput: Record = {}; if (Object.keys(binaryOutput).length > 0) { const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, binaryOutput); await run.update({ binaryOutput: uploadedBinaryOutput }); } logger.log('info', `Markdown robot execution completed for API run ${id}`); try { const completionData = 
{ runId: plainRun.runId, robotMetaId: plainRun.robotMetaId, robotName: recording.recording_meta.name, status: 'success', finishedAt: new Date().toLocaleString() }; serverIo .of('/queued-run') .to(`user-${userId}`) .emit('run-completed', completionData); } catch (socketError: any) { logger.log( 'warn', `Failed to send run-completed notification for markdown robot run ${id}: ${socketError.message}` ); } const webhookPayload: any = { robot_id: plainRun.robotMetaId, run_id: plainRun.runId, robot_name: recording.recording_meta.name, status: 'success', started_at: plainRun.startedAt, finished_at: new Date().toLocaleString(), metadata: { browser_id: plainRun.browserId, user_id: userId, }, }; if (serializableOutput.markdown) webhookPayload.markdown = markdown; if (serializableOutput.html) webhookPayload.html = html; if (uploadedBinaryOutput['screenshot-visible']) webhookPayload.screenshot_visible = uploadedBinaryOutput['screenshot-visible']; if (uploadedBinaryOutput['screenshot-fullpage']) webhookPayload.screenshot_fullpage = uploadedBinaryOutput['screenshot-fullpage']; try { await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload); logger.log( 'info', `Webhooks sent successfully for markdown robot API run ${plainRun.runId}` ); } catch (webhookError: any) { logger.log( 'warn', `Failed to send webhooks for markdown robot run ${plainRun.runId}: ${webhookError.message}` ); } capture("maxun-oss-run-created", { runId: plainRun.runId, userId: userId, robotId: recording.recording_meta.id, robotType: "scrape", source: "api", status: "success", createdAt: new Date().toISOString(), formats }); await destroyRemoteBrowser(plainRun.browserId, userId); return { success: true, interpretationInfo: run.toJSON() }; } catch (error: any) { logger.log( 'error', `${formats.join(', ')} conversion failed for API run ${id}: ${error.message}` ); await run.update({ status: 'failed', finishedAt: new Date().toLocaleString(), log: `${formats.join(', ')} conversion failed: 
${error.message}`, }); try { const failureData = { runId: plainRun.runId, robotMetaId: plainRun.robotMetaId, robotName: recording.recording_meta.name, status: 'failed', finishedAt: new Date().toLocaleString(), error: error.message }; serverIo .of('/queued-run') .to(`user-${userId}`) .emit('run-completed', failureData); } catch (socketError: any) { logger.log( 'warn', `Failed to send run-failed notification for markdown robot run ${id}: ${socketError.message}` ); } try { await sendWebhook(plainRun.robotMetaId, 'run_failed', { robot_id: plainRun.robotMetaId, run_id: plainRun.runId, robot_name: recording.recording_meta.name, status: 'failed', finished_at: new Date().toLocaleString(), error: { message: error.message, type: 'ConversionError' } }); } catch (webhookError: any) { logger.log('warn', `Failed to send webhook for failed API scrape run ${plainRun.runId}: ${webhookError.message}`); } capture("maxun-oss-run-created", { runId: plainRun.runId, userId: userId, robotId: recording.recording_meta.id, robotType: "scrape", source: "api", status: "failed", createdAt: new Date().toISOString(), formats }); await destroyRemoteBrowser(plainRun.browserId, userId); throw error; } } plainRun.status = 'running'; const workflow = AddGeneratedFlags(recording.recording); browser.interpreter.setRunId(plainRun.runId); const INTERPRETATION_TIMEOUT = 600000; const interpretationPromise = browser.interpreter.InterpretRecording( workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings ); const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error(`Workflow interpretation timed out after ${INTERPRETATION_TIMEOUT/1000}s`)), INTERPRETATION_TIMEOUT); }); const interpretationInfo = await Promise.race([interpretationPromise, timeoutPromise]); const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, 
interpretationInfo.binaryOutput); if (browser && browser.interpreter) { await browser.interpreter.clearState(); } await destroyRemoteBrowser(plainRun.browserId, userId); const updatedRun = await run.update({ status: 'success', finishedAt: new Date().toLocaleString(), log: interpretationInfo.log.join('\n'), binaryOutput: uploadedBinaryOutput, }); try { const completionData = { runId: plainRun.runId, robotMetaId: plainRun.robotMetaId, robotName: recording.recording_meta.name, status: 'success', finishedAt: new Date().toLocaleString(), runByUserId: plainRun.runByUserId, runByScheduleId: plainRun.runByScheduleId, runByAPI: plainRun.runByAPI || false, browserId: plainRun.browserId }; serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', completionData); logger.log('info', `API run completed notification sent for run: ${plainRun.runId} to user-${userId}`); } catch (socketError: any) { logger.log('warn', `Failed to send run-completed notification for API run ${plainRun.runId}: ${socketError.message}`); } let totalSchemaItemsExtracted = 0; let totalListItemsExtracted = 0; let extractedScreenshotsCount = 0; if (updatedRun) { if (updatedRun.dataValues.serializableOutput) { if (updatedRun.dataValues.serializableOutput.scrapeSchema) { Object.values(updatedRun.dataValues.serializableOutput.scrapeSchema).forEach((schemaResult: any) => { if (Array.isArray(schemaResult)) { totalSchemaItemsExtracted += schemaResult.length; } else if (schemaResult && typeof schemaResult === 'object') { totalSchemaItemsExtracted += 1; } }); } if (updatedRun.dataValues.serializableOutput.scrapeList) { Object.values(updatedRun.dataValues.serializableOutput.scrapeList).forEach((listResult: any) => { if (Array.isArray(listResult)) { totalListItemsExtracted += listResult.length; } }); } } if (updatedRun.dataValues.binaryOutput) { extractedScreenshotsCount = Object.keys(updatedRun.dataValues.binaryOutput).length; } } const totalRowsExtracted = totalSchemaItemsExtracted + 
totalListItemsExtracted; capture('maxun-oss-run-created',{ runId: id, userId: userId, robotId: recording.recording_meta.id, robotType: recording.recording_meta.type || 'extract', source: 'api', createdAt: new Date().toISOString(), status: 'success', totalSchemaItemsExtracted, totalListItemsExtracted, extractedScreenshotsCount, totalRowsExtracted } ) const parsedOutput = typeof updatedRun.dataValues.serializableOutput === "string" ? JSON.parse(updatedRun.dataValues.serializableOutput) : updatedRun.dataValues.serializableOutput || {}; const parsedList = typeof parsedOutput.scrapeList === "string" ? JSON.parse(parsedOutput.scrapeList) : parsedOutput.scrapeList || {}; const parsedSchema = typeof parsedOutput.scrapeSchema === "string" ? JSON.parse(parsedOutput.scrapeSchema) : parsedOutput.scrapeSchema || {}; const parsedCrawl = typeof parsedOutput.crawl === "string" ? JSON.parse(parsedOutput.crawl) : parsedOutput.crawl || {}; const parsedSearch = typeof parsedOutput.search === "string" ? JSON.parse(parsedOutput.search) : parsedOutput.search || {}; const webhookPayload = { robot_id: plainRun.robotMetaId, run_id: plainRun.runId, robot_name: recording.recording_meta.name, status: "success", started_at: plainRun.startedAt, finished_at: new Date().toLocaleString(), extracted_data: { captured_texts: parsedSchema || {}, captured_lists: parsedList || {}, crawl_data: parsedCrawl || {}, search_data: parsedSearch || {}, captured_texts_count: totalSchemaItemsExtracted, captured_lists_count: totalListItemsExtracted, screenshots_count: extractedScreenshotsCount }, metadata: { browser_id: plainRun.browserId, user_id: userId, }, }; try { await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload); logger.log('info', `Webhooks sent successfully for completed run ${plainRun.runId}`); } catch (webhookError: any) { logger.log('error', `Failed to send webhooks for run ${plainRun.runId}: ${webhookError.message}`); } await triggerIntegrationUpdates(plainRun.runId, 
plainRun.robotMetaId); return { success: true, interpretationInfo: updatedRun.toJSON() }; } catch (error: any) { logger.log('info', `Error while running a robot with id: ${id} - ${error.message}`); const run = await Run.findOne({ where: { runId: id } }); if (run) { if (browser) { try { if (browser.interpreter) { await browser.interpreter.clearState(); } await destroyRemoteBrowser(run.browserId, userId); } catch (cleanupError: any) { logger.error(`Failed to cleanup browser in error handler: ${cleanupError.message}`); } } await run.update({ status: 'failed', finishedAt: new Date().toLocaleString(), log: (run.log ? run.log + '\n' : '') + `Error: ${error.message}\n` + (error.stack ? error.stack : ''), }); try { const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true }); const failureData = { runId: run.runId, robotMetaId: run.robotMetaId, robotName: recording ? recording.recording_meta.name : 'Unknown Robot', status: 'failed', finishedAt: new Date().toLocaleString(), runByUserId: run.runByUserId, runByScheduleId: run.runByScheduleId, runByAPI: run.runByAPI || false, browserId: run.browserId }; serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', failureData); logger.log('info', `API run permanently failed notification sent for run: ${run.runId} to user-${userId}`); } catch (socketError: any) { logger.log('warn', `Failed to send run-completed notification for permanently failed API run ${run.runId}: ${socketError.message}`); } const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true }); const failedWebhookPayload = { robot_id: run.robotMetaId, run_id: run.runId, robot_name: recording ? 
/**
 * Creates a run for robot `id` and wires up the socket that starts it.
 *
 * A run record and remote browser are created first; then a socket.io client
 * connects to the browser's namespace and executes the run when the
 * `ready-for-run` event arrives. The socket is cleaned up on connection
 * errors and on disconnect.
 *
 * @param id Robot (recording meta) id to run.
 * @param userId Id of the user the run is created for.
 * @param isSDK Whether the run was triggered through the SDK rather than the API.
 * @param requestedFormats Optional output formats forwarded to the run.
 * @returns The new run id once the listeners are registered, or `undefined`
 *   when setup fails (the failure is logged, not thrown).
 */
export async function handleRunRecording(id: string, userId: string, isSDK: boolean = false, requestedFormats?: string[]) {
  let socket: Socket | null = null;

  try {
    const result = await createWorkflowAndStoreMetadata(id, userId, isSDK);
    const { browserId, runId: newRunId, error: setupError } = result;

    // Surface the real setup failure (e.g. "Recording not found") instead of
    // masking it behind the generic "undefined" message below.
    if (setupError) {
      throw new Error(setupError);
    }

    if (!browserId || !newRunId || !userId) {
      throw new Error('browserId or runId or userId is undefined');
    }

    const CONNECTION_TIMEOUT = 30000;
    const backendUrl = process.env.BACKEND_URL || 'http://localhost:8080';

    // Keep a non-null reference for the handlers so they need no `!` assertions.
    const runSocket: Socket = io(`${backendUrl}/${browserId}`, {
      transports: ['websocket'],
      rejectUnauthorized: false,
      timeout: CONNECTION_TIMEOUT,
    });
    socket = runSocket;

    const readyHandler = () => readyForRunHandler(browserId, newRunId, userId, runSocket, requestedFormats);

    runSocket.on('ready-for-run', readyHandler);

    runSocket.on('connect_error', (error: Error) => {
      logger.error(`Socket connection error for API run ${newRunId}: ${error.message}`);
      cleanupSocketConnection(runSocket, browserId, newRunId);
    });

    runSocket.on('error', (error: Error) => {
      logger.error(`Socket error for API run ${newRunId}: ${error.message}`);
    });

    runSocket.on('disconnect', () => {
      cleanupSocketConnection(runSocket, browserId, newRunId);
    });

    logger.log('info', `Running Robot: ${id}`);

    return newRunId;
  } catch (error: any) {
    logger.error('Error running robot:', error);
    if (socket) {
      // No browser namespace is known to be valid here, so only the client
      // socket itself is torn down.
      cleanupSocketConnection(socket, '', '');
    }
    return undefined;
  }
}
Error('Run failed'); } else if (run.status === 'aborted' || run.status === 'aborting') { throw new Error('Run was aborted'); } await new Promise(resolve => setTimeout(resolve, interval)); } } /** * @swagger * /api/robots/{id}/runs: * post: * summary: Run a robot by ID * description: When you need to run a robot and get its captured data, you can use this endpoint to create a run for the robot. For now, you can poll the GET endpoint to retrieve a run's details as soon as it is finished. We are working on adding a webhook feature to notify you when a run is finished. * security: * - api_key: [] * parameters: * - in: path * name: id * schema: * type: string * required: true * description: The ID of the robot to run. * requestBody: * required: false * content: * application/json: * schema: * type: object * properties: * formats: * type: array * items: * type: string * enum: [markdown, html] * description: Optional override formats for this run. * example: * formats: ["html"] * responses: * 200: * description: Robot run started successfully. * content: * application/json: * schema: * type: object * properties: * statusCode: * type: integer * example: 200 * messageCode: * type: string * example: success * run: * type: object * properties: * runId: * type: string * example: "67890" * status: * type: string * example: "in_progress" * 401: * description: Unauthorized access. * content: * application/json: * schema: * type: object * properties: * ok: * type: boolean * example: false * error: * type: string * example: "Unauthorized" * 500: * description: Error running robot. 
router.post("/robots/:id/runs", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
  // Starts a run for the robot, blocks until the run finishes, then returns the formatted run.
  try {
    const { user } = req;
    if (!user) {
      return res.status(401).json({ ok: false, error: 'Unauthorized' });
    }

    // Optional per-run output formats taken from the request body.
    const formatsOverride = req.body?.formats;
    const startedRunId = await handleRunRecording(req.params.id, user.id, false, formatsOverride);
    if (!startedRunId) {
      throw new Error('Run ID is undefined');
    }

    // Rejects when the run fails, is aborted or times out; handled by the catch below.
    const finishedRun = await waitForRunCompletion(startedRunId);

    const payload = {
      statusCode: 200,
      messageCode: "success",
      run: formatRunResponse(finishedRun),
    };
    res.status(200).json(payload);
  } catch (error) {
    console.error("Error running robot:", error);
    res.status(500).json({
      statusCode: 500,
      messageCode: "error",
      message: "Failed to run robot",
    });
  }
});
router.post("/robots/:id/duplicate", requireAPIKey, async (req: Request, res: Response) => {
  // Clones the robot identified by :id and points the clone's entry navigation at `targetUrl`.
  // Integrations, webhooks and schedule are deliberately not copied to the clone.
  try {
    const { id } = req.params;
    // Tolerate a missing body so the request ends in a 400 instead of a TypeError/500.
    const { targetUrl } = req.body ?? {};

    if (!targetUrl) {
      return res.status(400).json({
        statusCode: 400,
        messageCode: "bad_request",
        message: 'The "targetUrl" field is required.',
      });
    }

    // `new URL()` stringifies its argument, so a value such as ["https://a.b"] would
    // pass the URL check below and then crash on `targetUrl.split`. Reject it here.
    if (typeof targetUrl !== 'string') {
      return res.status(400).json({
        statusCode: 400,
        messageCode: "bad_request",
        message: 'The "targetUrl" must be a valid URL.',
      });
    }

    try {
      const parsed = new URL(targetUrl);
      if (!['http:', 'https:'].includes(parsed.protocol)) {
        return res.status(400).json({
          statusCode: 400,
          messageCode: "bad_request",
          message: 'The "targetUrl" must use http or https protocol.',
        });
      }
    } catch {
      return res.status(400).json({
        statusCode: 400,
        messageCode: "bad_request",
        message: 'The "targetUrl" must be a valid URL.',
      });
    }

    const originalRobot = await Robot.findOne({
      where: { 'recording_meta.id': id },
    });

    if (!originalRobot) {
      return res.status(404).json({
        statusCode: 404,
        messageCode: "not_found",
        message: `Robot with ID "${id}" not found.`,
      });
    }

    // Last non-empty path segment of the target URL; used as a suffix for the clone's name.
    const lastWord = targetUrl.split('/').filter(Boolean).pop() || 'Unnamed';

    const steps: any[] = originalRobot.recording.workflow;

    // The entry step is the last step matching `about:blank`; its `goto` argument is the
    // URL the original robot starts from.
    const entryStep = steps.findLast((step: any) => step.where?.url === 'about:blank');
    const originalEntryUrl: string | null = entryStep?.what?.find(
      (action: any) => action.action === 'goto' && action.args?.length
    )?.args?.[0] ?? null;

    let gotoUpdated = false;
    let whereUpdateStopped = false;

    // Walk the workflow from the end (where the entry step lives) towards the start:
    //  - only the first `goto` to the original entry URL is redirected to the target;
    //  - `where.url` conditions equal to the original entry URL are rewritten until the
    //    first non-`about:blank` step with a different URL is met, then left untouched.
    // The array is reversed back afterwards so step order is preserved.
    const workflow = [...steps].reverse().map((step: any) => {
      let updatedWhere = step.where;

      if (originalEntryUrl && step.where?.url !== 'about:blank' && !whereUpdateStopped) {
        if (step.where?.url === originalEntryUrl) {
          updatedWhere = { ...step.where, url: targetUrl };
        } else {
          whereUpdateStopped = true;
        }
      }

      const updatedWhat = step.what.map((action: any) => {
        if (!gotoUpdated && action.action === 'goto' && action.args?.[0] === originalEntryUrl) {
          gotoUpdated = true;
          return { ...action, args: [targetUrl, ...action.args.slice(1)] };
        }
        return action;
      });

      return { ...step, where: updatedWhere, what: updatedWhat };
    }).reverse();

    const currentTimestamp = new Date().toLocaleString();

    const newRobot = await Robot.create({
      id: uuid(),
      userId: originalRobot.userId,
      recording_meta: {
        ...originalRobot.recording_meta,
        id: uuid(),
        name: `${originalRobot.recording_meta.name} (${lastWord})`,
        url: targetUrl,
        createdAt: currentTimestamp,
        updatedAt: currentTimestamp,
      },
      recording: { ...originalRobot.recording, workflow },
      google_sheet_email: null,
      google_sheet_name: null,
      google_sheet_id: null,
      google_access_token: null,
      google_refresh_token: null,
      airtable_base_id: null,
      airtable_base_name: null,
      airtable_table_name: null,
      airtable_table_id: null,
      airtable_access_token: null,
      airtable_refresh_token: null,
      webhooks: null,
      schedule: null,
    });

    logger.log('info', `Robot with ID ${id} duplicated successfully as ${newRobot.id}.`);

    return res.status(201).json({
      statusCode: 201,
      messageCode: "success",
      robot: formatRecordingById(newRobot.toJSON()),
    });
  } catch (error) {
    logger.log('error', `Error duplicating robot with ID ${req.params.id}: ${error instanceof Error ? error.message : 'Unknown error'}`);
    return res.status(500).json({
      statusCode: 500,
      messageCode: "error",
      message: error instanceof Error ? error.message : 'An unknown error occurred.',
    });
  }
});
Expected { meta, workflow }" }); } if (!workflowFile.meta.name) { return res.status(400).json({ error: "Robot name is required in meta.name" }); } const type = (workflowFile.meta as any).type || 'extract'; let enrichedWorkflow: any[] = []; let extractedUrl: string | undefined; if (type === 'scrape') { enrichedWorkflow = []; extractedUrl = (workflowFile.meta as any).url; if (!extractedUrl) { return res.status(400).json({ error: "URL is required for scrape robots" }); } } else { const enrichResult = await WorkflowEnricher.enrichWorkflow(workflowFile.workflow, user.id); if (!enrichResult.success) { logger.error("[SDK] Error in Selector Validation:\n" + JSON.stringify(enrichResult.errors, null, 2)) return res.status(400).json({ error: "Workflow validation failed", details: enrichResult.errors }); } enrichedWorkflow = enrichResult.workflow!; extractedUrl = enrichResult.url; } const robotId = uuid(); const metaId = uuid(); const robotMeta: any = { name: workflowFile.meta.name, id: metaId, createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(), pairs: enrichedWorkflow.length, params: [], type, url: extractedUrl, formats: (workflowFile.meta as any).formats || [], isLLM: (workflowFile.meta as any).isLLM, }; const robot = await Robot.create({ id: robotId, userId: user.id, recording_meta: robotMeta, recording: { workflow: enrichedWorkflow } }); const eventName = robotMeta.isLLM ? 
/**
 * List all robots for the authenticated user
 * GET /api/sdk/robots
 *
 * Only robots whose `userId` matches the API key's user are returned.
 */
router.get("/sdk/robots", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
  try {
    if (!req.user) {
      return res.status(401).json({ error: "Unauthorized" });
    }

    // Scope the query to the caller; an unfiltered findAll() returns every user's robots.
    const robots = await Robot.findAll({
      where: { userId: req.user.id }
    });

    return res.status(200).json({ data: robots });
  } catch (error: any) {
    logger.error("[SDK] Error listing robots:", error);
    return res.status(500).json({
      error: "Failed to list robots",
      message: error.message
    });
  }
});

/**
 * Get a specific robot by ID
 * GET /api/sdk/robots/:id
 *
 * `:id` is matched against `recording_meta.id`, not the database row id.
 */
router.get("/sdk/robots/:id", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
  try {
    const robotId = req.params.id;

    const robot = await Robot.findOne({
      where: { 'recording_meta.id': robotId }
    });

    if (!robot) {
      return res.status(404).json({ error: "Robot not found" });
    }

    return res.status(200).json({ data: robot });
  } catch (error: any) {
    logger.error("[SDK] Error getting robot:", error);
    return res.status(500).json({
      error: "Failed to get robot",
      message: error.message
    });
  }
});
= { workflow: updates.workflow }; } if (updates.meta) { updateData.recording_meta = { ...robot.recording_meta, ...updates.meta, updatedAt: new Date().toISOString() }; } if (updates.google_sheet_email !== undefined) { updateData.google_sheet_email = updates.google_sheet_email; } if (updates.google_sheet_name !== undefined) { updateData.google_sheet_name = updates.google_sheet_name; } if (updates.airtable_base_id !== undefined) { updateData.airtable_base_id = updates.airtable_base_id; } if (updates.airtable_table_name !== undefined) { updateData.airtable_table_name = updates.airtable_table_name; } if (updates.schedule !== undefined) { if (updates.schedule === null) { try { await cancelScheduledWorkflow(robotId); } catch (cancelError) { logger.warn(`[SDK] Failed to cancel existing schedule for robot ${robotId}: ${cancelError}`); } updateData.schedule = null; } else { const { runEvery, runEveryUnit, timezone, startFrom = 'SUNDAY', dayOfMonth = 1, atTimeStart = '00:00', atTimeEnd = '23:59' } = updates.schedule; if (!runEvery || !runEveryUnit || !timezone) { return res.status(400).json({ error: "Missing required schedule parameters: runEvery, runEveryUnit, timezone" }); } if (!moment.tz.zone(timezone)) { return res.status(400).json({ error: "Invalid timezone" }); } const [startHours, startMinutes] = atTimeStart.split(':').map(Number); const [endHours, endMinutes] = atTimeEnd.split(':').map(Number); if (isNaN(startHours) || isNaN(startMinutes) || isNaN(endHours) || isNaN(endMinutes) || startHours < 0 || startHours > 23 || startMinutes < 0 || startMinutes > 59 || endHours < 0 || endHours > 23 || endMinutes < 0 || endMinutes > 59) { return res.status(400).json({ error: 'Invalid time format. Expected HH:MM (e.g., 09:30)' }); } const days = ['SUNDAY', 'MONDAY', 'TUESDAY', 'WEDNESDAY', 'THURSDAY', 'FRIDAY', 'SATURDAY']; if (!days.includes(startFrom)) { return res.status(400).json({ error: 'Invalid startFrom day. 
Must be one of: SUNDAY, MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY' }); } let cronExpression; const dayIndex = days.indexOf(startFrom); switch (runEveryUnit) { case 'MINUTES': cronExpression = `*/${runEvery} * * * *`; break; case 'HOURS': cronExpression = `${startMinutes} */${runEvery} * * *`; break; case 'DAYS': cronExpression = `${startMinutes} ${startHours} */${runEvery} * *`; break; case 'WEEKS': cronExpression = `${startMinutes} ${startHours} * * ${dayIndex}`; break; case 'MONTHS': cronExpression = `${startMinutes} ${startHours} ${dayOfMonth} */${runEvery} *`; if (startFrom !== 'SUNDAY') { cronExpression += ` ${dayIndex}`; } break; default: return res.status(400).json({ error: "Invalid runEveryUnit. Must be one of: MINUTES, HOURS, DAYS, WEEKS, MONTHS" }); } try { await cancelScheduledWorkflow(robotId); } catch (cancelError) { logger.warn(`[SDK] Failed to cancel existing schedule for robot ${robotId}: ${cancelError}`); } try { await scheduleWorkflow(robotId, req.user.id, cronExpression, timezone); } catch (scheduleError: any) { logger.error(`[SDK] Failed to schedule workflow for robot ${robotId}: ${scheduleError.message}`); return res.status(500).json({ error: "Failed to schedule workflow", message: scheduleError.message }); } const nextRunAt = computeNextRun(cronExpression, timezone); updateData.schedule = { runEvery, runEveryUnit, timezone, startFrom, dayOfMonth, atTimeStart, atTimeEnd, cronExpression, lastRunAt: undefined, nextRunAt: nextRunAt || undefined, }; logger.info(`[SDK] Scheduled robot ${robotId} with cron: ${cronExpression} in timezone: ${timezone}`); } } if (updates.webhooks !== undefined) { updateData.webhooks = updates.webhooks; } if (updates.proxy_url !== undefined) { updateData.proxy_url = updates.proxy_url; } if (updates.proxy_username !== undefined) { updateData.proxy_username = updates.proxy_username; } if (updates.proxy_password !== undefined) { updateData.proxy_password = updates.proxy_password; } await 
/**
 * Delete a robot
 * DELETE /api/sdk/robots/:id
 *
 * Removes the robot's runs first, then the robot itself, and reports the deletion
 * to analytics.
 */
router.delete("/sdk/robots/:id", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
  try {
    const { id: robotId } = req.params;

    const robot = await Robot.findOne({
      where: { 'recording_meta.id': robotId }
    });
    if (!robot) {
      return res.status(404).json({ error: "Robot not found" });
    }

    // Runs reference the robot through `robotMetaId`, so they are deleted before the robot row.
    await Run.destroy({
      where: { robotMetaId: robot.recording_meta.id }
    });
    await robot.destroy();

    logger.info(`[SDK] Robot deleted: ${robotId}`);

    // LLM-generated robots are tracked under their own event name.
    let deleteEventName = "maxun-oss-robot-deleted";
    if (robot.recording_meta.isLLM) {
      deleteEventName = "maxun-oss-llm-robot-deleted";
    }

    capture(deleteEventName, {
      robotId: robotId,
      user_id: req.user?.id,
      deleted_at: new Date().toISOString(),
    });

    return res.status(200).json({ message: "Robot deleted successfully" });
  } catch (error: any) {
    logger.error("[SDK] Error deleting robot:", error);
    return res.status(500).json({
      error: "Failed to delete robot",
      message: error.message
    });
  }
});
/**
 * Wait for run completion.
 *
 * Re-reads the `Run` row identified by `runId` until it reaches a terminal status.
 *
 * @param runId - Run identifier looked up on every poll.
 * @param interval - Delay between polls in milliseconds. Anything that is not a
 *   finite, positive number falls back to 2000 ms.
 * @returns The run as a plain object (`run.toJSON()`) once its status is `success`.
 * @throws Error when the run is not found, its status is `failed`, `aborted` or
 *   `aborting`, or more than 3 hours have elapsed.
 */
async function waitForRunCompletion(runId: string, interval: number = 2000) {
  const MAX_WAIT_TIME = 180 * 60 * 1000;
  const DEFAULT_POLL_INTERVAL = 2000;

  // Callers working with `any` values can slip a non-number into `interval` (the
  // execute route in this file passes the user id string in this position). Node
  // clamps an invalid timer delay to 1 ms, which would hammer the database, so fall
  // back to the default instead.
  const pollInterval =
    typeof interval === 'number' && Number.isFinite(interval) && interval > 0
      ? interval
      : DEFAULT_POLL_INTERVAL;

  const startTime = Date.now();

  while (true) {
    if (Date.now() - startTime > MAX_WAIT_TIME) {
      throw new Error('Run completion timeout after 3 hours');
    }

    const run = await Run.findOne({ where: { runId } });
    if (!run) throw new Error('Run not found');

    if (run.status === 'success') {
      return run.toJSON();
    } else if (run.status === 'failed') {
      throw new Error('Run failed');
    } else if (run.status === 'aborted' || run.status === 'aborting') {
      // `aborting` is reported as aborted too, matching the equivalent helper in api/record.ts.
      throw new Error('Run was aborted');
    }

    await new Promise(resolve => setTimeout(resolve, pollInterval));
  }
}
/**
 * Get all runs for a robot
 * GET /api/sdk/robots/:id/runs
 *
 * Runs are returned newest first (ordered by `startedAt` descending).
 */
router.get("/sdk/robots/:id/runs", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
  try {
    const { id: robotId } = req.params;

    const robot = await Robot.findOne({
      where: { 'recording_meta.id': robotId }
    });
    if (!robot) {
      return res.status(404).json({ error: "Robot not found" });
    }

    const robotRuns = await Run.findAll({
      where: { robotMetaId: robot.recording_meta.id },
      order: [['startedAt', 'DESC']]
    });

    return res.status(200).json({ data: robotRuns });
  } catch (error: any) {
    logger.error("[SDK] Error getting runs:", error);
    return res.status(500).json({
      error: "Failed to get runs",
      message: error.message
    });
  }
});

/**
 * Get a specific run
 * GET /api/sdk/robots/:id/runs/:runId
 *
 * The run must belong to the robot named in the path; otherwise 404 is returned.
 */
router.get("/sdk/robots/:id/runs/:runId", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
  try {
    const { id: robotId, runId } = req.params;

    const robot = await Robot.findOne({
      where: { 'recording_meta.id': robotId }
    });
    if (!robot) {
      return res.status(404).json({ error: "Robot not found" });
    }

    const matchingRun = await Run.findOne({
      where: {
        runId: runId,
        robotMetaId: robot.recording_meta.id
      }
    });
    if (!matchingRun) {
      return res.status(404).json({ error: "Run not found" });
    }

    return res.status(200).json({ data: matchingRun });
  } catch (error: any) {
    logger.error("[SDK] Error getting run:", error);
    return res.status(500).json({
      error: "Failed to get run",
      message: error.message
    });
  }
});
/**
 * Create a crawl robot programmatically
 * POST /api/sdk/crawl
 *
 * Body: `{ url, name?, crawlConfig }`.
 * - `url` must be a string holding an absolute http(s) URL.
 * - `crawlConfig` must be a plain (non-array) object; it is stored as the single
 *   argument of the generated `crawl` action.
 * - `name` defaults to `Crawl Robot - <hostname>`.
 *
 * Responds 201 with the created robot, 400 on invalid input, 500 on unexpected errors.
 */
router.post("/sdk/crawl", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
  try {
    const user = req.user;
    // Tolerate a missing body so the request ends in a 400 instead of a TypeError/500.
    const { url, name, crawlConfig } = req.body ?? {};

    if (!url || !crawlConfig) {
      return res.status(400).json({
        error: "URL and crawl configuration are required"
      });
    }

    // `new URL()` stringifies its argument, so non-string values must be rejected explicitly.
    if (typeof url !== 'string') {
      return res.status(400).json({
        error: "Invalid URL format"
      });
    }

    // Parse once and reuse the result for the protocol check and the default robot name.
    let parsedUrl: URL;
    try {
      parsedUrl = new URL(url);
    } catch (err) {
      return res.status(400).json({
        error: "Invalid URL format"
      });
    }

    // Same restriction the robot-duplication endpoint applies to its target URL:
    // the generated workflow navigates a browser to this address.
    if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
      return res.status(400).json({
        error: "URL must use http or https protocol"
      });
    }

    // `typeof [] === 'object'`, so arrays need their own check.
    if (typeof crawlConfig !== 'object' || Array.isArray(crawlConfig)) {
      return res.status(400).json({
        error: "crawlConfig must be an object"
      });
    }

    const robotName = name || `Crawl Robot - ${parsedUrl.hostname}`;
    const robotId = uuid();
    const metaId = uuid();

    const robot = await Robot.create({
      id: robotId,
      userId: user.id,
      recording_meta: {
        name: robotName,
        id: metaId,
        createdAt: new Date().toISOString(),
        updatedAt: new Date().toISOString(),
        pairs: 1,
        params: [],
        type: 'crawl',
        url: url,
      },
      recording: {
        workflow: [
          {
            // Crawl step: matches once the browser is on the target URL.
            where: { url },
            what: [
              { action: 'flag', args: ['generated'] },
              {
                action: 'crawl',
                args: [crawlConfig],
                name: 'Crawl'
              }
            ]
          },
          {
            // Entry step: from a blank page, navigate to the target URL.
            where: { url: 'about:blank' },
            what: [
              { action: 'goto', args: [url] },
              { action: 'waitForLoadState', args: ['networkidle'] }
            ]
          }
        ]
      }
    });

    logger.info(`[SDK] Crawl robot created: ${metaId} (db: ${robotId}) by user ${user.id}`);

    capture("maxun-oss-robot-created", {
      userId: user.id.toString(),
      robotId: metaId,
      robotName: robotName,
      url: url,
      robotType: 'crawl',
      crawlConfig: crawlConfig,
      source: 'sdk',
      robot_meta: robot.recording_meta,
      recording: robot.recording,
    });

    return res.status(201).json({
      data: robot,
      message: "Crawl robot created successfully"
    });
  } catch (error: any) {
    logger.error("[SDK] Error creating crawl robot:", error);
    return res.status(500).json({
      error: "Failed to create crawl robot",
      message: error.message
    });
  }
});
/**
 * LLM-based extraction - generate workflow from natural language prompt
 * POST /api/sdk/extract/llm
 *
 * `url` is optional: with a URL the workflow is generated for that page; without
 * one, the search-based generator is used and supplies the URL itself.
 * The generated workflow is persisted as an `extract` robot flagged `isLLM`.
 */
router.post("/sdk/extract/llm", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
  try {
    const user = req.user;
    const { url, prompt, llmProvider, llmModel, llmApiKey, llmBaseUrl, robotName } = req.body;

    if (!prompt) {
      return res.status(400).json({
        error: "Prompt is required"
      });
    }

    // Only validate the URL when the caller supplied one.
    if (url) {
      try {
        new URL(url);
      } catch (err) {
        return res.status(400).json({
          error: "Invalid URL format"
        });
      }
    }

    const llmConfig = {
      provider: llmProvider,
      model: llmModel,
      apiKey: llmApiKey,
      baseUrl: llmBaseUrl
    };

    // Pick the generator by whether a URL was given.
    const workflowResult: any = url
      ? await WorkflowEnricher.generateWorkflowFromPrompt(url, prompt, user.id, llmConfig)
      : await WorkflowEnricher.generateWorkflowFromPromptWithSearch(prompt, user.id, llmConfig);

    // Prefer the generator's URL, then the caller's, then an empty string.
    const finalUrl: string = workflowResult.url || url || '';

    const generationFailed = !workflowResult.success || !workflowResult.workflow;
    if (generationFailed) {
      return res.status(400).json({
        error: "Failed to generate workflow from prompt",
        details: workflowResult.errors
      });
    }

    const generatedWorkflow = workflowResult.workflow;
    const robotId = uuid();
    const metaId = uuid();

    const robotMeta: any = {
      name: robotName || `LLM Extract: ${prompt.substring(0, 50)}`,
      id: metaId,
      createdAt: new Date().toISOString(),
      updatedAt: new Date().toISOString(),
      pairs: generatedWorkflow.length,
      params: [],
      type: 'extract',
      url: finalUrl,
      isLLM: true
    };

    const robot = await Robot.create({
      id: robotId,
      userId: user.id,
      recording_meta: robotMeta,
      recording: { workflow: generatedWorkflow },
    });

    logger.info(`[SDK] Persistent robot created: ${metaId} for LLM extraction`);

    capture("maxun-oss-llm-robot-created", {
      robot_meta: robot.recording_meta,
      recording: robot.recording,
      prompt: prompt
    });

    return res.status(200).json({
      success: true,
      data: {
        robotId: metaId,
        name: robotMeta.name,
        description: prompt,
        url: finalUrl,
        workflow: generatedWorkflow
      }
    });
  } catch (error: any) {
    logger.error("[SDK] Error in LLM extraction:", error);
    return res.status(500).json({
      error: "Failed to perform LLM extraction",
      message: error.message
    });
  }
});
/**
 * Launch a local browser as fallback when browser service is unavailable.
 *
 * Starts a headless Chromium through playwright-core with a fixed set of launch
 * flags. Requires Chromium binaries to be installed on the host.
 *
 * @returns Promise<Browser> - Locally launched browser instance
 * @throws Error with setup instructions when the launch fails; the underlying
 *   error message is logged before the new error is thrown.
 */
async function launchLocalBrowser(): Promise<Browser> {
  logger.warn('Attempting to launch local browser');
  logger.warn('Note: This requires Chromium binaries to be installed (npx playwright install chromium)');

  try {
    const browser = await chromium.launch({
      headless: true,
      args: [
        '--disable-blink-features=AutomationControlled',
        '--disable-web-security',
        '--disable-features=IsolateOrigins,site-per-process',
        '--disable-site-isolation-trials',
        '--disable-extensions',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--force-color-profile=srgb',
        '--force-device-scale-factor=2',
        '--ignore-certificate-errors',
        '--mute-audio'
      ],
    });

    logger.info('Successfully launched local browser');
    return browser;
  } catch (error: any) {
    logger.error(`Failed to launch local browser: ${error.message}`);
    throw new Error(
      `Could not launch local browser. ` +
      `Please either:\n` +
      `  1. Start the browser service: docker-compose up browser\n` +
      `  2. Install Chromium binaries: npx playwright@1.57.0 install chromium`
    );
  }
}
/**
 * Check if browser service is healthy.
 *
 * Requests `http://<BROWSER_WS_HOST>:<BROWSER_HEALTH_PORT>/health` (defaults:
 * `localhost` and `3002`) and inspects the JSON body's `status` field.
 * Never throws: network and parse errors are logged and reported as unhealthy.
 *
 * @returns Promise<boolean> - true if the service reports `status: 'healthy'`
 */
export async function checkBrowserServiceHealth(): Promise<boolean> {
  try {
    const healthPort = process.env.BROWSER_HEALTH_PORT || '3002';
    const healthHost = process.env.BROWSER_WS_HOST || 'localhost';
    const healthEndpoint = `http://${healthHost}:${healthPort}/health`;

    const response = await fetch(healthEndpoint);
    const data = await response.json();

    if (data.status === 'healthy') {
      logger.info('Browser service health check passed');
      return true;
    }

    logger.warn('Browser service health check failed:', data);
    return false;
  } catch (error: any) {
    logger.error('Browser service health check error:', error.message);
    return false;
  }
}
/**
 * Represents the possible states of a remote browser.
 * @category Types
 */
type BrowserState = "recording" | "run";

/**
 * Lifecycle status of a pool slot.
 * @category Types
 */
type BrowserSlotStatus = "reserved" | "initializing" | "ready" | "failed";

/**
 * Bookkeeping record kept in the pool for every browser slot.
 * @category Types
 */
interface BrowserPoolInfo {
  /**
   * The instance of remote browser, or null while the slot is only reserved.
   */
  browser: RemoteBrowser | null,
  /**
   * States if the browser's instance is being actively used.
   * Helps to persist the progress on the frontend when the application has been reloaded.
   * @default false
   */
  active: boolean,
  /**
   * The user ID that owns this browser instance.
   */
  userId: string,
  /**
   * The current state of the browser.
   * Can be "recording" or "run".
   * @default "recording"
   */
  state: BrowserState,
  /**
   * The status of the browser instance.
   * Can be "reserved", "initializing", "ready" or "failed".
   */
  status?: BrowserSlotStatus,
  /**
   * Timestamp (ms since epoch) when the browser slot was created/reserved.
   */
  createdAt?: number,
  /**
   * Timestamp (ms since epoch) when the browser was last accessed.
   */
  lastAccessed?: number,
}

/**
 * Dictionary of all remote browser slots indexed by their id.
 * @category Types
 */
interface PoolDictionary {
  [key: string]: BrowserPoolInfo,
}

/**
 * A browser pool is a collection of remote browsers owned by users.
 * Enforces a "1 User - 2 Browser" policy (at most one of them in "recording"
 * state) while allowing multiple users to have their own browser instances.
 * Adds the possibility to add, remove and retrieve remote browsers from the pool.
 * @category BrowserManagement
 */
export class BrowserPool {
  /**
   * Holds all the slots of remote browsers, keyed by browser id.
   */
  private pool: PoolDictionary = {};

  /**
   * Maps user IDs to their browser IDs, oldest first.
   * A user can have up to 2 browsers.
   */
  private userToBrowserMap: Map<string, string[]> = new Map();

  /**
   * Guards against re-entrant reservations.
   * Key format: "userId-state", value: timestamp when the lock was acquired.
   */
  private reservationLocks: Map<string, number> = new Map();

  /**
   * Removes a browser id from a user's mapping, dropping the mapping entirely
   * once the user has no browsers left.
   */
  private unlinkFromUser(userId: string, id: string): void {
    const userBrowserIds = this.userToBrowserMap.get(userId) || [];
    if (!userBrowserIds.includes(id)) {
      return;
    }

    const remaining = userBrowserIds.filter(bid => bid !== id);
    if (remaining.length === 0) {
      this.userToBrowserMap.delete(userId);
    } else {
      this.userToBrowserMap.set(userId, remaining);
    }
  }

  /**
   * Shared implementation of the two public removal methods: drops the slot
   * and its user mapping. `action` only selects the word used in the log line.
   */
  private removeSlot(id: string, action: "removed" | "deleted"): boolean {
    const slot = this.pool[id];
    if (!slot) {
      logger.log('warn', `Remote browser with id: ${id} does not exist in the pool`);
      return false;
    }

    this.unlinkFromUser(slot.userId, id);
    delete this.pool[id];

    logger.log('debug', `Remote browser with id: ${id} ${action} from the pool`);
    return true;
  }

  /**
   * Picks a browser id from `candidates` (given in priority order).
   * With a state filter: the first active browser in that state, else the
   * first browser in that state, else null. Without a filter: the first
   * active browser, else the first candidate, else null.
   */
  private pickPreferredBrowser(candidates: string[], state?: BrowserState): string | null {
    if (state) {
      const inState = (id: string): boolean => this.pool[id]?.state === state;
      return candidates.find(id => this.pool[id]?.active && inState(id))
        ?? candidates.find(inState)
        ?? null;
    }

    return candidates.find(id => this.pool[id]?.active) ?? candidates[0] ?? null;
  }

  /**
   * Adds a remote browser instance to the pool for a specific user.
   * If the id already belongs to this user the slot is overwritten in place
   * (keeping its state). Otherwise the add is rejected when the user already
   * has a "recording" browser and another one is requested, or when the user
   * already owns two browsers.
   *
   * @param id remote browser instance's id
   * @param browser remote browser instance
   * @param userId the user ID that owns this browser instance
   * @param active states if the browser's instance is being actively used
   * @param state the state to register the browser in
   * @returns true if a new browser was added; false if an existing slot was
   *          updated or the add was rejected by the per-user limits
   */
  public addRemoteBrowser = (
    id: string,
    browser: RemoteBrowser,
    userId: string,
    active: boolean = false,
    state: BrowserState = "recording"
  ): boolean => {
    const existing = this.pool[id];

    // Same id, same owner: just refresh the slot.
    if (existing && existing.userId === userId) {
      this.pool[id] = {
        browser,
        active,
        userId,
        state: existing.state || state,
      };
      logger.log('debug', `Updated existing browser with id: ${id} for user: ${userId}`);
      return false;
    }

    const userBrowserIds = this.userToBrowserMap.get(userId) || [];

    // Only one "recording" browser per user.
    if (state === "recording") {
      const hasRecordingBrowser = userBrowserIds.some(browserId =>
        this.pool[browserId] && this.pool[browserId].state === "recording"
      );

      if (hasRecordingBrowser) {
        logger.log('debug', `User ${userId} already has a browser in "recording" state`);
        return false;
      }
    }

    // At most two browsers per user.
    if (userBrowserIds.length >= 2 && !userBrowserIds.includes(id)) {
      logger.log('debug', "User already has the maximum number of browsers (2)");
      return false;
    }

    // The id was owned by someone else: detach it from that user so the
    // user-to-browser map never points at a slot the user no longer owns.
    if (existing) {
      this.unlinkFromUser(existing.userId, id);
    }

    this.pool[id] = {
      browser,
      active,
      userId,
      state,
    };

    if (!userBrowserIds.includes(id)) {
      userBrowserIds.push(id);
    }
    this.userToBrowserMap.set(userId, userBrowserIds);

    logger.log('debug', `Remote browser with id: ${id} added to the pool for user: ${userId}`);
    return true;
  };

  /**
   * Removes the remote browser instance from the pool.
   * Note: This does not close the browser; the caller should ensure the
   * browser is properly closed before calling this method.
   *
   * @param id remote browser instance's id
   * @returns true if the browser was removed successfully, false otherwise
   */
  public closeAndDeleteBrowser = (id: string): boolean => {
    return this.removeSlot(id, "removed");
  };

  /**
   * Removes the remote browser instance from the pool without attempting to close it.
   *
   * @param id remote browser instance's id
   * @returns true if the browser was removed successfully, false otherwise
   */
  public deleteRemoteBrowser = (id: string): boolean => {
    return this.removeSlot(id, "deleted");
  };

  /**
   * Returns the remote browser instance from the pool.
   *
   * @param id remote browser instance's id
   * @returns remote browser instance, or undefined if the slot does not exist,
   *          is still only reserved, or has failed
   */
  public getRemoteBrowser = (id: string): RemoteBrowser | undefined => {
    const poolInfo = this.pool[id];

    if (!poolInfo) {
      return undefined;
    }

    if (poolInfo.status === "reserved") {
      logger.log('debug', `Browser ${id} is reserved but not yet ready`);
      return undefined;
    }

    if (poolInfo.status === "failed") {
      logger.log('debug', `Browser ${id} has failed status`);
      return undefined;
    }

    return poolInfo.browser || undefined;
  };

  /**
   * Returns the newest browser id for a specific user.
   * If state is specified, only a browser with that exact state is returned.
   * Ids that no longer exist in the pool are pruned from the user's mapping
   * as they are encountered, and the mapping is dropped once it is empty.
   *
   * @param userId the user ID to find the browser for
   * @param state optional browser state filter ("recording" or "run")
   * @returns the browser ID for the user, or null if no matching browser exists
   */
  public getActiveBrowserId = (userId: string, state?: BrowserState): string | null => {
    const browserIds = this.userToBrowserMap.get(userId);
    if (!browserIds || browserIds.length === 0) {
      logger.log('debug', `No browser found for user: ${userId}`);
      return null;
    }

    // Walk newest-first; iterating backwards keeps indices valid while splicing.
    for (let i = browserIds.length - 1; i >= 0; i--) {
      const browserId = browserIds[i];
      const slot = this.pool[browserId];

      if (!slot) {
        browserIds.splice(i, 1);
        continue;
      }

      if (!state || slot.state === state) {
        if (!state) {
          logger.log('debug', `Active browser Id ${browserId} found for user: ${userId}`);
        }
        return browserId;
      }
    }

    if (browserIds.length === 0) {
      // Every mapped id was stale: drop the mapping so the user is no longer counted.
      this.userToBrowserMap.delete(userId);
      logger.log('warn', `Browser mapping found for user: ${userId}, but no valid browsers exist in pool`);
    } else {
      logger.log('debug', `No browser with state ${state} found for user: ${userId}`);
    }
    return null;
  };

  /**
   * Returns the user ID associated with a browser ID.
   *
   * @param browserId the browser ID to find the user for
   * @returns the user ID for the browser, or null if the browser doesn't exist
   */
  public getUserForBrowser = (browserId: string): string | null => {
    return this.pool[browserId] ? this.pool[browserId].userId : null;
  };

  /**
   * Sets the active flag of a browser.
   *
   * @param id the browser ID
   * @param active the new active flag
   * @returns true if successful, false if the browser wasn't found
   */
  public setActiveBrowser = (id: string, active: boolean): boolean => {
    if (!this.pool[id]) {
      logger.log('warn', `Remote browser with id: ${id} does not exist in the pool`);
      return false;
    }

    this.pool[id].active = active;
    logger.log('debug', `Remote browser with id: ${id} set to ${active ? 'active' : 'inactive'}`);
    return true;
  };

  /**
   * Sets the state of a browser.
   * Only allows one browser in "recording" state per user.
   *
   * @param id the browser ID
   * @param state the new state ("recording" or "run")
   * @returns true if successful, false if the browser wasn't found or the state change is not allowed
   */
  public setBrowserState = (id: string, state: BrowserState): boolean => {
    if (!this.pool[id]) {
      logger.log('warn', `Remote browser with id: ${id} does not exist in the pool`);
      return false;
    }

    if (state === "recording") {
      const userId = this.pool[id].userId;
      const userBrowserIds = this.userToBrowserMap.get(userId) || [];

      const hasAnotherRecordingBrowser = userBrowserIds.some(browserId =>
        browserId !== id &&
        this.pool[browserId] &&
        this.pool[browserId].state === "recording"
      );

      if (hasAnotherRecordingBrowser) {
        logger.log('warn', `Cannot set browser ${id} to "recording" state: User ${userId} already has a browser in recording state`);
        return false;
      }
    }

    this.pool[id].state = state;
    logger.log('debug', `Remote browser with id: ${id} state set to ${state}`);
    return true;
  };

  /**
   * Gets the current state of a browser.
   *
   * @param id the browser ID
   * @returns the current state or null if the browser wasn't found
   */
  public getBrowserState = (id: string): BrowserState | null => {
    if (!this.pool[id]) {
      logger.log('warn', `Remote browser with id: ${id} does not exist in the pool`);
      return null;
    }

    return this.pool[id].state;
  };

  /**
   * Returns all browser ids for a specific user and repairs the
   * user-to-browser mapping if it has drifted from the pool contents.
   *
   * @param userId the user ID to find browsers for
   * @returns an array of browser IDs belonging to the user
   */
  public getAllBrowserIdsForUser = (userId: string): string[] => {
    const mappedBrowserIds = this.userToBrowserMap.get(userId) || [];

    // Mapped ids that still exist in the pool...
    const browserIds = mappedBrowserIds.filter(id => this.pool[id]);

    // ...plus any pool slot owned by the user that the mapping missed.
    for (const [id, info] of Object.entries(this.pool)) {
      if (info.userId === userId && !browserIds.includes(id)) {
        browserIds.push(id);
      }
    }

    if (browserIds.length === 0) {
      // Nothing left for this user: make sure no stale mapping lingers.
      this.userToBrowserMap.delete(userId);
      return browserIds;
    }

    const inSync = browserIds.length === mappedBrowserIds.length
      && browserIds.every((id, index) => id === mappedBrowserIds[index]);
    if (!inSync) {
      // Keep the mapping within the two-browser limit (newest two).
      this.userToBrowserMap.set(userId, browserIds.slice(-2));
    }

    return browserIds;
  };

  /**
   * Returns the total number of browser slots in the pool.
   */
  public getPoolSize = (): number => {
    return Object.keys(this.pool).length;
  };

  /**
   * Returns the total number of active users (users with browsers).
   */
  public getActiveUserCount = (): number => {
    return this.userToBrowserMap.size;
  };

  /**
   * Migration helper for code that has not been updated to the user-browser
   * model yet: resolves "the" browser when no reliable user id is available.
   *
   * Resolution order: the current user's browser (if a user id is given);
   * then, if exactly one user owns browsers, that user's newest matching
   * browser; otherwise any matching browser in the pool. Active browsers are
   * preferred at each step.
   *
   * @param currentUserId The ID of the current user, which is prioritized if given
   * @param state Optional state filter to find browsers in a specific state
   * @returns A browser ID if one can be determined, or null
   */
  public getActiveBrowserForMigration = (currentUserId?: string, state?: BrowserState): string | null => {
    if (currentUserId) {
      const browserForUser = this.getActiveBrowserId(currentUserId, state);
      if (browserForUser) {
        return browserForUser;
      }

      // A state was requested for a known user and nothing matched: do not
      // fall through to other users' browsers.
      if (state) {
        return null;
      }
    }

    if (this.userToBrowserMap.size === 1) {
      const [onlyUsersBrowserIds] = this.userToBrowserMap.values();
      // Newest first.
      return this.pickPreferredBrowser([...(onlyUsersBrowserIds || [])].reverse(), state);
    }

    return this.pickPreferredBrowser(Object.keys(this.pool), state);
  };

  /**
   * Checks if there are available browser slots for a user.
   * A user has a free slot when they own fewer than two browsers and, when a
   * "recording" slot is requested, none of their browsers is already recording.
   *
   * @param userId the user ID to check browser slots for
   * @param state the state the new browser would be in
   * @returns true if a slot is available, false otherwise
   */
  public hasAvailableBrowserSlots = (userId: string, state?: BrowserState): boolean => {
    const userBrowserIds = this.userToBrowserMap.get(userId) || [];

    if (userBrowserIds.length >= 2) {
      return false;
    }

    if (state === "recording") {
      return !userBrowserIds.some(browserId =>
        this.pool[browserId] && this.pool[browserId].state === "recording"
      );
    }

    return true;
  };

  /**
   * Returns the first active browser's instance id from the pool,
   * regardless of owner, or null if there is none.
   *
   * @returns the first active remote browser instance's id from the pool
   * @deprecated Use getActiveBrowserId instead to enforce the 1 User - 2 Browser policy
   */
  public getActiveBrowserIdLegacy = (): string | null => {
    // No warning is logged when nothing is found: that is expected in the
    // user-browser model.
    return Object.keys(this.pool).find(id => this.pool[id].active) ?? null;
  };

  /**
   * Reserves a browser slot so that slot counting stays accurate for rapid
   * successive requests. The slot is created with a null browser and status
   * "reserved" until {@link upgradeBrowserSlot} attaches the real browser.
   *
   * @param id browser ID to reserve
   * @param userId user ID that owns this reservation
   * @param state browser state ("recording" or "run")
   * @returns true if the slot was reserved; false if the user has reached the
   *          limit, the id is already taken, or a reservation is in progress
   */
  public reserveBrowserSlotAtomic = (id: string, userId: string, state: BrowserState = "run"): boolean => {
    const lockKey = `${userId}-${state}`;

    if (this.reservationLocks.has(lockKey)) {
      logger.log('debug', `Reservation already in progress for user ${userId} state ${state}`);
      return false;
    }

    try {
      this.reservationLocks.set(lockKey, Date.now());

      if (!this.hasAvailableBrowserSlots(userId, state)) {
        logger.log('debug', `Cannot reserve slot for user ${userId}: no available slots`);
        return false;
      }

      if (this.pool[id]) {
        logger.log('debug', `Browser slot ${id} already exists`);
        return false;
      }

      const now = Date.now();
      this.pool[id] = {
        browser: null,
        active: false,
        userId,
        state,
        status: "reserved",
        createdAt: now,
        lastAccessed: now
      };

      const userBrowserIds = this.userToBrowserMap.get(userId) || [];
      if (!userBrowserIds.includes(id)) {
        userBrowserIds.push(id);
        this.userToBrowserMap.set(userId, userBrowserIds);
      }

      logger.log('info', `Atomically reserved browser slot ${id} for user ${userId} in state ${state}`);
      return true;
    } catch (error: any) {
      logger.log('error', `Error during atomic slot reservation: ${error.message}`);
      // Roll back a half-created reservation.
      if (this.pool[id] && this.pool[id].status === "reserved") {
        this.deleteRemoteBrowser(id);
      }
      return false;
    } finally {
      this.reservationLocks.delete(lockKey);
    }
  };

  /**
   * Upgrades a reserved slot to an actual browser instance.
   *
   * @param id browser ID that was previously reserved
   * @param browser the actual RemoteBrowser instance
   * @returns true if successful, false if the slot is missing or not in "reserved" status
   */
  public upgradeBrowserSlot = (id: string, browser: RemoteBrowser): boolean => {
    const slot = this.pool[id];

    if (!slot) {
      logger.log('warn', `Cannot upgrade browser ${id}: slot does not exist in pool`);
      return false;
    }

    if (slot.status !== "reserved") {
      logger.log('warn', `Cannot upgrade browser ${id}: slot not in reserved state (current: ${slot.status})`);
      return false;
    }

    slot.browser = browser;
    slot.status = "ready";
    logger.log('info', `Upgraded browser slot ${id} to ready state`);
    return true;
  };

  /**
   * Marks a slot as failed and removes it, making a best-effort attempt to
   * shut its browser down first. Does nothing if the slot does not exist.
   *
   * @param id browser ID to mark as failed
   */
  public failBrowserSlot = (id: string): void => {
    const browserInfo = this.pool[id];
    if (!browserInfo) {
      return;
    }

    logger.log('info', `Marking browser slot ${id} as failed`);

    if (browserInfo.browser) {
      try {
        // Fire-and-forget: the slot is removed without waiting for shutdown.
        // `?.catch` keeps this safe when switchOff is absent or not async.
        browserInfo.browser.switchOff?.()?.catch((error: any) => {
          logger.log('warn', `Error closing failed browser ${id}: ${error.message}`);
        });
      } catch (error: any) {
        logger.log('warn', `Error during browser cleanup for ${id}: ${error.message}`);
      }
    }

    this.deleteRemoteBrowser(id);
  };

  /**
   * Cleans up browser slots that have stayed in reserved/initializing status
   * without a browser for more than 5 minutes, then expires stale
   * reservation locks. This prevents leaks from failed initializations.
   */
  public cleanupStaleBrowserSlots = (): void => {
    const now = Date.now();
    const staleThreshold = 5 * 60 * 1000; // 5 minutes

    // Collect first so the pool is not mutated while it is being iterated.
    const staleSlots = Object.entries(this.pool)
      .filter(([, info]) => {
        const isPending = info.status === "reserved" || info.status === "initializing";
        const age = now - (info.createdAt || 0);
        return isPending && info.browser === null && age > staleThreshold;
      })
      .map(([id]) => id);

    for (const id of staleSlots) {
      const info = this.pool[id];
      logger.log('warn', `Cleaning up stale browser slot ${id} with status ${info.status}, age: ${Math.round((now - (info.createdAt || 0)) / 1000)}s`);
      this.failBrowserSlot(id);
    }

    if (staleSlots.length > 0) {
      logger.log('info', `Cleaned up ${staleSlots.length} stale browser slots`);
    }

    this.cleanupStaleReservationLocks();
  };

  /**
   * Removes reservation locks that are older than 1 minute so a lock can
   * never be held indefinitely.
   */
  private cleanupStaleReservationLocks = (): void => {
    const now = Date.now();
    const lockTimeout = 60 * 1000; // 1 minute

    const staleLocks = Array.from(this.reservationLocks.entries())
      .filter(([, timestamp]) => now - timestamp > lockTimeout)
      .map(([lockKey]) => lockKey);

    for (const lockKey of staleLocks) {
      this.reservationLocks.delete(lockKey);
    }

    if (staleLocks.length > 0) {
      logger.log('warn', `Cleaned up ${staleLocks.length} stale reservation locks`);
    }
  };

  /**
   * Gets the current status of a browser slot.
   *
   * @param id browser ID to check
   * @returns the status, or null if the slot doesn't exist or has no status recorded
   */
  public getBrowserStatus = (id: string): BrowserSlotStatus | null => {
    if (!this.pool[id]) {
      return null;
    }
    return this.pool[id].status || null;
  };

  /**
   * Returns all browser instances in the pool (slots without a browser are skipped).
   * Used for cleanup operations like graceful shutdown.
   *
   * @returns Map of browser IDs to browser instances
   */
  public getAllBrowsers = (): Map<string, RemoteBrowser> => {
    const browsers = new Map<string, RemoteBrowser>();

    for (const [id, info] of Object.entries(this.pool)) {
      if (info.browser) {
        browsers.set(id, info.browser);
      }
    }

    return browsers;
  };
}
* @private */ private browser: Browser | null = null; private context: BrowserContext | null = null; /** * The Playwright's [CDPSession](https://playwright.dev/docs/api/class-cdpsession) instance, * used to talk raw Chrome Devtools Protocol. * @private */ private client: CDPSession | null | undefined = null; /** * Socket.io socket instance enabling communication with the client (frontend) side. * @private */ private socket: Socket; /** * The Playwright's [Page](https://playwright.dev/docs/api/class-page) instance * as current interactive remote browser's page. * @private */ private currentPage: Page | null | undefined = null; /** * Interpreter settings for any started interpretation. * @private */ private interpreterSettings: InterpreterSettings = { debug: false, maxConcurrency: 1, maxRepeats: 1, }; /** * The user ID that owns this browser instance * @private */ private userId: string; private lastEmittedUrl: string | null = null; /** * {@link WorkflowGenerator} instance specific to the remote browser. */ public generator: WorkflowGenerator; /** * {@link WorkflowInterpreter} instance specific to the remote browser. */ public interpreter: WorkflowInterpreter; public isDOMStreamingActive: boolean = false; /** * Flag to indicate if this is a recording session (requires rrweb for real-time DOM streaming) * When false (robot run mode), rrweb is skipped to improve performance * @private */ private isRecordingMode: boolean = false; // private memoryCleanupInterval: NodeJS.Timeout | null = null; // private memoryManagementInterval: NodeJS.Timeout | null = null; /** * Initializes a new instances of the {@link Generator} and {@link WorkflowInterpreter} classes and * assigns the socket instance everywhere. 
* @param socket socket.io socket instance used to communicate with the client side * @constructor */ public constructor(socket: Socket, userId: string, poolId: string, isRecordingMode: boolean = false) { this.socket = socket; this.userId = userId; this.interpreter = new WorkflowInterpreter(socket); this.generator = new WorkflowGenerator(socket, poolId); this.isRecordingMode = isRecordingMode; } // private initializeMemoryManagement(): void { // this.memoryManagementInterval = setInterval(() => { // const memoryUsage = process.memoryUsage(); // const heapUsageRatio = memoryUsage.heapUsed / MEMORY_CONFIG.maxHeapSize; // if (heapUsageRatio > MEMORY_CONFIG.heapUsageThreshold * 1.2) { // logger.warn( // "Critical memory pressure detected, triggering emergency cleanup" // ); // this.performMemoryCleanup(); // } else if (heapUsageRatio > MEMORY_CONFIG.heapUsageThreshold) { // logger.warn("High memory usage detected, triggering cleanup"); // if ( // global.gc && // heapUsageRatio > MEMORY_CONFIG.heapUsageThreshold * 1.1 // ) { // global.gc(); // } // } // }, MEMORY_CONFIG.gcInterval); // } // private async performMemoryCleanup(): Promise { // if (global.gc) { // try { // global.gc(); // logger.info("Garbage collection requested"); // } catch (error) { // logger.error("Error during garbage collection:", error); // } // } // if (this.currentPage) { // try { // await new Promise((resolve) => setTimeout(resolve, 500)); // logger.info("CDP session reset completed"); // } catch (error) { // logger.error("Error resetting CDP session:", error); // } // } // this.socket.emit("memory-cleanup", { // userId: this.userId, // timestamp: Date.now(), // }); // } /** * Normalizes URLs to prevent navigation loops while maintaining consistent format */ private normalizeUrl(url: string): string { try { const parsedUrl = new URL(url); parsedUrl.pathname = parsedUrl.pathname.replace(/\/+$/, '') || '/'; parsedUrl.protocol = parsedUrl.protocol.toLowerCase(); return parsedUrl.toString(); } catch { 
return url; } } /** * Determines if a URL change is significant enough to emit */ private shouldEmitUrlChange(newUrl: string): boolean { if (!this.lastEmittedUrl) { return true; } const normalizedNew = this.normalizeUrl(newUrl); const normalizedLast = this.normalizeUrl(this.lastEmittedUrl); return normalizedNew !== normalizedLast; } /** * Setup scroll event listener to track user scrolling */ private setupScrollEventListener(): void { try { this.socket.removeAllListeners('dom:scroll'); } catch (error: any) { logger.warn(`Error removing old scroll listener: ${error.message}`); } this.socket.on( "dom:scroll", (data: { deltaX: number; deltaY: number }) => { if (!this.isDOMStreamingActive || !this.currentPage) return; this.currentPage.mouse.wheel(data.deltaX, data.deltaY).catch(() => {}); } ); } private emitLoadingProgress(progress: number, pendingRequests: number): void { this.socket.emit("domLoadingProgress", { progress: Math.round(progress), pendingRequests, userId: this.userId, timestamp: Date.now(), }); } private async setupPageEventListeners(page: Page) { try { page.removeAllListeners('framenavigated'); page.removeAllListeners('load'); logger.debug('Removed existing page event listeners before re-registering'); } catch (error: any) { logger.warn(`Error removing existing page listeners: ${error.message}`); } page.on('framenavigated', async (frame) => { if (frame === page.mainFrame()) { const currentUrl = page.url(); if (this.shouldEmitUrlChange(currentUrl)) { this.lastEmittedUrl = currentUrl; this.socket.emit('urlChanged', { url: currentUrl, userId: this.userId }); } await page.evaluate(() => { if (window.rrweb && window.isRecording) { window.isRecording = false; } }); if (this.isRecordingMode) { await page.waitForLoadState('networkidle', { timeout: 10000 }).catch(() => { logger.warn('[rrweb] Network idle timeout on navigation, proceeding with rrweb initialization'); }); await this.initializeRRWebRecording(page); } } }); page.on('load', async () => { const 
injectScript = async (): Promise => { try { await page.waitForLoadState('networkidle', { timeout: 5000 }); if (page.isClosed()) { logger.debug('Page is closed, cannot inject script'); return false; } await page.evaluate(getInjectableScript()); return true; } catch (error: any) { logger.log('warn', `Script injection attempt failed: ${error.message}`); return false; } }; const success = await injectScript(); console.log("Script injection result:", success); }); } /** * Initialize rrweb recording for real-time DOM streaming * This replaces the snapshot-based approach with live event streaming * Only runs in recording mode - skipped for robot runs to improve performance */ private async initializeRRWebRecording(page: Page): Promise { if (!this.isRecordingMode) { logger.debug('[rrweb] Skipping initialization - not in recording mode (robot run)'); return; } try { const rrwebJsPath = require.resolve('rrweb/dist/rrweb.min.js'); const rrwebScriptContent = readFileSync(rrwebJsPath, 'utf8'); await page.context().addInitScript(rrwebScriptContent); await page.evaluate((scriptContent) => { if (typeof window.rrweb === 'undefined') { try { (0, eval)(scriptContent); } catch (e) { console.error('[rrweb] eval failed:', e); } } }, rrwebScriptContent); const rrwebLoaded = await page.evaluate(() => typeof window.rrweb !== 'undefined'); if (rrwebLoaded) { logger.debug('[rrweb] Script injected successfully'); } else { logger.warn('[rrweb] Script injection failed - window.rrweb not found'); } const isAlreadyExposed = await page.evaluate(() => { return typeof window.emitEventToBackend === 'function'; }); if (!isAlreadyExposed) { let hasEmittedFullSnapshot = false; await page.exposeFunction('emitEventToBackend', (event: any) => { this.socket.emit('rrweb-event', event); if (event.type === 2 && !hasEmittedFullSnapshot) { hasEmittedFullSnapshot = true; this.emitLoadingProgress(100, 0); logger.debug(`[rrweb] Full snapshot sent, loading progress at 100%`); } }); } const rrwebStatus = await 
page.evaluate(() => { if (!window.rrweb) { console.error('[rrweb] window.rrweb is not defined!'); return { success: false, error: 'window.rrweb is not defined' }; } if (window.isRecording) { return { success: false, error: 'already recording' }; } window.isRecording = true; try { const recordHandle = window.rrweb.record({ emit(event: any) { if (window.emitEventToBackend) { window.emitEventToBackend(event).catch(() => { }); } }, maskAllInputs: false, recordCanvas: false, sampling: { mousemove: false, mouseInteraction: true, scroll: 75, media: 800, input: 'last', }, input: true, checkoutEveryNms: 120000, }); (window as any).rrwebRecordHandle = recordHandle; return { success: true }; } catch (error: any) { console.error('[rrweb] Failed to start recording:', error); return { success: false, error: error.message }; } }); if (rrwebStatus.success) { this.isDOMStreamingActive = true; this.emitLoadingProgress(80, 0); this.setupScrollEventListener(); } else { logger.error(`Failed to initialize rrweb recording: ${rrwebStatus.error}`); } } catch (error: any) { logger.error(`Failed to initialize rrweb recording: ${error.message}`); } } private getUserAgent() { const userAgents = [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.140 Safari/537.36', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:117.0) Gecko/20100101 Firefox/117.0', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.1938.81 Safari/537.36 Edg/116.0.1938.81', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.96 Safari/537.36 OPR/101.0.4843.25', 'Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.62 Safari/537.36', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:118.0) Gecko/20100101 Firefox/118.0', ]; return userAgents[Math.floor(Math.random() * userAgents.length)]; } /** * Apply modern fingerprint-suite injection */ private 
async applyEnhancedFingerprinting(context: BrowserContext): Promise<void> {
    // Best effort: a failure here is logged and the context is used as it is.
    try {
      const fingerprintGenerator = new FingerprintGenerator();
      const fingerprint = fingerprintGenerator.getFingerprint();
      const fingerprintInjector = new FingerprintInjector();

      await fingerprintInjector.attachFingerprintToPlaywright(context as any, fingerprint);

      logger.info("Enhanced fingerprinting applied successfully");
    } catch (fingerprintError: unknown) {
      // Narrow before reading `.message` so a non-Error throw cannot fail again here.
      const reason = fingerprintError instanceof Error ? fingerprintError.message : String(fingerprintError);
      logger.warn(`Modern fingerprint injection failed: ${reason}. Using existing protection.`);
    }
  }

  /**
   * An asynchronous constructor for asynchronously initialized properties.
   * Must be called right after creating an instance of RemoteBrowser class.
   *
   * Connects to the remote browser, creates a context (with the user's proxy when one
   * is configured), opens the first page, and in recording mode starts rrweb.
   * A failed attempt is retried up to 3 times; the whole procedure is capped at 120 s.
   *
   * @param userId id of the user whose proxy configuration is applied
   * @returns {Promise<void>}
   * @throws Error when all attempts fail or the overall timeout elapses
   */
  public initialize = async (userId: string): Promise<void> => {
    const MAX_RETRIES = 3;
    const OVERALL_INIT_TIMEOUT = 120000;
    let retryCount = 0;
    let success = false;

    this.socket.emit("dom-snapshot-loading", {
      userId: this.userId,
      timestamp: Date.now(),
    });
    this.emitLoadingProgress(0, 0);

    const initializationPromise = (async () => {
      while (!success && retryCount < MAX_RETRIES) {
        try {
          this.browser = await connectToRemoteBrowser();

          if (!this.browser || this.browser.isConnected() === false) {
            throw new Error('Browser failed to launch or is not connected');
          }

          this.emitLoadingProgress(20, 0);

          const proxyConfig = await getDecryptedProxyConfig(userId);
          let proxyOptions: { server: string, username?: string, password?: string } = { server: '' };

          if (proxyConfig.proxy_url) {
            proxyOptions = {
              server: proxyConfig.proxy_url,
              // Credentials are only attached when both parts are present.
              ...(proxyConfig.proxy_username && proxyConfig.proxy_password && {
                username: proxyConfig.proxy_username,
                password: proxyConfig.proxy_password,
              }),
            };
          }

          const contextOptions: any = {
            // viewport: { height: 400, width: 900 },
            // recordVideo: { dir: 'videos/' }
            // Force reduced motion to prevent animation issues
            reducedMotion: 'reduce',
            // Force JavaScript to be enabled
            javaScriptEnabled: true,
            // Set a reasonable timeout
            timeout: 50000,
            // Do not emulate forced-colors mode
            forcedColors: 'none',
            isMobile: false,
            hasTouch: false,
            userAgent: this.getUserAgent(),
          };

          if (proxyOptions.server) {
            contextOptions.proxy = {
              server: proxyOptions.server,
              username: proxyOptions.username ? proxyOptions.username : undefined,
              password: proxyOptions.password ? proxyOptions.password : undefined,
            };
          }

          await new Promise(resolve => setTimeout(resolve, 500));

          const contextPromise = this.browser.newContext(contextOptions);
          let contextTimer: ReturnType<typeof setTimeout> | undefined;
          try {
            this.context = await Promise.race([
              contextPromise,
              new Promise<never>((_, reject) => {
                contextTimer = setTimeout(() => reject(new Error('Context creation timed out after 15s')), 15000);
              })
            ]) as BrowserContext;
          } finally {
            // Do not leave the 15 s timer pending once the race is decided.
            clearTimeout(contextTimer);
          }

          await this.applyEnhancedFingerprinting(this.context);

          // Patches the Navigator.prototype "webdriver" getter so it reports false.
          await this.context.addInitScript(
            `const defaultGetter = Object.getOwnPropertyDescriptor( Navigator.prototype, "webdriver" ).get; defaultGetter.apply(navigator); defaultGetter.toString(); Object.defineProperty(Navigator.prototype, "webdriver", { set: undefined, enumerable: true, configurable: true, get: new Proxy(defaultGetter, { apply: (target, thisArg, args) => { Reflect.apply(target, thisArg, args); return false; }, }), }); const patchedGetter = Object.getOwnPropertyDescriptor( Navigator.prototype, "webdriver" ).get; patchedGetter.apply(navigator); patchedGetter.toString();`
          );

          this.currentPage = await this.context.newPage();

          this.emitLoadingProgress(40, 0);

          await this.setupPageEventListeners(this.currentPage);

          if (this.isRecordingMode) {
            await this.currentPage.waitForLoadState('networkidle', { timeout: 10000 }).catch(() => {
              logger.warn('[rrweb] Network idle timeout, proceeding with rrweb initialization');
            });

            await this.initializeRRWebRecording(this.currentPage);
          }

          try {
            const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']);
            await blocker.enableBlockingInPage(this.currentPage as any);
            this.client = await this.currentPage.context().newCDPSession(this.currentPage);
            await blocker.disableBlockingInPage(this.currentPage as any);
            console.log('Adblocker initialized');
          } catch (error: any) {
            console.warn('Failed to initialize adblocker, continuing without it:', error.message);
            // The CDP session is required regardless of the adblocker outcome.
            this.client = await this.currentPage.context().newCDPSession(this.currentPage);
          }

          success = true;
          logger.log('debug', `Browser initialized successfully for user ${userId}`);
        } catch (error: any) {
          retryCount++;
          logger.log('error', `Browser initialization failed (attempt ${retryCount}/${MAX_RETRIES}): ${error.message}`);

          // Drop the half-initialized browser before the next attempt.
          if (this.browser) {
            try {
              await this.browser.close();
            } catch (closeError) {
              logger.log('warn', `Failed to close browser during cleanup: ${closeError}`);
            }
            this.browser = null;
          }

          if (retryCount >= MAX_RETRIES) {
            throw new Error(`Failed to initialize browser after ${MAX_RETRIES} attempts: ${error.message}`);
          }

          await new Promise(resolve => setTimeout(resolve, 1000));
        }
      }
    })();

    let overallTimer: ReturnType<typeof setTimeout> | undefined;
    const timeoutPromise = new Promise<never>((_, reject) => {
      overallTimer = setTimeout(() => reject(new Error(`Browser initialization timed out after ${OVERALL_INIT_TIMEOUT}ms`)), OVERALL_INIT_TIMEOUT);
    });

    try {
      await Promise.race([initializationPromise, timeoutPromise]);
    } finally {
      // Without this the 120 s timer would stay scheduled after a successful start.
      clearTimeout(overallTimer);
    }
  };

  /**
   * Captures a screenshot directly without running the workflow interpreter
   * @param settings Screenshot settings containing fullPage, type, etc.
* @returns Promise that settles once the result (or an error) has been emitted on the socket
   */
  public captureDirectScreenshot = async (settings: {
    fullPage: boolean;
    type: 'png' | 'jpeg';
    timeout?: number;
    animations?: 'disabled' | 'allow';
    caret?: 'hide' | 'initial';
    scale?: 'css' | 'device';
  }): Promise<void> => {
    if (!this.currentPage) {
      logger.error("No current page available for screenshot");
      this.socket.emit('screenshotError', {
        userId: this.userId,
        error: 'No active page available'
      });
      return;
    }

    try {
      this.socket.emit('screenshotCaptureStarted', {
        userId: this.userId,
        fullPage: settings.fullPage
      });

      const screenshotBuffer = await this.currentPage.screenshot({
        fullPage: settings.fullPage,
        type: settings.type || 'png',
        // `??` keeps an explicit 0, which Playwright treats as "no timeout".
        timeout: settings.timeout ?? 30000,
        animations: settings.animations || 'allow',
        caret: settings.caret || 'hide',
        scale: settings.scale || 'device'
      });

      const base64Data = screenshotBuffer.toString('base64');
      const mimeType = `image/${settings.type || 'png'}`;
      const dataUrl = `data:${mimeType};base64,${base64Data}`;

      this.socket.emit('directScreenshotCaptured', {
        userId: this.userId,
        screenshot: dataUrl,
        mimeType: mimeType,
        fullPage: settings.fullPage,
        timestamp: Date.now()
      });
    } catch (error) {
      logger.error('Failed to capture direct screenshot:', error);
      this.socket.emit('screenshotError', {
        userId: this.userId,
        error: error instanceof Error ? error.message : 'Unknown error occurred'
      });
    }
  };

  /**
   * Removes all socket event listeners
   */
  private removeAllSocketListeners(): void {
    const editorEvents = [
      'captureDirectScreenshot',
      'settings',
      'changeTab',
      'addTab',
      'closeTab',
      'dom:scroll',
    ];

    try {
      for (const eventName of editorEvents) {
        this.socket.removeAllListeners(eventName);
      }
      logger.debug(`Removed all socket listeners for user ${this.userId}`);
    } catch (error: any) {
      logger.warn(`Error removing socket listeners: ${error.message}`);
    }
  }

  /**
   * Registers all event listeners needed for the recording editor session.
* Should be called only once after the full initialization of the remote browser.
   * Previously registered editor listeners are removed first, so calling it again
   * for a new socket does not stack handlers.
   * @returns void
   */
  public registerEditorEvents = (): void => {
    logger.log("debug", `Registering editor events for user: ${this.userId}`);

    this.removeAllSocketListeners();

    this.socket.on("captureDirectScreenshot", async (settings) => {
      // captureDirectScreenshot reports its own failures over the socket.
      await this.captureDirectScreenshot(settings);
    });

    // Nothing awaits these async handlers, so each one catches its own errors;
    // otherwise a failed tab operation would surface as an unhandled rejection.
    this.socket.on("changeTab", async (tabIndex) => {
      try {
        await this.changeTab(tabIndex);
      } catch (error: any) {
        logger.log('warn', `Error handling changeTab event: ${error.message}`);
      }
    });

    this.socket.on("addTab", async () => {
      try {
        await this.currentPage?.context().newPage();
        // The page just created is the last one in the context.
        const lastTabIndex = this.currentPage ? this.currentPage.context().pages().length - 1 : 0;
        await this.changeTab(lastTabIndex);
      } catch (error: any) {
        logger.log('warn', `Error handling addTab event: ${error.message}`);
      }
    });

    this.socket.on("closeTab", async (tabInfo) => {
      try {
        const page = this.currentPage?.context().pages()[tabInfo.index];
        if (page) {
          if (tabInfo.isCurrent) {
            // Move to the following tab when there is one, otherwise to the previous one.
            if (this.currentPage?.context().pages()[tabInfo.index + 1]) {
              await this.changeTab(tabInfo.index + 1);
            } else {
              await this.changeTab(tabInfo.index - 1);
            }
          }
          await page.close();
        }
      } catch (error: any) {
        logger.log('warn', `Error handling closeTab event: ${error.message}`);
      }
    });
  };

  /**
   * Terminates the dom snapshot session and closes the remote browser.
   * If an interpretation was running it will be stopped.
* Every step is best effort: a failing step is logged and the next one still runs.
   * @returns {Promise<void>}
   */
  public async switchOff(): Promise<void> {
    this.isDOMStreamingActive = false;

    // if (this.memoryCleanupInterval) {
    //   clearInterval(this.memoryCleanupInterval);
    //   this.memoryCleanupInterval = null;
    // }

    // if (this.memoryManagementInterval) {
    //   clearInterval(this.memoryManagementInterval);
    //   this.memoryManagementInterval = null;
    // }

    this.removeAllSocketListeners();

    try {
      if (this.currentPage) {
        const isClosed = this.currentPage.isClosed();
        if (!isClosed) {
          this.currentPage.removeAllListeners();
          logger.debug('Removed all page event listeners');
        } else {
          logger.debug('Page already closed, skipping listener removal');
        }
      }
    } catch (error: any) {
      logger.warn(`Error removing page listeners: ${error.message}`);
    }

    if (this.generator) {
      try {
        this.generator.cleanup();
        logger.debug('Generator cleanup completed');
      } catch (error: any) {
        logger.warn(`Error cleaning up generator: ${error.message}`);
      }
    }

    try {
      await this.interpreter.stopInterpretation();
    } catch (error) {
      logger.error("Error stopping interpretation during shutdown:", error);
    }

    try {
      if (this.client && this.currentPage && !this.currentPage.isClosed()) {
        await this.settleWithin(this.client.detach(), 5000, 'CDP detach timeout');
        logger.debug('CDP session detached successfully');
      }
    } catch (error: any) {
      logger.warn(`Error detaching CDP session: ${error.message}`);
    } finally {
      this.client = null;
    }

    try {
      if (this.currentPage && !this.currentPage.isClosed()) {
        await this.settleWithin(this.currentPage.close(), 5000, 'Page close timeout');
        logger.debug('Current page closed successfully');
      }
    } catch (error: any) {
      logger.warn(`Error closing current page: ${error.message}`);
    } finally {
      this.currentPage = null;
    }

    try {
      if (this.context) {
        await this.settleWithin(this.context.close(), 5000, 'Context close timeout');
        logger.debug('Browser context closed successfully');
      }
    } catch (error: any) {
      logger.warn(`Error closing browser context: ${error.message}`);
    } finally {
      this.context = null;
    }

    try {
      if (this.browser) {
        await this.settleWithin(this.browser.close(), 5000, 'Browser close timeout');
        logger.debug('Browser closed successfully');
      }
    } catch (error: any) {
      logger.error("Error during browser close:", error);
    } finally {
      this.browser = null;
    }
  }

  /**
   * Awaits `operation`, rejecting with `timeoutMessage` when it has not settled within
   * `timeoutMs`. The timer is always cleared, so no timeout stays scheduled afterwards.
   */
  private async settleWithin<T>(operation: Promise<T>, timeoutMs: number, timeoutMessage: string): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error(timeoutMessage)), timeoutMs);
    });

    try {
      return await Promise.race([operation, timeout]);
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Updates the active socket instance.
   * This will update all registered events for the socket and
   * all the properties using the socket.
   * @param socket socket.io socket instance used to communicate with the client side
   * @returns void
   */
  public updateSocket = (socket: Socket): void => {
    this.socket = socket;
    this.registerEditorEvents();
    this.generator?.updateSocket(socket);
    this.interpreter?.updateSocket(socket);

    // The scroll listener is bound to the socket, so it is set up again for the new one.
    if (this.isDOMStreamingActive) {
      this.setupScrollEventListener();
    }
  };

  /**
   * Starts the interpretation of the currently generated workflow.
* Runs on a freshly created page; problems are logged rather than thrown when the
   * generator or the new page is missing.
   * @returns {Promise<void>}
   */
  public interpretCurrentRecording = async (): Promise<void> => {
    logger.log('debug', 'Starting interpretation in the editor');

    if (!this.generator) {
      logger.log('error', 'Generator is not initialized');
      return;
    }

    const workflow = this.generator.AddGeneratedFlags(this.generator.getWorkflowFile());
    await this.initializeNewPage();

    if (!this.currentPage) {
      logger.log('error', 'Could not get a new page, returned undefined');
      return;
    }

    // this.currentPage.setViewportSize({ height: 400, width: 900 });
    const params = this.generator.getParams();
    if (params) {
      // Keep the value of every parameter that is already set and default the rest to ''.
      const previousParams = this.interpreterSettings.params;
      const knownNames = previousParams ? Object.keys(previousParams) : [];
      const nextParams: Record<string, any> = {};

      for (const param of params) {
        nextParams[param] = knownNames.includes(param) ? previousParams[param] : '';
      }

      this.interpreterSettings.params = nextParams;
    }

    logger.log('debug', `Starting interpretation with settings: ${JSON.stringify(this.interpreterSettings, null, 2)}`);

    await this.interpreter.interpretRecordingInEditor(
      workflow,
      this.currentPage,
      (newPage: Page) => this.currentPage = newPage,
      this.interpreterSettings
    );

    // clear the active index from generator
    this.generator.clearLastIndex();
  };

  /**
   * Returns the current page instance.
   * @returns {Page | null | undefined}
   */
  public getCurrentPage = (): Page | null | undefined => {
    return this.currentPage;
  };

  /**
   * Changes the active page to the page instance on the given index
   * available in pages array on the {@link BrowserContext}.
   * Registers the page event listeners and opens a new CDP session on the selected page.
* @param tabIndex index of the page in the pages array on the {@link BrowserContext}
   * @returns {Promise<void>}
   */
  private changeTab = async (tabIndex: number): Promise<void> => {
    const page = this.currentPage?.context().pages()[tabIndex];

    if (!page) {
      logger.log('error', `${tabIndex} index out of range of pages`)
      return;
    }

    this.currentPage = page;
    await this.setupPageEventListeners(this.currentPage);

    //await this.currentPage.setViewportSize({ height: 400, width: 900 })
    this.client = await this.currentPage.context().newCDPSession(this.currentPage);

    // Include userId in the URL change event
    this.socket.emit('urlChanged', {
      url: this.currentPage.url(),
      userId: this.userId
    });
  }

  /**
   * Internal method for a new page initialization.
   * Opens a page through the browser, closes the previous current page and registers
   * the page event listeners on the new one.
   * NOTE(review): the page is created with `browser.newPage()`, not from `this.context` -
   * confirm that the context-level settings are not needed for it.
   * @param options optional page options to be used when creating a new page
   * @returns {Promise<void>}
   */
  private initializeNewPage = async (options?: Object): Promise<void> => {
    // `newPage(undefined)` is the same as `newPage()`, so a single call covers both cases.
    const newPage = await this.browser?.newPage(options);

    await newPage?.setExtraHTTPHeaders({
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    });

    await this.currentPage?.close();
    this.currentPage = newPage;

    if (this.currentPage) {
      await this.setupPageEventListeners(this.currentPage);

      logger.debug('Using rrweb live recording for new page');
    } else {
      logger.log('error', 'Could not get a new page, returned undefined');
    }
  };
}

================================================
FILE: server/src/browser-management/controller.ts
================================================
/**
 * The main function group which determines the flow of remote browser management.
 * Holds the singleton instances of browser pool and socket.io server.
*/
import { Socket } from "socket.io";
import { v4 as uuid } from "uuid";
import { Page } from "playwright-core";

import { createSocketConnection, createSocketConnectionForRun } from "../socket-connection/connection";
import { io, browserPool } from "../server";
import { RemoteBrowser } from "./classes/RemoteBrowser";
import { RemoteBrowserOptions } from "../types";
import logger from "../logger";

/**
 * Starts and initializes a {@link RemoteBrowser} instance.
 * Creates a new socket connection over a dedicated namespace
 * and registers all interaction event handlers.
 * Returns the id of an active browser or the new remote browser's generated id.
 * When the user already has a recording browser, the new socket is attached to it
 * instead of launching another browser.
 * @param userId id of the user the recording browser belongs to
 * @param mode recording mode, defaults to "dom" (not read inside this function)
 * @returns string
 * @category BrowserManagement-Controller
 */
export const initializeRemoteBrowserForRecording = (userId: string, mode: string = "dom"): string => {
  const id = getActiveBrowserIdByState(userId, "recording") || uuid();

  createSocketConnection(
    io.of(id),
    userId,
    async (socket: Socket) => {
      // browser is already active
      const activeId = getActiveBrowserIdByState(userId, "recording");
      if (activeId) {
        const remoteBrowser = browserPool.getRemoteBrowser(activeId);
        remoteBrowser?.updateSocket(socket);
      } else {
        const browserSession = new RemoteBrowser(socket, userId, id, true);
        browserSession.interpreter.subscribeToPausing();

        try {
          await browserSession.initialize(userId);
          // registerEditorEvents is synchronous, so there is nothing to await.
          browserSession.registerEditorEvents();

          logger.info('DOM streaming started for remote browser in recording mode');

          browserPool.addRemoteBrowser(id, browserSession, userId, false, "recording");
        } catch (initError: any) {
          logger.error(`Failed to initialize browser for recording: ${initError.message}`);

          logger.info('Sending browser failure notification to frontend');

          socket.emit('dom-mode-error', {
            userId: userId,
            error: 'Failed to start the browser, please try again in some time.'
          });

          socket.emit('error', {
            userId: userId,
            message: 'Failed to start the browser, please try again in some time.',
            details: initError.message
          });

          // Short pause before tearing the session down.
          await new Promise(resolve => setTimeout(resolve, 100));

          try {
            await browserSession.switchOff();
            logger.debug('Cleaned up failed browser session');
          } catch (cleanupError: any) {
            logger.warn(`Failed to cleanup browser session: ${cleanupError.message}`);
          }

          logger.info('Browser initialization failed, user notified');

          // Returning here skips the 'loaded' event below.
          return id;
        }
      }
      socket.emit('loaded');
    });
  return id;
};

/**
 * Starts and initializes a {@link RemoteBrowser} instance for interpretation.
 * Creates a new {@link Socket} connection over a dedicated namespace.
 * Returns the new remote browser's generated id.
 * A pool slot is reserved synchronously; the browser itself is initialized in the
 * background, and the slot is marked as failed when that initialization rejects.
 * @param userId User ID for browser ownership
 * @returns string Browser ID
 * @throws Error when userId is missing or no browser slot can be reserved
 * @category BrowserManagement-Controller
 */
export const createRemoteBrowserForRun = (userId: string): string => {
  if (!userId) {
    logger.log('error', 'createRemoteBrowserForRun: Missing required parameter userId');
    throw new Error('userId is required');
  }

  const id = uuid();

  const slotReserved = browserPool.reserveBrowserSlotAtomic(id, userId, "run");
  if (!slotReserved) {
    logger.log('warn', `Cannot create browser for user ${userId}: no available slots`);
    throw new Error('User has reached maximum browser limit');
  }

  logger.log('info', `createRemoteBrowserForRun: Reserved slot ${id} for user ${userId}`);

  initializeBrowserAsync(id, userId)
    .catch((error: any) => {
      logger.log('error', `Unhandled error in initializeBrowserAsync for browser ${id}: ${error.message}`);
      browserPool.failBrowserSlot(id);
    });

  return id;
};

/**
 * Terminates a remote browser recording session
 * and removes the browser from the browser pool.
* @param id instance id of the remote browser to be terminated
 * @param userId id of the requesting user (not read inside this function)
 * @returns {Promise<boolean>} `true` when no session exists for `id`, `false` when removal
 * from the pool throws, otherwise the result of `browserPool.deleteRemoteBrowser`
 * @category BrowserManagement-Controller
 */
export const destroyRemoteBrowser = async (id: string, userId: string): Promise<boolean> => {
  const DESTROY_TIMEOUT = 30000;

  const destroyPromise = (async () => {
    try {
      const browserSession = browserPool.getRemoteBrowser(id);
      if (!browserSession) {
        logger.log('info', `Browser with id: ${id} not found, may have already been destroyed`);
        return true;
      }

      logger.log('debug', `Switching off the browser with id: ${id}`);

      try {
        await browserSession.switchOff();
      } catch (switchOffError) {
        logger.log('warn', `Error switching off browser ${id}: ${switchOffError}`);
      }

      try {
        const namespace = io.of(id);

        const sockets = await namespace.fetchSockets();
        for (const socket of sockets) {
          socket.disconnect(true);
        }

        namespace.removeAllListeners();

        // Short pause before inspecting the namespace registry.
        await new Promise(resolve => setTimeout(resolve, 100));

        // `_nsps` is read through an `any` cast; the namespace entry is dropped only
        // when it has no sockets left.
        const nsps = (io as any)._nsps;
        if (nsps && nsps.has(`/${id}`)) {
          const ns = nsps.get(`/${id}`);
          if (ns && ns.sockets && ns.sockets.size === 0) {
            nsps.delete(`/${id}`);
            logger.log('debug', `Deleted empty namespace /${id} from io._nsps Map`);
          } else {
            logger.log('warn', `Namespace /${id} still has ${ns?.sockets?.size || 0} sockets, skipping manual deletion`);
          }
        }

        logger.log('debug', `Cleaned up socket namespace for browser ${id}`);
      } catch (namespaceCleanupError: any) {
        logger.log('warn', `Error cleaning up socket namespace for browser ${id}: ${namespaceCleanupError.message}`);
      }

      return browserPool.deleteRemoteBrowser(id);
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      logger.log('error', `Failed to destroy browser ${id}: ${errorMessage}`);

      try {
        return browserPool.deleteRemoteBrowser(id);
      } catch (deleteError) {
        logger.log('error', `Failed to delete browser ${id} from pool: ${deleteError}`);
        return false;
      }
    }
  })();

  let destroyTimer: ReturnType<typeof setTimeout> | undefined;
  try {
    const timeoutPromise = new Promise<never>((_, reject) => {
      destroyTimer = setTimeout(() => reject(new Error(`Browser destruction timed out after ${DESTROY_TIMEOUT}ms`)), DESTROY_TIMEOUT);
    });

    return await Promise.race([destroyPromise, timeoutPromise]);
  } catch (timeoutError: any) {
    logger.log('error', `Browser ${id} destruction timeout: ${timeoutError.message} - force removing from pool`);

    try {
      return browserPool.deleteRemoteBrowser(id);
    } catch (deleteError) {
      logger.log('error', `Failed to force delete browser ${id} after timeout: ${deleteError}`);
      return false;
    }
  } finally {
    // Do not keep the 30 s timer scheduled once the destruction has settled.
    clearTimeout(destroyTimer);
  }
};

/**
 * Returns the id of an active browser or null.
 * Wrapper around {@link browserPool.getActiveBrowserId()} function.
 * @param userId the user ID to find the browser for
 * @returns {string | null}
 * @category BrowserManagement-Controller
 */
export const getActiveBrowserId = (userId: string): string | null => {
  return browserPool.getActiveBrowserId(userId);
};

/**
 * Returns the id of an active browser with the specified state or null.
 * @param userId the user ID to find the browser for
 * @param state the browser state to filter by ("recording" or "run")
 * @returns {string | null}
 * @category BrowserManagement-Controller
 */
export const getActiveBrowserIdByState = (userId: string, state: "recording" | "run"): string | null => {
  return browserPool.getActiveBrowserId(userId, state);
};

/**
 * Checks if there are available browser slots for a user.
 * Wrapper around {@link browserPool.hasAvailableBrowserSlots()} function.
 * If state is provided, also checks that none of their active browsers are in that state.
* @param userId the user ID to check browser slots for
 * @param state optional state to check - if provided, ensures no browser is in this state
 * @returns {boolean} true if user has available slots (and no browsers in specified state if state is provided)
 * @category BrowserManagement-Controller
 */
export const canCreateBrowserInState = (userId: string, state?: "recording" | "run"): boolean => {
  return browserPool.hasAvailableBrowserSlots(userId, state);
};

/**
 * Returns the url string from a remote browser if exists in the browser pool.
 * @param id instance id of the remote browser
 * @param userId id of the requesting user (not read inside this function)
 * @returns {string | undefined}
 * @category BrowserManagement-Controller
 */
export const getRemoteBrowserCurrentUrl = (id: string, userId: string): string | undefined => {
  return browserPool.getRemoteBrowser(id)?.getCurrentPage()?.url();
};

/**
 * Builds the label shown for a tab: the URL's hostname without a leading "www.",
 * or 'new tab' when the URL has no hostname or cannot be parsed.
 */
const tabLabelFromUrl = (rawUrl: string): string => {
  try {
    // Only the prefix is stripped; the rest of the hostname (including hyphens) is kept.
    const host = new URL(rawUrl).hostname.replace(/^www\./, '');
    return host || 'new tab';
  } catch {
    return 'new tab';
  }
};

/**
 * Returns the array of tab strings from a remote browser if exists in the browser pool.
 * @param id instance id of the remote browser
 * @param userId id of the requesting user (not read inside this function)
 * @return {string[] | undefined}
 * @category BrowserManagement-Controller
 */
export const getRemoteBrowserCurrentTabs = (id: string, userId: string): string[] | undefined => {
  return browserPool.getRemoteBrowser(id)?.getCurrentPage()?.context().pages()
    .map((page) => tabLabelFromUrl(page.url()));
};

/**
 * Interprets the currently generated workflow in the active browser instance.
 * If there is no active browser, the function logs an error.
* @returns {Promise}
 * @category BrowserManagement-Controller
 */
export const interpretWholeWorkflow = async (userId: string) => {
  const id = getActiveBrowserIdByState(userId, "recording");
  if (id) {
    const browser = browserPool.getRemoteBrowser(id);
    if (browser) {
      await browser.interpretCurrentRecording();
    } else {
      logger.log('error', `No active browser with id ${id} found in the browser pool`);
    }
  } else {
    logger.log('error', `Cannot interpret the workflow: bad id ${id}.`);
  }
};

/**
 * Stops the interpretation of the current workflow in the active browser instance.
 * If there is no active browser, the function logs an error.
 * NOTE(review): this calls `switchOff()`, which also closes the page, context and
 * browser - confirm that a full shutdown is intended here.
 * @returns {Promise}
 * @category BrowserManagement-Controller
 */
export const stopRunningInterpretation = async (userId: string) => {
  const id = getActiveBrowserIdByState(userId, "recording");
  if (id) {
    const browserSession = browserPool.getRemoteBrowser(id);
    await browserSession?.switchOff();
  } else {
    logger.log('error', 'Cannot stop interpretation: No active browser or generator.');
  }
};

/**
 * Initializes the browser behind a slot reserved by {@link createRemoteBrowserForRun}.
 * Waits up to 15 s for a client to connect to the browser's namespace and falls back
 * to a logging-only dummy socket when nobody connects. On any failure the slot is
 * marked as failed and the error is rethrown.
 * @param id id of the reserved browser slot
 * @param userId id of the user the browser belongs to
 */
const initializeBrowserAsync = async (id: string, userId: string) => {
  try {
    const namespace = io.of(id);
    let clientConnected = false;
    let connectionTimeout: NodeJS.Timeout;

    // Settles exactly once: with the first connected socket, with null after 15 s,
    // or with a rejection when the namespace reports an error.
    const waitForConnection = new Promise<Socket | null>((resolve, reject) => {
      namespace.on('connection', (socket: Socket) => {
        clientConnected = true;
        clearTimeout(connectionTimeout);
        logger.log('info', `Frontend connected to browser ${id} via socket ${socket.id}`);
        resolve(socket);
      });

      namespace.on('error', (error: any) => {
        logger.log('error', `Socket namespace error for browser ${id}: ${error.message}`);
        clearTimeout(connectionTimeout);
        // Rejecting lets the outer catch fail the slot. Clearing the timer without
        // settling would leave this function waiting forever.
        reject(error);
      });

      connectionTimeout = setTimeout(() => {
        if (!clientConnected) {
          logger.log('warn', `No client connected to browser ${id} within timeout, proceeding with dummy socket`);
          resolve(null);
        }
      }, 15000);
    });

    // A settled promise cannot yield a different value, so it is awaited once;
    // re-awaiting it in a retry loop would only add delay.
    const socket = await waitForConnection;

    try {
      let browserSession: RemoteBrowser;

      if (socket) {
        logger.log('info', `Using real socket for browser ${id}`);
        browserSession = new RemoteBrowser(socket, userId, id);
      } else {
        logger.log('info', `Using dummy socket for browser ${id}`);
        const dummySocket = {
          emit: (event: string, data?: any) => {
            logger.log('debug', `Browser ${id} dummy socket emitted ${event}:`, data);
          },
          on: () => {},
          // RemoteBrowser removes its listeners through this method.
          removeAllListeners: () => {},
          id: `dummy-${id}`,
        } as any;

        browserSession = new RemoteBrowser(dummySocket, userId, id);
      }

      logger.log('debug', `Starting browser initialization for ${id}`);

      let initTimer: ReturnType<typeof setTimeout> | undefined;
      try {
        const BROWSER_INIT_TIMEOUT = 45000;
        logger.log('info', `Browser initialization starting with ${BROWSER_INIT_TIMEOUT/1000}s timeout`);

        const initPromise = browserSession.initialize(userId);
        const timeoutPromise = new Promise<never>((_, reject) => {
          initTimer = setTimeout(() => reject(new Error('Browser initialization timeout')), BROWSER_INIT_TIMEOUT);
        });

        await Promise.race([initPromise, timeoutPromise]);
      } catch (initError: any) {
        logger.log('error', `Browser initialization failed for ${id}: ${initError.message}`);

        try {
          await browserSession.switchOff();
          logger.log('info', `Cleaned up failed browser initialization for ${id}`);
        } catch (cleanupError: any) {
          logger.log('error', `Failed to cleanup browser ${id}: ${cleanupError.message}`);
        }

        throw initError;
      } finally {
        clearTimeout(initTimer);
      }

      const upgraded = browserPool.upgradeBrowserSlot(id, browserSession);
      if (!upgraded) {
        try {
          await browserSession.switchOff();
        } catch (cleanupError: any) {
          logger.log('error', `Failed to cleanup browser after slot upgrade failure: ${cleanupError.message}`);
        }
        throw new Error('Failed to upgrade reserved browser slot');
      }

      await new Promise(resolve => setTimeout(resolve, 500));

      if (socket) {
        socket.emit('ready-for-run');
      } else {
        logger.log('info', `Browser ${id} with dummy socket is ready for execution`);
      }

      logger.log('info', `Browser ${id} successfully initialized for run with ${socket ? 'real' : 'dummy'} socket`);
    } catch (error: any) {
      logger.log('error', `Error initializing browser ${id}: ${error.message}`);
      if (socket) {
        socket.emit('error', { message: error.message });
      }
      // The slot is failed once, by the outer catch below.
      throw error;
    }
  } catch (error: any) {
    logger.log('error', `Error setting up browser ${id}: ${error.message}`);
    browserPool.failBrowserSlot(id);
    throw error;
  }
};

/**
 * Creates a RemoteBrowser instance specifically for SDK validation
 * Uses dummy socket and returns browser ID and Page for validation tasks
 * @param userId User ID for browser ownership
 * @returns Promise with browser ID and Page instance
 * @throws Error when initialization fails or times out, the browser cannot be added
 * to the pool, or it has no page
 * @category BrowserManagement-Controller
 */
export const createRemoteBrowserForValidation = async (
  userId: string
): Promise<{ browserId: string; page: Page }> => {
  const id = uuid();

  logger.log('info', `Creating validation browser ${id} for user ${userId}`);

  let browserSession: RemoteBrowser | undefined;
  let addedToPool = false;
  let initTimer: ReturnType<typeof setTimeout> | undefined;

  try {
    const dummySocket = {
      emit: (event: string, data?: any) => {
        logger.log('debug', `Browser ${id} emitted ${event}`);
      },
      on: () => {},
      off: () => {},
      // RemoteBrowser removes its listeners through this method.
      removeAllListeners: () => {},
      id: `validation-${id}`,
    } as any;

    browserSession = new RemoteBrowser(dummySocket, userId, id);

    const VALIDATION_INIT_TIMEOUT = 45000;
    const initPromise = browserSession.initialize(userId);
    const timeoutPromise = new Promise<never>((_, reject) => {
      initTimer = setTimeout(() => reject(new Error('Validation browser initialization timeout')), VALIDATION_INIT_TIMEOUT);
    });

    await Promise.race([initPromise, timeoutPromise]);

    const added = browserPool.addRemoteBrowser(id, browserSession, userId, true, 'run');
    if (!added) {
      throw new Error('Failed to add validation browser to pool');
    }
    addedToPool = true;

    const page = browserSession.getCurrentPage();
    if (!page) {
      throw new Error('Failed to get page from validation browser');
    }

    logger.log('info', `Browser ${id} initialized successfully`);

    return { browserId: id, page };
  } catch (error: any) {
    logger.log('error', `Failed to create validation browser ${id}: ${error.message}`);
    try {
      if (addedToPool) {
        await destroyRemoteBrowser(id, userId);
      } else {
        // destroyRemoteBrowser returns early for ids that are not in the pool, so a
        // session that never got there has to be switched off directly.
        await browserSession?.switchOff();
      }
    } catch (cleanupError) {
      logger.log('warn', `Failed to cleanup browser ${id}: ${cleanupError}`);
    }
    throw error;
  } finally {
    clearTimeout(initTimer);
  }
};

================================================
FILE: server/src/browser-management/inputHandlers.ts
================================================
/**
 * A set of functions handling reproduction of user input
 * on the remote browser instance as well as the generation of workflow pairs.
 * These functions are called by the client through socket communication.
 */
import { Socket } from 'socket.io';

import logger from "../logger";
import { Coordinates, ScrollDeltas, KeyboardInput, DatePickerEventData } from '../types';
import { browserPool } from "../server";
import { Page } from "playwright-core";
import { CustomActions } from "../../../src/shared/types";
import { WhereWhatPair } from "maxun-core";
import { RemoteBrowser } from './classes/RemoteBrowser';

/**
 * A wrapper function for handling user input.
 * This function gets the active browser instance from the browser pool
 * and passes necessary arguments to the appropriate handlers.
 * e.g. {@link Generator}, {@link RemoteBrowser.currentPage}
 *
 * Also ignores any user input while interpretation is in progress.
*
 * @param handleCallback The callback handler to be called
 * @param userId - id of the user whose active recording browser receives the input
 * @param args - arguments to be passed to the handler
 * @category HelperFunctions
 */
const handleWrapper = async (
  handleCallback: (
    activeBrowser: RemoteBrowser,
    page: Page,
    args?: any
  ) => Promise<void>,
  userId: string,
  args?: any
) => {
  const id = browserPool.getActiveBrowserId(userId, "recording");
  if (!id) {
    // There is no id on this branch, so the warning names the user instead.
    logger.log('warn', `No active browser for user ${userId}`);
    return;
  }

  const activeBrowser = browserPool.getRemoteBrowser(id);
  // Input is accepted again while a running interpretation is paused.
  if (activeBrowser?.interpreter.interpretationInProgress() && !activeBrowser.interpreter.interpretationIsPaused) {
    logger.log('debug', `Ignoring input, while interpretation is in progress`);
    return;
  }

  const currentPage = activeBrowser?.getCurrentPage();
  if (currentPage && activeBrowser) {
    if (args) {
      await handleCallback(activeBrowser, currentPage, args);
    } else {
      await handleCallback(activeBrowser, currentPage);
    }
  } else {
    logger.log('warn', `No active page for browser ${id}`);
  }
}

/**
 * An interface for custom action description.
 * @category Types
 */
interface CustomActionEventData {
  action: CustomActions;
  settings: any;
  actionId?: string;
}

/**
 * A wrapper function for handling custom actions.
 * @param customActionEventData The custom action event data
 * @param userId id of the user who emitted the event
 * @category HelperFunctions
 */
const onGenerateAction = async (customActionEventData: CustomActionEventData, userId: string) => {
  logger.log('debug', `Generating ${customActionEventData.action} action emitted from client`);
  await handleWrapper(handleGenerateAction, userId, customActionEventData);
}

/**
 * Handles the generation of a custom action workflow pair.
* @param activeBrowser - the active remote browser {@link RemoteBrowser}; its generator records the action
 * @param page - the active page of the remote browser
 * @param customActionEventData - the custom action, its settings and an optional
 *        `actionId` used for tracking and updating specific actions (defaults to `''`)
 * @category BrowserManagement
 */
const handleGenerateAction = async (
    activeBrowser: RemoteBrowser,
    page: Page,
    { action, settings, actionId }: CustomActionEventData
) => {
    try {
        if (!page.isClosed()) {
            await activeBrowser.generator.customAction(action, actionId || '', settings, page);
        } else {
            logger.log("debug", `Ignoring generate action event: page is closed`);
        }
    } catch (err) {
        const reason = (err as Error).message;
        logger.log("warn", `Error handling generate action event: ${reason}`);
    }
}

/**
 * Date selection handler.
 * Passes the picked date to the workflow generator of the active browser.
 * Nothing happens (besides a debug log) when the page is already closed.
 * @param activeBrowser - the active remote browser {@link RemoteBrowser}
 * @param page - the active page of the remote browser
 * @param data - the data of the date selection event {@link DatePickerEventData}
 * @category BrowserManagement
 */
const handleDateSelection = async (
    activeBrowser: RemoteBrowser,
    page: Page,
    data: DatePickerEventData
) => {
    try {
        if (!page.isClosed()) {
            await activeBrowser.generator.onDateSelection(page, data);
            logger.log("debug", `Date ${data.value} selected`);
        } else {
            logger.log("debug", `Ignoring date selection event: page is closed`);
        }
    } catch (err) {
        const reason = (err as Error).message;
        logger.log("warn", `Error handling date selection event: ${reason}`);
    }
}

/**
 * Entry point for the date selection event; delegates to {@link handleDateSelection}
 * through {@link handleWrapper}.
 * @param data - the data of the date selection event
 * @param userId - id of the user that emitted the event
 * @category HelperFunctions
 */
const onDateSelection = async (
    data: DatePickerEventData,
    userId: string
) => {
    logger.log('debug', 'Handling date selection event emitted from client');
    await handleWrapper(handleDateSelection, userId, data);
}

/**
 * Handles the dropdown selection event.
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
 * @param page - the active page of the remote browser
 * @param data - the selector of the dropdown and the chosen value
 * @category BrowserManagement
 */
const handleDropdownSelection = async (
    activeBrowser: RemoteBrowser,
    page: Page,
    data: { selector: string, value: string }
) => {
    try {
        if (!page.isClosed()) {
            await activeBrowser.generator.onDropdownSelection(page, data);
            logger.log("debug", `Dropdown value ${data.value} selected`);
        } else {
            logger.log("debug", `Ignoring dropdown selection event: page is closed`);
        }
    } catch (err) {
        const reason = (err as Error).message;
        logger.log("warn", `Error handling dropdown selection event: ${reason}`);
    }
}

/**
 * Entry point for the dropdown selection event; delegates to
 * {@link handleDropdownSelection} through {@link handleWrapper}.
 * @param data - the data of the dropdown selection event
 * @param userId - id of the user that emitted the event
 * @category HelperFunctions
 */
const onDropdownSelection = async (
    data: { selector: string, value: string },
    userId: string
) => {
    logger.log('debug', 'Handling dropdown selection event emitted from client');
    await handleWrapper(handleDropdownSelection, userId, data);
}

/**
 * Time selection handler.
 * Passes the picked time value to the workflow generator of the active browser.
 * @param activeBrowser - the active remote browser {@link RemoteBrowser}
 * @param page - the active page of the remote browser
 * @param data - the selector of the time input and the chosen value
 * @category BrowserManagement
 */
const handleTimeSelection = async (
    activeBrowser: RemoteBrowser,
    page: Page,
    data: { selector: string, value: string }
) => {
    try {
        if (!page.isClosed()) {
            await activeBrowser.generator.onTimeSelection(page, data);
            logger.log("debug", `Time value ${data.value} selected`);
        } else {
            logger.log("debug", `Ignoring time selection event: page is closed`);
        }
    } catch (err) {
        const reason = (err as Error).message;
        logger.log("warn", `Error handling time selection event: ${reason}`);
    }
}

/**
 * A wrapper function for handling the time selection event.
* Delegates to {@link handleTimeSelection} through {@link handleWrapper}.
 * @param data - the data of the time selection event
 * @param userId - id of the user that emitted the event
 * @category HelperFunctions
 */
const onTimeSelection = async (
    data: { selector: string, value: string },
    userId: string
) => {
    logger.log('debug', 'Handling time selection event emitted from client');
    await handleWrapper(handleTimeSelection, userId, data);
}

/**
 * Datetime-local selection handler.
 * Passes the picked datetime-local value to the workflow generator of the active browser.
 * @param activeBrowser - the active remote browser {@link RemoteBrowser}
 * @param page - the active page of the remote browser
 * @param data - the selector of the datetime-local input and the chosen value
 * @category BrowserManagement
 */
const handleDateTimeLocalSelection = async (
    activeBrowser: RemoteBrowser,
    page: Page,
    data: { selector: string, value: string }
) => {
    try {
        if (!page.isClosed()) {
            await activeBrowser.generator.onDateTimeLocalSelection(page, data);
            logger.log("debug", `DateTime Local value ${data.value} selected`);
        } else {
            logger.log("debug", `Ignoring datetime-local selection event: page is closed`);
        }
    } catch (err) {
        const reason = (err as Error).message;
        logger.log("warn", `Error handling datetime-local selection event: ${reason}`);
    }
}

/**
 * Entry point for the datetime-local selection event; delegates to
 * {@link handleDateTimeLocalSelection} through {@link handleWrapper}.
 * @param data - the data of the datetime-local selection event
 * @param userId - id of the user that emitted the event
 * @category HelperFunctions
 */
const onDateTimeLocalSelection = async (
    data: { selector: string, value: string },
    userId: string
) => {
    logger.log('debug', 'Handling datetime-local selection event emitted from client');
    await handleWrapper(handleDateTimeLocalSelection, userId, data);
}

/**
 * A wrapper function for handling the keyup event.
* Delegates to {@link handleKeyup} through {@link handleWrapper}.
 * @param keyboardInput - the keyboard input of the keyup event
 * @param userId - id of the user that emitted the event
 * @category HelperFunctions
 */
const onKeyup = async (
    keyboardInput: KeyboardInput,
    userId: string
) => {
    // NOTE(review): a `KeyboardInput` is forwarded here while `handleKeyup` declares
    // `key: string` - confirm the payload shape the client actually emits.
    logger.log('debug', 'Handling keyup event emitted from client');
    await handleWrapper(handleKeyup, userId, keyboardInput);
}

/**
 * Keyup handler.
 * Releases the given key on the remote page's keyboard.
 * The workflow generator is not involved, so keyup is not recorded in the workflow.
 * @param activeBrowser - the active remote browser {@link RemoteBrowser} (unused)
 * @param page - the active page of the remote browser
 * @param key - the released key
 * @category BrowserManagement
 */
const handleKeyup = async (
    activeBrowser: RemoteBrowser,
    page: Page,
    key: string
) => {
    try {
        if (!page.isClosed()) {
            await page.keyboard.up(key);
            logger.log("debug", `Key ${key} unpressed`);
        } else {
            logger.log("debug", `Ignoring keyup event: page is closed`);
        }
    } catch (err) {
        const reason = (err as Error).message;
        logger.log("warn", `Error handling keyup event: ${reason}`);
    }
};

/**
 * Entry point for the url change event; delegates to {@link handleChangeUrl}
 * through {@link handleWrapper}.
 * @param url - the new url of the page
 * @param userId - id of the user that emitted the event
 * @category HelperFunctions
 */
const onChangeUrl = async (
    url: string,
    userId: string
) => {
    logger.log('debug', 'Handling change url event emitted from client');
    await handleWrapper(handleChangeUrl, userId, url);
}

/**
 * An url change event handler.
 * Navigates the page to the given url and generates data for the workflow.
* The url is reported to the generator before the navigation is attempted; a failed
 * navigation is only logged.
 * @param activeBrowser - the active remote browser {@link RemoteBrowser}
 * @param page - the active page of the remote browser
 * @param url - the new url of the page
 * @category BrowserManagement
 */
const handleChangeUrl = async (
    activeBrowser: RemoteBrowser,
    page: Page,
    url: string
) => {
    try {
        if (page.isClosed()) {
            logger.log("debug", `Ignoring change url event: page is closed`);
            return;
        }

        if (!url) {
            logger.log("warn", `No url provided`);
            return;
        }

        await activeBrowser.generator.onChangeUrl(url, page);

        try {
            const navigationOptions = { waitUntil: "domcontentloaded", timeout: 30000 } as const;
            await page.goto(url, navigationOptions);
            // short pause after the navigation before reporting success
            await page.waitForTimeout(500);
            logger.log("debug", `Went to ${url}`);
        } catch (navigationError) {
            logger.log("error", (navigationError as Error).message);
        }
    } catch (err) {
        const reason = (err as Error).message;
        logger.log("warn", `Error handling change url event: ${reason}`);
    }
};

/**
 * Entry point for the refresh event; delegates to {@link handleRefresh}
 * through {@link handleWrapper}.
 * @param userId - id of the user that emitted the event
 * @category HelperFunctions
 */
const onRefresh = async (
    userId: string
) => {
    logger.log('debug', 'Handling refresh event emitted from client');
    await handleWrapper(handleRefresh, userId, undefined);
}

/**
 * A refresh event handler.
 * Refreshes the page. This is not reflected in the workflow.
* @param activeBrowser - the active remote browser {@link RemoteBrowser} (unused)
 * @param page - the active page of the remote browser
 * @category BrowserManagement
 */
const handleRefresh = async (
    activeBrowser: RemoteBrowser,
    page: Page
) => {
    try {
        if (!page.isClosed()) {
            logger.log("debug", "Refreshing page...");

            const reloadOptions = { waitUntil: "domcontentloaded", timeout: 30000 } as const;
            await page.reload(reloadOptions);

            // small stabilization delay like changeUrl
            await page.waitForTimeout(500);

            logger.log("debug", `Page refreshed successfully.`);
        } else {
            logger.log("debug", `Ignoring refresh event: page is closed`);
        }
    } catch (err) {
        const reason = (err as Error).message;
        logger.log("warn", `Error handling refresh event: ${reason}`);
    }
};

/**
 * Entry point for the go back event; delegates to {@link handleGoBack}
 * through {@link handleWrapper}.
 * @param userId - id of the user that emitted the event
 * @category HelperFunctions
 */
const onGoBack = async (
    userId: string
) => {
    logger.log('debug', 'Handling go back event emitted from client');
    await handleWrapper(handleGoBack, userId, undefined);
}

/**
 * Go back handler.
 * Moves the page one step back in its history and reports the resulting url
 * to the workflow generator.
 * @param activeBrowser - the active remote browser {@link RemoteBrowser}
 * @param page - the active page of the remote browser
 * @category BrowserManagement
 */
const handleGoBack = async (
    activeBrowser: RemoteBrowser,
    page: Page
) => {
    try {
        if (!page.isClosed()) {
            // grab the generator before navigating, as the navigation is awaited
            const workflowGenerator = activeBrowser.generator;
            await page.goBack({ waitUntil: "commit" });
            workflowGenerator.onGoBack(page.url());
            logger.log("debug", "Page went back");
        } else {
            logger.log("debug", `Ignoring go back event: page is closed`);
        }
    } catch (err) {
        const reason = (err as Error).message;
        logger.log("warn", `Error handling go back event: ${reason}`);
    }
};

/**
 * A wrapper function for handling the go forward event.
* Delegates to {@link handleGoForward} through {@link handleWrapper}.
 * @param userId - id of the user that emitted the event
 * @category HelperFunctions
 */
const onGoForward = async (
    userId: string
) => {
    logger.log('debug', 'Handling go forward event emitted from client');
    await handleWrapper(handleGoForward, userId, undefined);
}

/**
 * Go forward handler.
 * Moves the page one step forward in its history and reports the resulting url
 * to the workflow generator.
 * @param activeBrowser - the active remote browser {@link RemoteBrowser}
 * @param page - the active page of the remote browser
 * @category BrowserManagement
 */
const handleGoForward = async (
    activeBrowser: RemoteBrowser,
    page: Page
) => {
    try {
        if (!page.isClosed()) {
            // grab the generator before navigating, as the navigation is awaited
            const workflowGenerator = activeBrowser.generator;
            await page.goForward({ waitUntil: "commit" });
            workflowGenerator.onGoForward(page.url());
            logger.log("debug", "Page went forward");
        } else {
            logger.log("debug", `Ignoring go forward event: page is closed`);
        }
    } catch (err) {
        const reason = (err as Error).message;
        logger.log("warn", `Error handling go forward event: ${reason}`);
    }
};

/**
 * Handles the click action event.
* Steps, in order:
 *  1. strips `target="_blank"` from the clicked element / its enclosing anchor so the
 *     click does not open a new tab, and remembers the anchor's href (if any);
 *  2. clicks the element (at the given offset inside its bounding box for
 *     INPUT/TEXTAREA elements when coordinates are provided);
 *  3. reports the click to the workflow generator;
 *  4. for non-input elements, waits for the page to settle and, when the url changed,
 *     reports the navigation to the generator.
 * @param activeBrowser - the active remote browser {@link RemoteBrowser}
 * @param page - the active page of the remote browser
 * @param data - the data of the click action event
 * @category BrowserManagement
 */
const handleClickAction = async (
    activeBrowser: RemoteBrowser,
    page: Page,
    data: {
        selector: string;
        url: string;
        userId: string;
        elementInfo?: any;
        coordinates?: { x: number; y: number };
        isSPA?: boolean;
    }
) => {
    try {
        if (page.isClosed()) {
            logger.log("debug", `Ignoring click action event: page is closed`);
            return;
        }

        const { selector, elementInfo, coordinates, isSPA = false } = data;

        // Runs inside the page: make sure the click stays in the current tab and
        // find out where an enclosing anchor is supposed to lead.
        const anchorInfo = await page.evaluate(({ sel }) => {
            try {
                const element = document.querySelector(sel);
                if (element) {
                    if (element.getAttribute('target') === '_blank') {
                        element.removeAttribute('target');
                    }

                    const parentAnchor = element.closest('a[target="_blank"]') as HTMLAnchorElement;
                    if (parentAnchor) {
                        parentAnchor.removeAttribute('target');
                    }

                    const anchor = element.tagName === 'A'
                        ? element as HTMLAnchorElement
                        : element.closest('a') as HTMLAnchorElement;
                    if (anchor && anchor.href) {
                        return { hasAnchor: true, href: anchor.href };
                    }
                }
                return { hasAnchor: false, href: null };
            } catch (e) {
                console.error('Error removing target attribute:', e);
                return { hasAnchor: false, href: null };
            }
        }, { sel: selector });

        const currentUrl = page.url();

        const isInputElement = elementInfo && (elementInfo.tagName === 'INPUT' || elementInfo.tagName === 'TEXTAREA');

        if (isInputElement && coordinates) {
            // Click at the exact offset inside the input; fall back to a plain
            // selector click whenever the element or its box cannot be resolved.
            try {
                const elementHandle = await page.$(selector);
                const boundingBox = elementHandle ? await elementHandle.boundingBox() : null;
                if (boundingBox) {
                    await page.mouse.click(
                        boundingBox.x + coordinates.x,
                        boundingBox.y + coordinates.y
                    );
                } else {
                    await page.click(selector);
                }
            } catch (error) {
                logger.log("warn", `Failed to click at coordinates: ${(error as Error).message}`);
                await page.click(selector);
            }
        } else {
            await page.click(selector);
        }

        const generator = activeBrowser.generator;
        await generator.onDOMClickAction(page, data);

        logger.log("debug", `Click action processed: ${selector}`);

        if (isInputElement) {
            logger.log("debug", `Input field click - skipping DOM snapshot for smooth typing`);
            return;
        }

        if (isSPA) {
            logger.log("debug", `SPA interaction detected for selector: ${selector}`);
            await new Promise((resolve) => setTimeout(resolve, 1500));
        } else {
            try {
                await page.waitForNavigation({ timeout: 1500 });
            } catch {
                // Deliberately ignored: a timeout only means the click did not start a
                // navigation within 1.5s; the url comparison below decides what happened.
            }

            let newUrl = page.url();

            if (anchorInfo.hasAnchor && anchorInfo.href) {
                try {
                    const expectedUrl = new URL(anchorInfo.href);
                    const actualUrl = new URL(newUrl);
                    // Only origin and pathname are compared; query and hash are ignored.
                    const navigatedToExpectedPage =
                        expectedUrl.origin === actualUrl.origin &&
                        expectedUrl.pathname === actualUrl.pathname;

                    if (!navigatedToExpectedPage) {
                        logger.log("debug", `Click did not navigate to expected URL, using page.goto as fallback`);
                        await page.goto(anchorInfo.href, {
                            waitUntil: "domcontentloaded",
                            timeout: 30000
                        });
                        newUrl = page.url();
                    }
                } catch (urlError) {
                    logger.log("debug", `Error comparing URLs: ${(urlError as Error).message}`);
                }
            }

            const finalNavigated = newUrl !== currentUrl && !newUrl.endsWith("/#");

            if (finalNavigated) {
                logger.log("debug", `Navigation detected: ${currentUrl} -> ${newUrl}`);
                await generator.onDOMNavigation(page, {
                    url: newUrl,
                    currentUrl: currentUrl,
                    userId: data.userId,
                });
            }
        }

        await new Promise((resolve) => setTimeout(resolve, 300));
    } catch (e) {
        const { message } = e as Error;
        logger.log(
            "warn",
            `Error handling enhanced click action event: ${message}`
        );
    }
};

/**
 * A wrapper function for handling the click action event.
 * @param data - the data of the click action event (same shape {@link handleClickAction} reads)
 * @param userId - id of the user that emitted the event
 * @category HelperFunctions
 */
const onDOMClickAction = async (
    data: {
        selector: string;
        url: string;
        userId: string;
        elementInfo?: any;
        coordinates?: { x: number; y: number };
        isSPA?: boolean;
    },
    userId: string
) => {
    logger.log("debug", "Handling click action event emitted from client");
    await handleWrapper(handleClickAction, userId, data);
};

/**
 * Handles the keyboard action event.
 * Presses the key on the element matching the selector and then reports the
 * key press to the workflow generator.
 * @param activeBrowser - the active remote browser {@link RemoteBrowser}
 * @param page - the active page of the remote browser
 * @param data - the data of the keyboard action event
 * @category BrowserManagement
 */
const handleKeyboardAction = async (
    activeBrowser: RemoteBrowser,
    page: Page,
    data: {
        selector: string;
        key: string;
        url: string;
        userId: string;
        inputType?: string;
    }
) => {
    try {
        if (page.isClosed()) {
            logger.log("debug", `Ignoring keyboard action event: page is closed`);
            return;
        }

        const generator = activeBrowser.generator;

        await page.press(data.selector, data.key);
        await generator.onDOMKeyboardAction(page, data);
        logger.log(
            "debug",
            `Keyboard action processed: ${data.key} on ${data.selector}`
        );
    } catch (e) {
        const { message } = e as Error;
        logger.log("warn", `Error handling keyboard action event: ${message}`);
    }
};

/**
 * A wrapper function for handling the keyboard action event.
* Delegates to {@link handleKeyboardAction} through {@link handleWrapper}.
 * @param data - the data of the keyboard action event
 * @param userId - id of the user that emitted the event
 * @category HelperFunctions
 */
const onDOMKeyboardAction = async (
    data: {
        selector: string;
        key: string;
        url: string;
        userId: string;
        inputType?: string;
    },
    userId: string
) => {
    logger.log("debug", "Handling keyboard action event emitted from client");
    await handleWrapper(handleKeyboardAction, userId, data);
};

/**
 * Remove action handler.
 * Invoked when a user discards a capture action (list or text) that was already
 * emitted to the backend; asks the workflow generator to drop it and logs whether
 * the action was found.
 * @param activeBrowser - the active remote browser instance
 * @param page - the active page of the remote browser (unused)
 * @param data - the data containing the actionId to remove
 * @category BrowserManagement
 */
const handleRemoveAction = async (
    activeBrowser: RemoteBrowser,
    page: Page,
    data: { actionId: string }
) => {
    try {
        const targetId = data.actionId;
        const wasRemoved = activeBrowser.generator.removeAction(targetId);

        if (!wasRemoved) {
            logger.log("debug", `Action ${targetId} not found in workflow`);
            return;
        }
        logger.log("info", `Action ${targetId} successfully removed from workflow`);
    } catch (err) {
        const reason = (err as Error).message;
        logger.log("warn", `Error handling remove action event: ${reason}`);
    }
};

/**
 * A wrapper function for handling the remove action event.
* @param data - the data containing the actionId to remove
 * @param userId - the user ID
 * @category HelperFunctions
 */
const onRemoveAction = async (
    data: { actionId: string },
    userId: string
) => {
    logger.log("debug", "Handling remove action event emitted from client");
    await handleWrapper(handleRemoveAction, userId, data);
};

/**
 * Counts the elements matching `selector` in the current document.
 *
 * This function is handed to `page.evaluate`, i.e. it is serialized and executed
 * inside the browser page. It must therefore stay self-contained and must not
 * reference anything from the server-side scope.
 *
 * Selectors starting with `//` or `(//` are evaluated as XPath, everything else as
 * a CSS selector. A selector that fails to evaluate is reported on the page console
 * and counts as zero matches.
 * @param selector - CSS or XPath selector of the list items
 * @returns number of matching element nodes
 * @category HelperFunctions
 */
const countMatchingElements = (selector: string): number => {
    try {
        const isXPath = selector.startsWith('//') || selector.startsWith('(//');
        if (!isXPath) {
            return document.querySelectorAll(selector).length;
        }

        const result = document.evaluate(
            selector,
            document,
            null,
            XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
            null
        );
        let count = 0;
        for (let i = 0; i < result.snapshotLength; i++) {
            const node = result.snapshotItem(i);
            // XPath may also match text/attribute nodes; only elements are list items.
            if (node && node.nodeType === Node.ELEMENT_NODE) {
                count++;
            }
        }
        return count;
    } catch (err) {
        console.error('Selector evaluation failed:', selector, err);
        return 0;
    }
};

/**
 * Tests pagination by scrolling down and checking if new content loads.
 *
 * Counts the list items, scrolls to the bottom of the page, waits two seconds,
 * counts again and finally restores the original scroll position. The outcome is
 * emitted on the socket as `paginationScrollTestResult`:
 * `{ success: true, contentLoaded, initialCount, newCount, itemsAdded }` on success or
 * `{ success: false, error }` when there is no active browser/page or the test throws.
 * @param data Object containing listSelector
 * @param userId The user ID
 * @param socket The socket connection to emit results
 */
const onTestPaginationScroll = async (
    data: { listSelector: string },
    userId: string,
    socket: Socket
) => {
    logger.log("debug", "Testing pagination scroll emitted from client");

    const id = browserPool.getActiveBrowserId(userId, "recording");
    if (!id) {
        // `id` is empty on this path, so report the user the lookup was made for.
        logger.log("warn", `No active browser for user ${userId}`);
        socket.emit("paginationScrollTestResult", {
            success: false,
            error: "No active browser"
        });
        return;
    }

    const activeBrowser = browserPool.getRemoteBrowser(id);
    const currentPage = activeBrowser?.getCurrentPage();

    if (!currentPage || !activeBrowser) {
        logger.log("warn", `No active page for browser ${id}`);
        socket.emit("paginationScrollTestResult", {
            success: false,
            error: "No active page"
        });
        return;
    }

    try {
        const { listSelector } = data;

        logger.log("info", `Starting pagination scroll test for selector: ${listSelector}`);

        const initialCount = await currentPage.evaluate(countMatchingElements, listSelector);
        logger.log("info", `Initial list count: ${initialCount}`);

        const scrollInfo = await currentPage.evaluate(() => {
            return {
                scrollY: window.scrollY,
                scrollHeight: document.documentElement.scrollHeight,
                viewportHeight: window.innerHeight
            };
        });
        // Stringified into the message: the logger's output format prints only the
        // message, so an object passed as extra metadata would never show up.
        logger.log("info", `Scroll info: ${JSON.stringify(scrollInfo)}`);

        await currentPage.evaluate(() => {
            // Use the taller of body/documentElement so pages whose <body> is
            // height-constrained are still scrolled to the real bottom.
            window.scrollTo(0, Math.max(
                document.body.scrollHeight,
                document.documentElement.scrollHeight
            ));
        });
        logger.log("info", "Scrolled to bottom, waiting for potential content load...");

        await currentPage.waitForTimeout(2000);

        const newCount = await currentPage.evaluate(countMatchingElements, listSelector);
        logger.log("info", `New list count after scroll: ${newCount}`);

        await currentPage.evaluate((originalY) => {
            window.scrollTo(0, originalY);
        }, scrollInfo.scrollY);

        const contentLoaded = newCount > initialCount;
        logger.log("info", `Scroll test result: ${contentLoaded ? 'Content loaded' : 'No new content'}`);

        socket.emit("paginationScrollTestResult", {
            success: true,
            contentLoaded: contentLoaded,
            initialCount: initialCount,
            newCount: newCount,
            itemsAdded: newCount - initialCount
        });
    } catch (error) {
        const { message } = error as Error;
        logger.log("error", `Error during pagination scroll test: ${message}`);
        socket.emit("paginationScrollTestResult", {
            success: false,
            error: message
        });
    }
};

/**
 * Helper function for registering the handlers onto established websocket connection.
 * Registers various input handlers.
 *
 * The handlers reproduce the user's input on the remote browser and feed the
 * workflow generator. Most of them go through {@link handleWrapper}, which ignores
 * user input while interpretation is in progress.
 *
 * Every handler is asynchronous and nobody awaits a socket listener, so each one is
 * registered with a rejection guard: a handler that throws (e.g. on a malformed
 * payload) is logged instead of surfacing as an unhandled promise rejection.
 *
 * @param socket websocket with established connection
 * @param userId id of the authenticated user owning the socket
 * @returns void
 * @category BrowserManagement
 */
const registerInputHandlers = (socket: Socket, userId: string) => {
    const listen = (event: string, handler: (data: any) => Promise<void>) => {
        socket.on(event, (data) => {
            handler(data).catch((error: unknown) => {
                const reason = error instanceof Error ? error.message : String(error);
                logger.log("warn", `Unhandled error in "${event}" handler: ${reason}`);
            });
        });
    };

    listen("input:keyup", (data) => onKeyup(data, userId));
    listen("input:url", (data) => onChangeUrl(data, userId));
    listen("input:refresh", () => onRefresh(userId));
    listen("input:back", () => onGoBack(userId));
    listen("input:forward", () => onGoForward(userId));
    listen("input:date", (data) => onDateSelection(data, userId));
    listen("input:dropdown", (data) => onDropdownSelection(data, userId));
    listen("input:time", (data) => onTimeSelection(data, userId));
    listen("input:datetime-local", (data) => onDateTimeLocalSelection(data, userId));
    listen("action", (data) => onGenerateAction(data, userId));
    listen("removeAction", (data) => onRemoveAction(data, userId));

    listen("dom:click", (data) => onDOMClickAction(data, userId));
    listen("dom:keypress", (data) => onDOMKeyboardAction(data, userId));
    listen("testPaginationScroll", (data) => onTestPaginationScroll(data, userId, socket));
};

/**
 * Removes all input handler socket listeners to prevent memory leaks
 * Must be called when socket disconnects or browser
session ends
 * @param socket websocket with established connection
 * @returns void
 * @category BrowserManagement
 */
const removeInputHandlers = (socket: Socket) => {
    // Every event registered by registerInputHandlers, plus "dom:input".
    // NOTE(review): "dom:input" is not registered anywhere in this module - presumably
    // a legacy event; removing listeners of an unused event is harmless, so it is kept.
    const inputEvents = [
        "input:keyup",
        "input:url",
        "input:refresh",
        "input:back",
        "input:forward",
        "input:date",
        "input:dropdown",
        "input:time",
        "input:datetime-local",
        "action",
        "dom:input",
        "dom:click",
        "dom:keypress",
        "removeAction",
        "testPaginationScroll",
    ];

    try {
        for (const event of inputEvents) {
            socket.removeAllListeners(event);
        }
    } catch (error: unknown) {
        // Reported through the module's logger, like every other handler in this file.
        const reason = error instanceof Error ? error.message : String(error);
        logger.log('warn', `Error removing input handlers: ${reason}`);
    }
};

export { registerInputHandlers, removeInputHandlers };

================================================
FILE: server/src/constants/config.ts
================================================
export const SERVER_PORT = process.env.BACKEND_PORT ? Number(process.env.BACKEND_PORT) : 8080
export const DEBUG = process.env.DEBUG === 'true'
export const LOGS_PATH = process.env.LOGS_PATH ??
'server/logs' export const ANALYTICS_ID = 'oss' ================================================ FILE: server/src/db/config/database.js ================================================ const dotenv = require('dotenv'); dotenv.config({ path: './.env' }); // Validate required environment variables const requiredEnvVars = ['DB_USER', 'DB_PASSWORD', 'DB_NAME', 'DB_HOST', 'DB_PORT']; requiredEnvVars.forEach(envVar => { if (!process.env[envVar]) { console.error(`Error: Environment variable ${envVar} is not set.`); process.exit(1); } }); module.exports = { development: { username: process.env.DB_USER, password: process.env.DB_PASSWORD, database: process.env.DB_NAME, host: process.env.DB_HOST, port: process.env.DB_PORT, dialect: 'postgres', logging: console.log, }, test: { username: process.env.DB_USER, password: process.env.DB_PASSWORD, database: process.env.DB_NAME, host: process.env.DB_HOST, port: process.env.DB_PORT, dialect: 'postgres', logging: false, }, production: { username: process.env.DB_USER, password: process.env.DB_PASSWORD, database: process.env.DB_NAME, host: process.env.DB_HOST, port: process.env.DB_PORT, dialect: 'postgres', logging: false, } }; ================================================ FILE: server/src/db/migrate.js ================================================ 'use strict'; import { execSync } from 'child_process'; import path from 'path'; import { fileURLToPath } from 'url'; import db from './models/index.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); async function runMigrations() { try { console.log('Testing database connection...'); await db.sequelize.authenticate(); console.log('Database connection established successfully.'); console.log('Running database migrations...'); execSync('npx sequelize-cli db:migrate', { stdio: 'inherit', cwd: path.resolve(__dirname, '../../..') }); console.log('Migrations completed successfully'); return true; } catch (error) { console.error('Migration 
error:', error); return false; } } module.exports = runMigrations; ================================================ FILE: server/src/db/migrations/20250327111003-add-airtable-columns.js ================================================ 'use strict'; module.exports = { up: async (queryInterface, Sequelize) => { // Add Airtable related columns return queryInterface.sequelize.transaction(async (transaction) => { try { // Check if columns already exist first to make the migration idempotent const tableInfo = await queryInterface.describeTable('robot', { transaction }); // Add airtable_base_id if it doesn't exist if (!tableInfo.airtable_base_id) { await queryInterface.addColumn('robot', 'airtable_base_id', { type: Sequelize.STRING, allowNull: true }, { transaction }); } // Add airtable_base_name if it doesn't exist if (!tableInfo.airtable_base_name) { await queryInterface.addColumn('robot', 'airtable_base_name', { type: Sequelize.STRING, allowNull: true }, { transaction }); } // Add airtable_table_name if it doesn't exist if (!tableInfo.airtable_table_name) { await queryInterface.addColumn('robot', 'airtable_table_name', { type: Sequelize.STRING, allowNull: true }, { transaction }); } // Add airtable_table_id if it doesn't exist if (!tableInfo.airtable_table_id) { await queryInterface.addColumn('robot', 'airtable_table_id', { type: Sequelize.STRING, allowNull: true }, { transaction }); } // Add airtable_access_token if it doesn't exist if (!tableInfo.airtable_access_token) { await queryInterface.addColumn('robot', 'airtable_access_token', { type: Sequelize.TEXT, // Using TEXT for potentially long tokens allowNull: true }, { transaction }); } // Add airtable_refresh_token if it doesn't exist if (!tableInfo.airtable_refresh_token) { await queryInterface.addColumn('robot', 'airtable_refresh_token', { type: Sequelize.TEXT, // Using TEXT for potentially long tokens allowNull: true }, { transaction }); } return Promise.resolve(); } catch (error) { return Promise.reject(error); 
} }); }, down: async (queryInterface, Sequelize) => { // Remove Airtable related columns return queryInterface.sequelize.transaction(async (transaction) => { try { // Remove columns in reverse order await queryInterface.removeColumn('robot', 'airtable_refresh_token', { transaction }); await queryInterface.removeColumn('robot', 'airtable_access_token', { transaction }); await queryInterface.removeColumn('robot', 'airtable_table_id', { transaction }); await queryInterface.removeColumn('robot', 'airtable_table_name', { transaction }); await queryInterface.removeColumn('robot', 'airtable_base_name', { transaction }); await queryInterface.removeColumn('robot', 'airtable_base_id', { transaction }); return Promise.resolve(); } catch (error) { return Promise.reject(error); } }); } }; ================================================ FILE: server/src/db/migrations/20250527105655-add-webhooks.js ================================================ 'use strict'; module.exports = { async up(queryInterface, Sequelize) { await queryInterface.addColumn('robot', 'webhooks', { type: Sequelize.JSONB, allowNull: true, defaultValue: null, comment: 'Webhook configurations for the robot' }); // Optional: Add an index for better query performance if you plan to search within webhook data await queryInterface.addIndex('robot', { fields: ['webhooks'], using: 'gin', // GIN index for JSONB columns name: 'robot_webhooks_gin_idx' }); }, async down(queryInterface, Sequelize) { // Remove the index first await queryInterface.removeIndex('robot', 'robot_webhooks_gin_idx'); // Then remove the column await queryInterface.removeColumn('robot', 'webhooks'); } }; ================================================ FILE: server/src/db/models/index.js ================================================ 'use strict'; import fs from 'fs'; import path from 'path'; import { fileURLToPath } from 'url'; import Sequelize from 'sequelize'; import databaseConfig from '../config/database.js'; const __filename = 
fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); const basename = path.basename(__filename); const env = process.env.NODE_ENV || 'development'; const config = databaseConfig[env]; const db = {}; let sequelize; if (config.use_env_variable) { try { sequelize = new Sequelize(process.env[config.use_env_variable], config); console.log(`Connected to database using ${config.use_env_variable}`); } catch (error) { console.error('Unable to connect to the database using environment variable:', error); process.exit(1); } } else { try { sequelize = new Sequelize(config.database, config.username, config.password, config); console.log(`Connected to database: ${config.database}`); } catch (error) { console.error('Unable to connect to the database:', error); process.exit(1); } } fs .readdirSync(__dirname) .filter(file => { return ( file.indexOf('.') !== 0 && file !== basename && file.slice(-3) === '.js' && file.indexOf('.test.js') === -1 ); }) .forEach(file => { const model = require(path.join(__dirname, file))(sequelize, Sequelize.DataTypes); db[model.name] = model; }); Object.keys(db).forEach(modelName => { if (db[modelName].associate) { db[modelName].associate(db); } }); db.sequelize = sequelize; db.Sequelize = Sequelize; module.exports = db; ================================================ FILE: server/src/index.ts ================================================ export * from "./server"; export * from "./logger"; export * from "./types"; export * from "./browser-management/controller"; export * from "./browser-management/inputHandlers"; export * from "./browser-management/classes/RemoteBrowser"; export * from "./browser-management/classes/BrowserPool"; export * from "./socket-connection/connection"; export * from "./workflow-management/selector"; export * from "./workflow-management/storage"; export * from "./workflow-management/utils"; export * from "./workflow-management/classes/Interpreter"; export * from "./workflow-management/classes/Generator"; 
/**
 * Converts an HTML string into Markdown using Turndown with the GFM plugin.
 *
 * The HTML is first stripped of non-content elements (see `tidyHtml`),
 * converted with custom rules for headings, SVGs, paragraphs and links, and
 * then post-processed by `fixBrokenLinks` and `stripSkipLinks`.
 *
 * @param html - HTML source; `null`, `undefined` or `""` yield `""`.
 * @param baseUrl - When provided, relative link targets are resolved against it.
 * @returns The trimmed Markdown, or `""` when the conversion throws.
 */
export async function parseMarkdown(
  html: string | null | undefined,
  baseUrl?: string | null
): Promise<string> {
  // Bail out before loading any converter dependency.
  if (!html) return "";

  const TurndownService = require("turndown");
  const { gfm } = require("joplin-turndown-plugin-gfm");
  const { URL } = require("url");

  const tidiedHtml = tidyHtml(html);

  const t = new TurndownService({
    headingStyle: "atx", // ensures #### instead of ------
    codeBlockStyle: "fenced",
  });

  // ---------------------------------------------
  // Proper ATX headings #### instead of underline-style
  // ---------------------------------------------
  t.addRule("forceAtxHeadings", {
    filter: ["h1", "h2", "h3", "h4", "h5", "h6"],
    replacement: (content: string, node: any) => {
      // nodeName is "H1".."H6"; the digit is the heading level.
      const level = Number(node.nodeName.charAt(1));
      const clean = content.trim();
      return `\n${"#".repeat(level)} ${clean}\n`;
    },
  });

  // ---------------------------------------------
  // Remove SVGs
  // ---------------------------------------------
  t.addRule("truncate-svg", {
    filter: "svg",
    replacement: () => "",
  });

  // ---------------------------------------------
  // Improved paragraph cleanup
  // ---------------------------------------------
  t.addRule("improved-paragraph", {
    filter: "p",
    replacement: (innerText: string) => {
      const trimmed = innerText.trim();
      if (!trimmed) return "";
      // Collapse runs of 3+ newlines so paragraphs are separated by one blank line.
      return `${trimmed.replace(/\n{3,}/g, "\n\n")}\n\n`;
    },
  });

  // ---------------------------------------------
  // Inline link with fallback text
  // ---------------------------------------------
  t.addRule("inlineLink", {
    filter: (node: any) => node.nodeName === "A" && node.getAttribute("href"),
    replacement: (content: string, node: any) => {
      const rawHref: string = node.getAttribute("href");
      let text = content.trim();

      // Fallback: aria-label → title → domain
      if (!text) {
        text =
          node.getAttribute("aria-label")?.trim() ||
          node.getAttribute("title")?.trim() ||
          getDomainFromUrl(rawHref) ||
          "link";
      }

      let href = rawHref.trim();

      // relative → absolute
      if (baseUrl && isRelativeUrl(href)) {
        try {
          href = new URL(href, baseUrl).toString();
        } catch {
          // Unresolvable href: keep it exactly as written in the page.
        }
      }

      return `[${text}](${cleanUrl(href)})`;
    },
  });

  t.use(gfm);

  // Convert HTML → Markdown
  try {
    let out: string = t.turndown(tidiedHtml);
    out = fixBrokenLinks(out);
    out = stripSkipLinks(out);
    return out.trim();
  } catch (err) {
    console.error("HTML→Markdown failed", { err });
    return "";
  }
}

// -----------------------------------------------------
// Helpers
// -----------------------------------------------------

/**
 * Reports whether `url` lacks a leading URI scheme and therefore needs to be
 * resolved against a base URL.
 *
 * Only a scheme at the very start counts, so a relative path whose query
 * string embeds another URL (`/r?to=https://x.com`) is still relative.
 */
function isRelativeUrl(url: string): boolean {
  return !/^[a-z][a-z0-9+.-]*:/i.test(url);
}

/**
 * Extracts the hostname of an absolute URL, without a leading `www.` label.
 *
 * @returns The hostname, or `null` when `url` cannot be parsed as an absolute URL.
 */
function getDomainFromUrl(url: string): string | null {
  try {
    const u = new URL(url);
    // Anchor the match so only a leading "www." label is removed.
    return u.hostname.replace(/^www\./, "");
  } catch {
    return null;
  }
}

/** Hook for URL normalisation; currently returns the URL unchanged. */
function cleanUrl(u: string): string {
  return u;
}

/** Collapses each run of newlines (and the whitespace after them) into a single newline. */
function cleanAttribute(attr: string) {
  return attr ? attr.replace(/(\n+\s*)+/g, "\n") : "";
}

/**
 * Removes non-content elements from the HTML and returns the inner HTML of
 * `<body>`, or `""` when cheerio yields no body markup.
 */
function tidyHtml(html: string): string {
  const cheerio = require("cheerio");
  const $ = cheerio.load(html);

  const manuallyCleanedElements = [
    "script",
    "style",
    "iframe",
    "noscript",
    "meta",
    "link",
    "object",
    "embed",
    "canvas",
    "audio",
    "video",
  ];

  manuallyCleanedElements.forEach((tag) => $(tag).remove());
  // cheerio's html() may return null; honour the declared string return type.
  return $("body").html() ?? "";
}

/**
 * Escapes newlines that occur inside square brackets by prefixing them with a
 * backslash, so multi-line link text stays a single Markdown link.
 */
function fixBrokenLinks(md: string): string {
  let depth = 0;
  const pieces: string[] = [];

  for (const ch of md) {
    if (ch === "[") depth++;
    if (ch === "]") depth = Math.max(0, depth - 1);
    pieces.push(depth > 0 && ch === "\n" ? "\\\n" : ch);
  }

  return pieces.join("");
}

/** Removes `[Skip to Content](#…)` accessibility links (case-insensitive). */
function stripSkipLinks(md: string): string {
  return md.replace(/\[Skip to Content\]\(#[^\)]*\)/gi, "");
}
/**
 * Strips non-content elements and inline event-handler attributes from the
 * document currently loaded in `page`, then returns the serialized DOM.
 *
 * The callback runs inside the browser via `page.evaluate`, so it must stay
 * self-contained and cannot reference anything from this module. It mutates
 * the live page: the removed elements are gone from `page` afterwards.
 */
async function extractCleanedHtml(page: Page): Promise<string> {
  return page.evaluate(() => {
    const selectors = [
      "script",
      "style",
      "link[rel='stylesheet']",
      "noscript",
      "meta",
      "svg",
      "img",
      "picture",
      "source",
      "video",
      "audio",
      "iframe",
      "object",
      "embed"
    ];

    selectors.forEach(sel => {
      document.querySelectorAll(sel).forEach(e => e.remove());
    });

    // Drop inline handlers (onclick, onload, ...) from every remaining element.
    document.querySelectorAll("*").forEach(el => {
      [...el.attributes].forEach(attr => {
        if (attr.name.startsWith("on")) {
          el.removeAttribute(attr.name);
        }
      });
    });

    return document.documentElement.outerHTML;
  });
}

/**
 * Fetches a webpage, strips scripts/styles/images/etc,
 * returns clean Markdown using parser.
 * @param url - The URL to convert
 * @param page - Existing Playwright page instance to use
 * @returns Markdown produced by `parseMarkdown`, with links resolved against `url`
 * @throws Rethrows any navigation or evaluation error after logging it
 */
export async function convertPageToMarkdown(url: string, page: Page): Promise<string> {
  try {
    logger.log('info', `[Scrape] Using existing page instance for markdown conversion of ${url}`);

    await gotoWithFallback(page, url);

    const cleanedHtml = await extractCleanedHtml(page);
    return await parseMarkdown(cleanedHtml, url);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    logger.error(`[Scrape] Error during markdown conversion: ${message}`);
    throw error;
  }
}

/**
 * Fetches a webpage, strips scripts/styles/images/etc,
 * returns clean HTML.
 * @param url - The URL to convert
 * @param page - Existing Playwright page instance to use
 * @returns The cleaned document serialized as HTML
 * @throws Rethrows any navigation or evaluation error after logging it
 */
export async function convertPageToHTML(url: string, page: Page): Promise<string> {
  try {
    logger.log('info', `[Scrape] Using existing page instance for HTML conversion of ${url}`);

    await gotoWithFallback(page, url);

    return await extractCleanedHtml(page);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    logger.error(`[Scrape] Error during HTML conversion: ${message}`);
    throw error;
  }
}
/**
 * MCP server that exposes the Maxun REST API as tools over stdio.
 *
 * Configuration is read from the environment: `MCP_API_KEY` (required, sent as
 * the `x-api-key` header) and `BACKEND_URL` (defaults to
 * `http://localhost:8080`).
 */
class MaxunMCPWorker {
  private mcpServer: McpServer;
  private apiKey: string;
  private apiUrl: string;

  /**
   * @throws Error when `MCP_API_KEY` is not set.
   */
  constructor() {
    this.apiKey = process.env.MCP_API_KEY || '';
    this.apiUrl = process.env.BACKEND_URL || 'http://localhost:8080';

    if (!this.apiKey) {
      throw new Error('MCP_API_KEY environment variable is required');
    }

    this.mcpServer = new McpServer({
      name: 'Maxun Web Scraping Server',
      version: '1.0.0'
    });

    this.setupTools();
  }

  /**
   * Performs an authenticated JSON request against the backend.
   *
   * @param endpoint - Path appended to the configured backend URL.
   * @param options - Extra `fetch` options; `options.headers` override the defaults.
   * @returns The parsed JSON response body.
   * @throws Error when the response status is not 2xx.
   */
  private async makeApiRequest(endpoint: string, options: any = {}) {
    const url = `${this.apiUrl}${endpoint}`;
    const headers = {
      'Content-Type': 'application/json',
      'x-api-key': this.apiKey,
      ...options.headers
    };

    const response = await fetch(url, {
      ...options,
      headers
    });

    if (!response.ok) {
      throw new Error(`API request failed: ${response.status} ${response.statusText}`);
    }

    return await response.json();
  }

  /** Wraps plain text in the MCP tool-result envelope. */
  private textResult(text: string) {
    return { content: [{ type: "text" as const, text }] };
  }

  /** Builds an MCP error result of the form `<prefix>: <error message>`. */
  private errorResult(prefix: string, error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    return {
      content: [{ type: "text" as const, text: `${prefix}: ${message}` }],
      isError: true
    };
  }

  /** Builds the `/api/robots/...` path, encoding IDs supplied by the tool caller. */
  private robotPath(robotId: string, suffix: string = ''): string {
    return `/api/robots/${encodeURIComponent(robotId)}${suffix}`;
  }

  /** Registers every tool on the MCP server. Each tool reports failures as an error result instead of throwing. */
  private setupTools() {
    // Tool: List all robots
    this.mcpServer.tool(
      "list_robots",
      {},
      async () => {
        try {
          const data = await this.makeApiRequest('/api/robots');
          return this.textResult(
            `Found ${data.robots.totalCount} robots:\n\n${JSON.stringify(data.robots.items, null, 2)}`
          );
        } catch (error: unknown) {
          return this.errorResult('Error fetching robots', error);
        }
      }
    );

    // Tool: Get robot details by ID
    this.mcpServer.tool(
      "get_robot",
      {
        robot_id: z.string().describe("ID of the robot to get details for")
      },
      async ({ robot_id }: { robot_id: string }) => {
        try {
          const data = await this.makeApiRequest(this.robotPath(robot_id));
          return this.textResult(`Robot Details:\n\n${JSON.stringify(data.robot, null, 2)}`);
        } catch (error: unknown) {
          return this.errorResult('Error fetching robot', error);
        }
      }
    );

    // Tool: Run a robot and get results
    this.mcpServer.tool(
      "run_robot",
      {
        robot_id: z.string().describe("ID of the robot to run"),
        wait_for_completion: z.boolean().default(true).describe("Whether to wait for the run to complete")
      },
      async ({ robot_id, wait_for_completion }: { robot_id: string; wait_for_completion: boolean }) => {
        try {
          const data = await this.makeApiRequest(this.robotPath(robot_id, '/runs'), {
            method: 'POST'
          });
          const run = data.run;

          if (!wait_for_completion) {
            return this.textResult(`Robot run started! Run ID: ${run.runId}\nStatus: ${run.status}`);
          }

          // A run may come back without extracted data; treat that as empty output.
          const extractedData = run.data ?? {};
          const screenshots = run.screenshots;

          let resultText = `Robot run completed successfully!\n\n`;
          resultText += `Run ID: ${run.runId}\n`;
          resultText += `Status: ${run.status}\n`;
          resultText += `Started: ${run.startedAt}\n`;
          resultText += `Finished: ${run.finishedAt}\n\n`;

          if (extractedData.textData && extractedData.textData.length > 0) {
            resultText += `Extracted Text Data (${extractedData.textData.length} items):\n`;
            resultText += JSON.stringify(extractedData.textData, null, 2) + '\n\n';
          }

          if (extractedData.listData && extractedData.listData.length > 0) {
            resultText += `Extracted List Data (${extractedData.listData.length} items):\n`;
            resultText += JSON.stringify(extractedData.listData, null, 2) + '\n\n';
          }

          if (screenshots && screenshots.length > 0) {
            resultText += `Screenshots captured: ${screenshots.length}\n`;
            resultText += `Screenshot URLs:\n`;
            screenshots.forEach((screenshot: any, index: number) => {
              resultText += `${index + 1}. ${screenshot}\n`;
            });
          }

          return this.textResult(resultText);
        } catch (error: unknown) {
          return this.errorResult('Error running robot', error);
        }
      }
    );

    // Tool: Get all runs for a robot
    this.mcpServer.tool(
      "get_robot_runs",
      {
        robot_id: z.string().describe("ID of the robot")
      },
      async ({ robot_id }: { robot_id: string }) => {
        try {
          const data = await this.makeApiRequest(this.robotPath(robot_id, '/runs'));
          return this.textResult(
            `Robot runs (${data.runs.totalCount} total):\n\n${JSON.stringify(data.runs.items, null, 2)}`
          );
        } catch (error: unknown) {
          return this.errorResult('Error fetching runs', error);
        }
      }
    );

    // Tool: Get specific run details
    this.mcpServer.tool(
      "get_run_details",
      {
        robot_id: z.string().describe("ID of the robot"),
        run_id: z.string().describe("ID of the specific run")
      },
      async ({ robot_id, run_id }: { robot_id: string; run_id: string }) => {
        try {
          const data = await this.makeApiRequest(
            this.robotPath(robot_id, `/runs/${encodeURIComponent(run_id)}`)
          );
          const run = data.run;
          const runData = run.data ?? {};

          let resultText = `Run Details:\n\n`;
          resultText += `Run ID: ${run.runId}\n`;
          resultText += `Status: ${run.status}\n`;
          resultText += `Robot ID: ${run.robotId}\n`;
          resultText += `Started: ${run.startedAt}\n`;
          resultText += `Finished: ${run.finishedAt}\n\n`;

          if (runData.textData && runData.textData.length > 0) {
            resultText += `Extracted Text Data:\n${JSON.stringify(runData.textData, null, 2)}\n\n`;
          }

          if (runData.listData && runData.listData.length > 0) {
            resultText += `Extracted List Data:\n${JSON.stringify(runData.listData, null, 2)}\n\n`;
          }

          if (run.screenshots && run.screenshots.length > 0) {
            resultText += `Screenshots:\n`;
            run.screenshots.forEach((screenshot: any, index: number) => {
              resultText += `${index + 1}. ${screenshot}\n`;
            });
          }

          return this.textResult(resultText);
        } catch (error: unknown) {
          return this.errorResult('Error fetching run details', error);
        }
      }
    );

    // Tool: Get robot performance summary
    this.mcpServer.tool(
      "get_robot_summary",
      {
        robot_id: z.string().describe("ID of the robot")
      },
      async ({ robot_id }: { robot_id: string }) => {
        try {
          // The two requests are independent, so issue them concurrently.
          const [robotData, runsData] = await Promise.all([
            this.makeApiRequest(this.robotPath(robot_id)),
            this.makeApiRequest(this.robotPath(robot_id, '/runs'))
          ]);

          const robot = robotData.robot;
          const runs: any[] = runsData.runs?.items ?? [];

          const successfulRuns = runs.filter((run: any) => run.status === 'success');
          const failedRuns = runs.filter((run: any) => run.status === 'failed');

          let totalTextItems = 0;
          let totalListItems = 0;
          let totalScreenshots = 0;

          successfulRuns.forEach((run: any) => {
            // Successful runs are not guaranteed to carry a data payload.
            if (run.data?.textData) totalTextItems += run.data.textData.length;
            if (run.data?.listData) totalListItems += run.data.listData.length;
            if (run.screenshots) totalScreenshots += run.screenshots.length;
          });

          const created = robot.createdAt ? new Date(robot.createdAt).toLocaleString() : 'N/A';
          const successRate = runs.length > 0
            ? ((successfulRuns.length / runs.length) * 100).toFixed(1)
            : 0;

          const summary = [
            'Robot Performance Summary:',
            '',
            `Robot Name: ${robot.name}`,
            `Robot ID: ${robot.id}`,
            `Created: ${created}`,
            '',
            'Performance Metrics:',
            `- Total Runs: ${runs.length}`,
            `- Successful Runs: ${successfulRuns.length}`,
            `- Failed Runs: ${failedRuns.length}`,
            `- Success Rate: ${successRate}%`,
            '',
            'Data Extracted:',
            `- Total Text Items: ${totalTextItems}`,
            `- Total List Items: ${totalListItems}`,
            `- Total Screenshots: ${totalScreenshots}`,
            `- Total Data Points: ${totalTextItems + totalListItems}`,
            '',
            'Input Parameters:',
            JSON.stringify(robot.inputParameters, null, 2)
          ].join('\n');

          return this.textResult(summary);
        } catch (error: unknown) {
          return this.errorResult('Error generating robot summary', error);
        }
      }
    );
  }

  /**
   * Connects the MCP server to a stdio transport.
   * @throws Rethrows the connection error after logging it.
   */
  async start() {
    try {
      const transport = new StdioServerTransport();
      await this.mcpServer.connect(transport);
      log('Maxun MCP Worker connected and ready');
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      log(`Failed to start MCP Worker: ${message}`);
      throw error;
    }
  }

  /** Closes the MCP server; failures are logged and not rethrown. */
  async stop() {
    try {
      await this.mcpServer.close();
      log('Maxun MCP Worker stopped');
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      log(`Error stopping MCP Worker: ${message}`);
    }
  }
}
/** Express request carrying the decoded JWT payload once authenticated. */
interface UserRequest extends Request {
  user?: JwtPayload | string;
}

/**
 * Express middleware that authenticates a request from the JWT stored in the
 * `token` cookie.
 *
 * Responds 401 when the cookie is absent, 500 when `JWT_SECRET` is not
 * configured and 403 when verification fails. On success the decoded payload
 * is stored on `req.user` and `next()` is called.
 */
export const requireSignIn = (req: UserRequest, res: Response, next: any) => {
  const token = req.cookies?.token || null;
  if (token === null) return res.sendStatus(401);

  const secret = process.env.JWT_SECRET;
  if (!secret) {
    // Internal Server Error if secret is not defined
    return res.sendStatus(500);
  }

  verify(token, secret, (err: any, user: any) => {
    if (err) {
      console.log('JWT verification error:', err);
      return res.sendStatus(403);
    }

    // Normalize payload key: expose the legacy `userId` claim as `id`.
    const hasLegacyKeyOnly = user.userId && !user.id;
    if (hasLegacyKeyOnly) {
      user.id = user.userId;
      delete user.userId; // temporary: del the old key for clarity
    }

    req.user = user;
    next();
  });
};
/** Recurrence settings stored in the `schedule` JSONB column of a robot. */
interface ScheduleConfig {
  runEvery: number;
  runEveryUnit: 'MINUTES' | 'HOURS' | 'DAYS' | 'WEEKS' | 'MONTHS';
  startFrom: 'SUNDAY' | 'MONDAY' | 'TUESDAY' | 'WEDNESDAY' | 'THURSDAY' | 'FRIDAY' | 'SATURDAY';
  atTimeStart?: string;
  atTimeEnd?: string;
  timezone: string;
  lastRunAt?: Date;
  nextRunAt?: Date;
  dayOfMonth?: string;
  cronExpression?: string;
}

/** Attributes accepted when creating a robot; `id` is optional because the column has a UUIDv4 default. */
interface RobotCreationAttributes extends Optional<RobotAttributes, 'id'> { }

/**
 * Sequelize model for a robot: its recording plus the optional Google Sheets,
 * Airtable, schedule and webhook settings attached to it.
 */
class Robot extends Model<RobotAttributes, RobotCreationAttributes> implements RobotAttributes {
  public id!: string;
  public userId!: number;
  public recording_meta!: RobotMeta;
  public recording!: RobotWorkflow;
  public google_sheet_email!: string | null;
  public google_sheet_name!: string | null;
  public google_sheet_id!: string | null;
  public google_access_token!: string | null;
  public google_refresh_token!: string | null;
  public airtable_base_id!: string | null;
  public airtable_base_name!: string | null;
  public airtable_table_name!: string | null;
  public airtable_access_token!: string | null;
  public airtable_refresh_token!: string | null;
  public airtable_table_id!: string | null;
  public schedule!: ScheduleConfig | null;
  public webhooks!: WebhookConfig[] | null;
}
/** Interpreter options persisted with each run in the `interpreterSettings` column. */
interface InterpreterSettings {
  maxConcurrency: number;
  maxRepeats: number;
  debug: boolean;
}

/**
 * Attributes of a row in the `run` table.
 *
 * NOTE(review): `runByUserId` is typed `string` here while the column is
 * declared as INTEGER in `Run.init` — confirm which one callers rely on.
 */
interface RunAttributes {
  id: string;
  status: string;
  name: string;
  robotId: string;
  robotMetaId: string;
  startedAt: string;
  finishedAt: string;
  browserId: string;
  interpreterSettings: InterpreterSettings;
  log: string;
  runId: string;
  runByUserId?: string;
  runByScheduleId?: string;
  runByAPI?: boolean;
  runBySDK?: boolean;
  // Both outputs are stored as JSONB; their value shapes are not visible here.
  serializableOutput: Record<string, any>;
  binaryOutput: Record<string, any>;
  retryCount?: number;
}

/** Attributes accepted when creating a run; `id` is optional because the column has a UUIDv4 default. */
interface RunCreationAttributes extends Optional<RunAttributes, 'id'> { }
// Column definitions for the `run` table.
Run.init(
  {
    // Primary key, generated as a UUIDv4 when not supplied.
    id: {
      type: DataTypes.UUID,
      defaultValue: DataTypes.UUIDV4,
      primaryKey: true,
    },
    status: {
      type: DataTypes.STRING(50),
      allowNull: false,
    },
    name: {
      type: DataTypes.STRING(255),
      allowNull: false,
    },
    // Foreign key to the Robot model's `id`.
    robotId: {
      type: DataTypes.UUID,
      allowNull: false,
      references: {
        model: Robot,
        key: 'id',
      },
    },
    robotMetaId: {
      type: DataTypes.UUID,
      allowNull: false,
    },
    // Start/finish times are stored as strings, not DATE columns.
    startedAt: {
      type: DataTypes.STRING(255),
      allowNull: false,
    },
    finishedAt: {
      type: DataTypes.STRING(255),
      allowNull: false,
    },
    browserId: {
      type: DataTypes.UUID,
      allowNull: false,
    },
    interpreterSettings: {
      type: DataTypes.JSONB,
      allowNull: false,
    },
    log: {
      type: DataTypes.TEXT,
      allowNull: true,
    },
    runId: {
      type: DataTypes.UUID,
      allowNull: false,
    },
    // The four nullable `runBy*` columns below record what triggered the run.
    runByUserId: {
      type: DataTypes.INTEGER,
      allowNull: true,
    },
    runByScheduleId: {
      type: DataTypes.UUID,
      allowNull: true,
    },
    runByAPI: {
      type: DataTypes.BOOLEAN,
      allowNull: true,
    },
    runBySDK: {
      type: DataTypes.BOOLEAN,
      allowNull: true,
    },
    serializableOutput: {
      type: DataTypes.JSONB,
      allowNull: true,
    },
    // Defaults to an empty object rather than NULL.
    binaryOutput: {
      type: DataTypes.JSONB,
      allowNull: true,
      defaultValue: {},
    },
    retryCount: {
      type: DataTypes.INTEGER,
      allowNull: true,
      defaultValue: 0,
    },
  },
  {
    sequelize,
    tableName: 'run',
    // Sequelize's automatic createdAt/updatedAt columns are disabled for this table.
    timestamps: false,
  }
);
public email!: string; public password!: string; public api_key_name!: string | null; public api_key!: string | null; public api_key_created_at!: Date | null; public proxy_url!: string | null; public proxy_username!: string | null; public proxy_password!: string | null; } User.init( { id: { type: DataTypes.INTEGER, autoIncrement: true, primaryKey: true, }, email: { type: DataTypes.STRING, allowNull: false, unique: true, validate: { isEmail: true, }, }, password: { type: DataTypes.STRING, allowNull: false, }, api_key_name: { type: DataTypes.STRING, allowNull: true, defaultValue: 'Maxun API Key', }, api_key: { type: DataTypes.STRING, allowNull: true, }, api_key_created_at: { type: DataTypes.DATE, allowNull: true, }, proxy_url: { type: DataTypes.STRING, allowNull: true, }, proxy_username: { type: DataTypes.STRING, allowNull: true, }, proxy_password: { type: DataTypes.STRING, allowNull: true, }, }, { sequelize, tableName: 'user', } ); export default User; ================================================ FILE: server/src/models/associations.ts ================================================ import Robot from './Robot'; import Run from './Run'; export default function setupAssociations() { Run.belongsTo(Robot, { foreignKey: 'robotId' }); Robot.hasMany(Run, { foreignKey: 'robotId' }); } ================================================ FILE: server/src/pgboss-worker.ts ================================================ /** * Recording worker using PgBoss for asynchronous browser recording operations */ import PgBoss, { Job } from 'pg-boss'; import logger from './logger'; import { initializeRemoteBrowserForRecording, destroyRemoteBrowser, interpretWholeWorkflow, stopRunningInterpretation, } from './browser-management/controller'; import { WorkflowFile } from 'maxun-core'; import Run from './models/Run'; import Robot from './models/Robot'; import { browserPool } from './server'; import { Page } from 'playwright-core'; import { capture } from './utils/analytics'; import { 
addGoogleSheetUpdateTask, googleSheetUpdateTasks, processGoogleSheetUpdates } from './workflow-management/integrations/gsheet';
import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } from './workflow-management/integrations/airtable';
import { io as serverIo } from "./server";
import { sendWebhook } from './routes/webhook';
import { BinaryOutputService } from './storage/mino';
import { convertPageToMarkdown, convertPageToHTML, convertPageToScreenshot } from './markdownify/scrape';

// Fail fast at module load when the database connection settings are incomplete.
if (!process.env.DB_USER || !process.env.DB_PASSWORD || !process.env.DB_HOST || !process.env.DB_PORT || !process.env.DB_NAME) {
  throw new Error('Failed to start pgboss worker: one or more required environment variables are missing.');
}

// Only the password is URL-encoded; the other parts are interpolated as-is.
const pgBossConnectionString = `postgresql://${process.env.DB_USER}:${encodeURIComponent(process.env.DB_PASSWORD)}@${process.env.DB_HOST}:${process.env.DB_PORT}/${process.env.DB_NAME}`;

/** Payload of an 'initialize-browser-recording' job. */
interface InitializeBrowserData {
  userId: string;
}

/** Payload of an 'interpret-workflow' job. */
interface InterpretWorkflow {
  userId: string;
}

/** Payload of a 'stop-interpretation' job. */
interface StopInterpretWorkflow {
  userId: string;
}

/** Payload of a 'destroy-browser' job. */
interface DestroyBrowserData {
  browserId: string;
  userId: string;
}

/** Payload of a run execution job. */
interface ExecuteRunData {
  userId: string;
  runId: string;
  browserId: string;
}

/** Payload of an abort-run job. */
interface AbortRunData {
  userId: string;
  runId: string;
}

const pgBoss = new PgBoss({ connectionString: pgBossConnectionString, expireInHours: 23, max: 5, });

/**
 * Extract data safely from a job (single job or job array).
 *
 * @param job A single job, or an array of jobs of which only the first is used.
 * @returns The `data` payload of the (first) job.
 * @throws Error when an empty array is received.
 */
function extractJobData(job: Job | Job[]): T {
  if (Array.isArray(job)) {
    if (job.length === 0) {
      throw new Error('Empty job array received');
    }
    return job[0].data;
  }
  return job.data;
}

/**
 * Returns a deep (JSON round-trip) copy of the workflow in which every
 * where/what pair starts with a `flag` action carrying the argument 'generated'.
 * The input workflow is not modified.
 */
function AddGeneratedFlags(workflow: WorkflowFile) {
  const copy = JSON.parse(JSON.stringify(workflow));
  for (let i = 0; i < workflow.workflow.length; i++) {
    copy.workflow[i].what.unshift({ action: 'flag', args: ['generated'], });
  }
  return copy;
};

/**
 * Races `promise` against a timer and rejects when the timer fires first.
 * The timer is cleared as soon as the race settles, so a finished operation
 * does not leave a pending timeout behind.
 *
 * @param promise Operation to wait for.
 * @param timeoutMs Time limit in milliseconds.
 * @param operation Label used in the default timeout message.
 * @param timeoutMessage Optional full error message; defaults to
 *   `${operation} timed out after ${timeoutMs}ms`.
 * @returns A promise that settles like `promise`, or rejects with an `Error` on timeout.
 */
function withTimeout(promise: Promise, timeoutMs: number, operation: string, timeoutMessage?: string): Promise {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(timeoutMessage ?? `${operation} timed out after ${timeoutMs}ms`)),
      timeoutMs
    );
  });
  return Promise.race([promise, timeout]).finally(() => {
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  });
}

/** Number of entries in an array or keyed object; 0 for anything else. */
function countEntries(value: unknown): number {
  return value !== null && typeof value === 'object' ? Object.keys(value).length : 0;
}

/**
 * True when the run holds any extracted schema data, list data or binary output.
 * `scrapeSchema` / `scrapeList` are counted by keys, which works whether they
 * are stored as arrays or as objects keyed by name.
 */
function runHasExtractedData(run: Run): boolean {
  const output = run.serializableOutput;
  return countEntries(output?.scrapeSchema) > 0
    || countEntries(output?.scrapeList) > 0
    || countEntries(run.binaryOutput) > 0;
}

/**
 * Queues Google Sheets and Airtable update tasks for a run and starts both
 * processors without awaiting them; each processor is limited to 65 seconds.
 * Errors are logged and never propagated to the caller.
 */
async function triggerIntegrationUpdates(runId: string, robotMetaId: string): Promise {
  try {
    addGoogleSheetUpdateTask(runId, { robotId: robotMetaId, runId: runId, status: 'pending', retries: 5, });
    addAirtableUpdateTask(runId, { robotId: robotMetaId, runId: runId, status: 'pending', retries: 5, });

    // Deliberately fire-and-forget: failures are only logged.
    withTimeout(processAirtableUpdates(), 65000, 'Airtable update')
      .catch(err => logger.log('error', `Airtable update error: ${err.message}`));
    withTimeout(processGoogleSheetUpdates(), 65000, 'Google Sheets update')
      .catch(err => logger.log('error', `Google Sheets update error: ${err.message}`));
  } catch (err: any) {
    logger.log('error', `Failed to update integrations for run: ${runId}: ${err.message}`);
  }
}

/**
 * Executes one queued run.
 *
 * Steps: load the run (skipping runs that are aborted, aborting or still
 * 'queued'), wait for the remote browser and its current page, then either
 * convert the page (robots of type 'scrape') or interpret the recorded
 * workflow. The outcome is written to the run row, announced over socket.io,
 * sent to webhooks and reported to analytics, and the browser is destroyed.
 *
 * @param job Job whose `data` carries `runId`, `userId` and `browserId`.
 * @returns `{ success }`, plus `partialDataExtracted` when the workflow failed
 *   during execution. Failures are reported in the result rather than thrown.
 */
async function processRunExecution(job: Job) {
  const BROWSER_INIT_TIMEOUT = 30000;
  const BROWSER_PAGE_TIMEOUT = 15000;
  const data = job.data;
  logger.log('info', `Processing run execution job for runId: ${data.runId}, browserId: ${data.browserId}`);

  try {
    const run = await Run.findOne({ where: { runId: data.runId } });
    if (!run) {
      logger.log('error', `Run ${data.runId} not found in database`);
      return { success: false };
    }

    if (run.status === 'aborted' || run.status === 'aborting') {
      logger.log('info', `Run ${data.runId} has status ${run.status}, skipping execution`);
      return { success: true };
    }

    if (run.status === 'queued') {
      logger.log('info', `Run ${data.runId} has status 'queued', skipping stale execution job - processQueuedRuns will handle it`);
      return { success: true };
    }

    const plainRun = run.toJSON();
    const browserId = data.browserId || plainRun.browserId;
    if (!browserId) {
      throw new Error(`No browser ID available for run ${data.runId}`);
    }

    logger.log('info', `Looking for browser ${browserId} for run ${data.runId}`);
    let browser = browserPool.getRemoteBrowser(browserId);
    const browserWaitStart = Date.now();
    let lastLogTime = 0;
    let pollAttempts = 0;
    const MAX_POLL_ATTEMPTS = 15;

    // Poll every 2s until the browser appears, the time limit passes or the attempt cap is hit.
    while (!browser && (Date.now() - browserWaitStart) < BROWSER_INIT_TIMEOUT && pollAttempts < MAX_POLL_ATTEMPTS) {
      const currentTime = Date.now();
      pollAttempts++;

      const browserStatus = browserPool.getBrowserStatus(browserId);
      if (browserStatus === null) {
        throw new Error(`Browser slot ${browserId} does not exist in pool`);
      }
      if (browserStatus === "failed") {
        throw new Error(`Browser ${browserId} initialization failed`);
      }

      // Log progress at most once every 10 seconds.
      if (currentTime - lastLogTime > 10000) {
        logger.log('info', `Browser ${browserId} not ready yet (status: ${browserStatus}), waiting... (${Math.round((currentTime - browserWaitStart) / 1000)}s elapsed)`);
        lastLogTime = currentTime;
      }

      await new Promise(resolve => setTimeout(resolve, 2000));
      browser = browserPool.getRemoteBrowser(browserId);
    }

    if (!browser) {
      const finalStatus = browserPool.getBrowserStatus(browserId);
      throw new Error(`Browser ${browserId} not found in pool after ${BROWSER_INIT_TIMEOUT/1000}s timeout (final status: ${finalStatus})`);
    }

    logger.log('info', `Browser ${browserId} found and ready for execution`);

    try {
      const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
      if (!recording) {
        throw new Error(`Recording for run ${data.runId} not found`);
      }

      let currentPage = browser.getCurrentPage();
      const pageWaitStart = Date.now();
      let lastPageLogTime = 0;
      let pageAttempts = 0;
      const MAX_PAGE_ATTEMPTS = 15;

      // Poll every 1s until the browser exposes a current page.
      while (!currentPage && (Date.now() - pageWaitStart) < BROWSER_PAGE_TIMEOUT && pageAttempts < MAX_PAGE_ATTEMPTS) {
        const currentTime = Date.now();
        pageAttempts++;

        if (currentTime - lastPageLogTime > 5000) {
          logger.log('info', `Page not ready for browser ${browserId}, waiting... (${Math.round((currentTime - pageWaitStart) / 1000)}s elapsed)`);
          lastPageLogTime = currentTime;
        }

        await new Promise(resolve => setTimeout(resolve, 1000));
        currentPage = browser.getCurrentPage();
      }

      if (!currentPage) {
        throw new Error(`No current page available for browser ${browserId} after ${BROWSER_PAGE_TIMEOUT/1000}s timeout`);
      }

      if (recording.recording_meta.type === 'scrape') {
        logger.log('info', `Executing scrape robot for run ${data.runId}`);
        const formats = recording.recording_meta.formats || ['markdown'];

        await run.update({ status: 'running', log: `Converting page to ${formats.join(', ')}` });

        try {
          const url = recording.recording_meta.url;
          if (!url) {
            throw new Error('No URL specified for markdown robot');
          }

          let markdown = '';
          let html = '';
          const serializableOutput: any = {};
          const binaryOutput: any = {};
          const SCRAPE_TIMEOUT = 120000;

          // Each conversion gets its own time limit; withTimeout clears the timer once it settles.
          if (formats.includes('markdown')) {
            markdown = await withTimeout(
              convertPageToMarkdown(url, currentPage),
              SCRAPE_TIMEOUT,
              'Markdown conversion',
              `Markdown conversion timed out after ${SCRAPE_TIMEOUT/1000}s`
            );
            serializableOutput.markdown = [{ content: markdown }];
          }

          if (formats.includes('html')) {
            html = await withTimeout(
              convertPageToHTML(url, currentPage),
              SCRAPE_TIMEOUT,
              'HTML conversion',
              `HTML conversion timed out after ${SCRAPE_TIMEOUT/1000}s`
            );
            serializableOutput.html = [{ content: html }];
          }

          if (formats.includes("screenshot-visible")) {
            const screenshotBuffer = await withTimeout(
              convertPageToScreenshot(url, currentPage, false),
              SCRAPE_TIMEOUT,
              'Screenshot conversion',
              `Screenshot conversion timed out after ${SCRAPE_TIMEOUT/1000}s`
            );
            if (!binaryOutput['screenshot-visible']) {
              binaryOutput['screenshot-visible'] = { data: screenshotBuffer.toString('base64'), mimeType: 'image/png' };
            }
          }

          if (formats.includes("screenshot-fullpage")) {
            const screenshotBuffer = await withTimeout(
              convertPageToScreenshot(url, currentPage, true),
              SCRAPE_TIMEOUT,
              'Screenshot conversion',
              `Screenshot conversion timed out after ${SCRAPE_TIMEOUT/1000}s`
            );
            if (!binaryOutput['screenshot-fullpage']) {
              binaryOutput['screenshot-fullpage'] = { data: screenshotBuffer.toString('base64'), mimeType: 'image/png' };
            }
          }

          // Success update
          await run.update({
            status: 'success',
            finishedAt: new Date().toLocaleString(),
            log: `${formats.join(', ').toUpperCase()} conversion completed successfully`,
            serializableOutput,
            binaryOutput,
          });

          // Replace the inline screenshots with the stored versions once uploaded.
          let uploadedBinaryOutput: Record = {};
          if (Object.keys(binaryOutput).length > 0) {
            const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
            uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, binaryOutput);
            await run.update({ binaryOutput: uploadedBinaryOutput });
          }

          logger.log('info', `Markdown robot execution completed for run ${data.runId}`);

          // Notify sockets
          try {
            const completionData = {
              runId: data.runId,
              robotMetaId: plainRun.robotMetaId,
              robotName: recording.recording_meta.name,
              status: 'success',
              finishedAt: new Date().toLocaleString()
            };
            serverIo.of(browserId).emit('run-completed', completionData);
            serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', completionData);
          } catch (socketError: any) {
            logger.log('warn', `Failed to send run-completed notification for markdown robot run ${data.runId}: ${socketError.message}`);
          }

          // Webhooks
          try {
            const webhookPayload: any = {
              runId: data.runId,
              robotId: plainRun.robotMetaId,
              robotName: recording.recording_meta.name,
              status: 'success',
              finishedAt: new Date().toLocaleString(),
            };
            if (formats.includes('markdown')) webhookPayload.markdown = markdown;
            if (formats.includes('html')) webhookPayload.html = html;
            if (uploadedBinaryOutput['screenshot-visible']) webhookPayload.screenshot_visible = uploadedBinaryOutput['screenshot-visible'];
            if (uploadedBinaryOutput['screenshot-fullpage']) webhookPayload.screenshot_fullpage = uploadedBinaryOutput['screenshot-fullpage'];

            await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
            logger.log('info', `Webhooks sent successfully for markdown robot run ${data.runId}`);
          } catch (webhookError: any) {
            logger.log('warn', `Failed to send webhooks for markdown robot run ${data.runId}: ${webhookError.message}`);
          }

          capture("maxun-oss-run-created", {
            runId: data.runId,
            user_id: data.userId,
            status: "success",
            robot_type: "scrape",
            formats,
            source: "manual"
          });

          await destroyRemoteBrowser(browserId, data.userId);
          return { success: true };
        } catch (error: any) {
          logger.log('error', `${formats.join(', ')} conversion failed for run ${data.runId}: ${error.message}`);

          await run.update({
            status: 'failed',
            finishedAt: new Date().toLocaleString(),
            log: `${formats.join(', ').toUpperCase()} conversion failed: ${error.message}`,
          });

          try {
            const failureData = {
              runId: data.runId,
              robotMetaId: plainRun.robotMetaId,
              robotName: recording.recording_meta.name,
              status: 'failed',
              finishedAt: new Date().toLocaleString()
            };
            serverIo.of(browserId).emit('run-completed', failureData);
            serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', failureData);
          } catch (socketError: any) {
            logger.log('warn', `Failed to send run-failed notification for markdown robot run ${data.runId}: ${socketError.message}`);
          }

          capture("maxun-oss-run-created", {
            runId: data.runId,
            user_id: data.userId,
            status: "failed",
            robot_type: "scrape",
            formats,
            source: "manual"
          });

          await destroyRemoteBrowser(browserId, data.userId);
          // Rethrown into the execution-failure handler below.
          throw error;
        }
      }

      // Re-reads the run so aborts issued while the workflow is running are noticed.
      const isRunAborted = async (): Promise => {
        try {
          const currentRun = await Run.findOne({ where: { runId: data.runId } });
          return currentRun ? (currentRun.status === 'aborted' || currentRun.status === 'aborting') : false;
        } catch (error: any) {
          logger.log('error', `Error checking if run ${data.runId} is aborted: ${error.message}`);
          return false;
        }
      };

      logger.log('info', `Starting workflow execution for run ${data.runId}`);

      await run.update({ status: 'running', log: 'Workflow execution started' });

      try {
        const startedData = {
          runId: data.runId,
          robotMetaId: plainRun.robotMetaId,
          robotName: recording.recording_meta.name,
          status: 'running',
          startedAt: new Date().toLocaleString()
        };
        serverIo.of(browserId).emit('run-started', startedData);
        serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-started', startedData);
      } catch (socketError: any) {
        logger.log('warn', `Failed to send run-started notification for API run ${plainRun.runId}: ${socketError.message}`);
      }

      browser.interpreter.setRunId(data.runId);

      const INTERPRETATION_TIMEOUT = 600000;
      const interpretationInfo = await withTimeout(
        browser.interpreter.InterpretRecording(
          AddGeneratedFlags(recording.recording),
          currentPage,
          (newPage: Page) => currentPage = newPage,
          plainRun.interpreterSettings,
        ),
        INTERPRETATION_TIMEOUT,
        'Workflow interpretation',
        `Workflow interpretation timed out after ${INTERPRETATION_TIMEOUT/1000}s`
      );

      if (await isRunAborted()) {
        logger.log('info', `Run ${data.runId} was aborted during execution, not updating status`);

        try {
          await browser.interpreter.clearState();
          logger.debug(`Cleared interpreter state for aborted run ${data.runId}`);
        } catch (clearError: any) {
          logger.warn(`Failed to clear interpreter state on abort: ${clearError.message}`);
        }

        await destroyRemoteBrowser(plainRun.browserId, data.userId);
        return { success: true };
      }

      logger.log('info', `Workflow execution completed for run ${data.runId}`);

      const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
      const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(
        run,
        interpretationInfo.binaryOutput
      );

      // The serializable output is read from a freshly loaded row, not from `run`.
      const finalRun = await Run.findByPk(run.id);
      const categorizedOutput = {
        scrapeSchema: finalRun?.serializableOutput?.scrapeSchema || {},
        scrapeList: finalRun?.serializableOutput?.scrapeList || {},
        crawl: finalRun?.serializableOutput?.crawl || {},
        search: finalRun?.serializableOutput?.search || {}
      };

      if (await isRunAborted()) {
        logger.log('info', `Run ${data.runId} was aborted while processing results, not updating status`);
        return { success: true };
      }

      await run.update({
        status: 'success',
        finishedAt: new Date().toLocaleString(),
        log: interpretationInfo.log.join('\n'),
        binaryOutput: uploadedBinaryOutput,
      });

      let totalSchemaItemsExtracted = 0;
      let totalListItemsExtracted = 0;
      let extractedScreenshotsCount = 0;

      if (categorizedOutput) {
        if (categorizedOutput.scrapeSchema) {
          Object.values(categorizedOutput.scrapeSchema).forEach((schemaResult: any) => {
            if (Array.isArray(schemaResult)) {
              totalSchemaItemsExtracted += schemaResult.length;
            } else if (schemaResult && typeof schemaResult === 'object') {
              totalSchemaItemsExtracted += 1;
            }
          });
        }

        if (categorizedOutput.scrapeList) {
          Object.values(categorizedOutput.scrapeList).forEach((listResult: any) => {
            if (Array.isArray(listResult)) {
              totalListItemsExtracted += listResult.length;
            }
          });
        }

        if (run.binaryOutput) {
          extractedScreenshotsCount = Object.keys(run.binaryOutput).length;
        }
      }

      const totalRowsExtracted = totalSchemaItemsExtracted + totalListItemsExtracted;

      // Capture metrics
      capture(
        'maxun-oss-run-created',
        {
          runId: data.runId,
          user_id: data.userId,
          created_at: new Date().toISOString(),
          status: 'success',
          totalRowsExtracted,
          schemaItemsExtracted: totalSchemaItemsExtracted,
          listItemsExtracted: totalListItemsExtracted,
          extractedScreenshotsCount,
          is_llm: (recording.recording_meta as any).isLLM,
          source: 'manual'
        }
      );

      try {
        const completionData = {
          runId: data.runId,
          robotMetaId: plainRun.robotMetaId,
          robotName: recording.recording_meta.name,
          status: 'success',
          finishedAt: new Date().toLocaleString()
        };
        serverIo.of(browserId).emit('run-completed', completionData);
        serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', completionData);
      } catch (socketError: any) {
        logger.log('warn', `Failed to send run-completed notification for API run ${plainRun.runId}: ${socketError.message}`);
      }

      const webhookPayload = {
        robot_id: plainRun.robotMetaId,
        run_id: data.runId,
        robot_name: recording.recording_meta.name,
        status: 'success',
        started_at: plainRun.startedAt,
        finished_at: new Date().toLocaleString(),
        extracted_data: {
          captured_texts: Object.keys(categorizedOutput.scrapeSchema || {}).length > 0
            ? Object.entries(categorizedOutput.scrapeSchema).reduce((acc, [name, value]) => {
                acc[name] = Array.isArray(value) ? value : [value];
                return acc;
              }, {} as Record)
            : {},
          captured_lists: categorizedOutput.scrapeList,
          crawl_data: categorizedOutput.crawl,
          search_data: categorizedOutput.search,
          captured_texts_count: totalSchemaItemsExtracted,
          captured_lists_count: totalListItemsExtracted,
          screenshots_count: extractedScreenshotsCount
        },
        metadata: {
          browser_id: plainRun.browserId,
          user_id: data.userId,
        }
      };

      try {
        await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
        logger.log('info', `Webhooks sent successfully for completed run ${data.runId}`);
      } catch (webhookError: any) {
        logger.log('error', `Failed to send webhooks for run ${data.runId}: ${webhookError.message}`);
      }

      await triggerIntegrationUpdates(plainRun.runId, plainRun.robotMetaId);

      await destroyRemoteBrowser(browserId, data.userId);
      logger.log('info', `Browser ${browserId} destroyed after successful run ${data.runId}`);

      return { success: true };
    } catch (executionError: any) {
      logger.log('error', `Run execution failed for run ${data.runId}: ${executionError.message}`);

      let partialDataExtracted = false;
      let partialData: any = null;
      let partialOutput: any = {};
      let partialUpdateData: any = {
        status: 'failed',
        finishedAt: new Date().toLocaleString(),
        log: `Failed: ${executionError.message}`,
      };

      try {
        // `run` was loaded before execution started, so inspect a freshly read row
        // (as the success path does) to see what was stored before the failure.
        const snapshot = (await Run.findByPk(run.id)) ?? run;

        if (runHasExtractedData(snapshot)) {
          const snapshotOutput = snapshot.serializableOutput || {};

          // Same counting rules as the success path.
          let schemaItems = 0;
          Object.values(snapshotOutput.scrapeSchema || {}).forEach((schemaResult: any) => {
            if (Array.isArray(schemaResult)) {
              schemaItems += schemaResult.length;
            } else if (schemaResult && typeof schemaResult === 'object') {
              schemaItems += 1;
            }
          });

          let listItems = 0;
          Object.values(snapshotOutput.scrapeList || {}).forEach((listResult: any) => {
            if (Array.isArray(listResult)) {
              listItems += listResult.length;
            }
          });

          logger.log('info', `Partial data found in failed run ${data.runId}, triggering integration updates`);
          await triggerIntegrationUpdates(plainRun.runId, plainRun.robotMetaId);

          partialOutput = snapshotOutput;
          partialData = {
            totalSchemaItemsExtracted: schemaItems,
            totalListItemsExtracted: listItems,
            extractedScreenshotsCount: countEntries(snapshot.binaryOutput),
          };
          partialDataExtracted = true;
        }
      } catch (dataCheckError: any) {
        logger.log('warn', `Failed to check for partial data in run ${data.runId}: ${dataCheckError.message}`);
      }

      await run.update(partialUpdateData);

      try {
        const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true });

        const failureData = {
          runId: data.runId,
          robotMetaId: plainRun.robotMetaId,
          robotName: recording ? recording.recording_meta.name : 'Unknown Robot',
          status: 'failed',
          finishedAt: new Date().toLocaleString(),
          hasPartialData: partialDataExtracted
        };

        serverIo.of(browserId).emit('run-completed', failureData);
        serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', failureData);
      } catch (emitError: any) {
        logger.log('warn', `Failed to emit failure event: ${emitError.message}`);
      }

      const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true });

      const failedWebhookPayload = {
        robot_id: plainRun.robotMetaId,
        run_id: data.runId,
        robot_name: recording ? recording.recording_meta.name : 'Unknown Robot',
        status: 'failed',
        started_at: plainRun.startedAt,
        finished_at: new Date().toLocaleString(),
        error: {
          message: executionError.message,
          stack: executionError.stack,
          type: 'ExecutionError',
        },
        partial_data_extracted: partialDataExtracted,
        extracted_data: partialDataExtracted ? {
          captured_texts: Object.keys(partialOutput.scrapeSchema || {}).length > 0
            ? Object.entries(partialOutput.scrapeSchema).reduce((acc, [name, value]) => {
                acc[name] = Array.isArray(value) ? value : [value];
                return acc;
              }, {} as Record)
            : {},
          captured_lists: partialOutput.scrapeList || {},
          captured_texts_count: partialData?.totalSchemaItemsExtracted || 0,
          captured_lists_count: partialData?.totalListItemsExtracted || 0,
          screenshots_count: partialData?.extractedScreenshotsCount || 0
        } : null,
        metadata: {
          browser_id: plainRun.browserId,
          user_id: data.userId,
        }
      };

      try {
        await sendWebhook(plainRun.robotMetaId, 'run_failed', failedWebhookPayload);
        logger.log('info', `Failure webhooks sent successfully for run ${data.runId}`);
      } catch (webhookError: any) {
        logger.log('error', `Failed to send failure webhooks for run ${data.runId}: ${webhookError.message}`);
      }

      // NOTE(review): 'run-completed' was already emitted above with `hasPartialData`;
      // this second emit targets the namespace of `run.browserId` - confirm both are needed.
      try {
        const failureSocketData = {
          runId: data.runId,
          robotMetaId: run.robotMetaId,
          robotName: recording ? recording.recording_meta.name : 'Unknown Robot',
          status: 'failed',
          finishedAt: new Date().toLocaleString()
        };

        serverIo.of(run.browserId).emit('run-completed', failureSocketData);
        serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', failureSocketData);
      } catch (socketError: any) {
        logger.log('warn', `Failed to emit failure event in main catch: ${socketError.message}`);
      }

      capture('maxun-oss-run-created', {
        runId: data.runId,
        user_id: data.userId,
        created_at: new Date().toISOString(),
        status: 'failed',
        error_message: executionError.message,
        partial_data_extracted: partialDataExtracted,
        totalRowsExtracted: partialData?.totalSchemaItemsExtracted + partialData?.totalListItemsExtracted + partialData?.extractedScreenshotsCount || 0,
        is_llm: (recording?.recording_meta as any)?.isLLM,
        source: 'manual'
      });

      try {
        if (browser && browser.interpreter) {
          await browser.interpreter.clearState();
          logger.debug(`Cleared interpreter state for failed run ${data.runId}`);
        }
      } catch (clearError: any) {
        logger.warn(`Failed to clear interpreter state on error: ${clearError.message}`);
      }

      await destroyRemoteBrowser(browserId, data.userId);
      logger.log('info', `Browser ${browserId} destroyed after failed run`);

      return { success: false, partialDataExtracted };
    }
  } catch (error: unknown) {
    // Failures before execution started (run lookup, browser wait) end up here.
    const errorMessage = error instanceof Error ? error.message : String(error);
    logger.log('error', `Failed to process run execution job: ${errorMessage}`);

    try {
      const run = await Run.findOne({ where: { runId: data.runId }});

      if (run) {
        await run.update({
          status: 'failed',
          finishedAt: new Date().toLocaleString(),
          log: `Failed: ${errorMessage}`,
        });

        const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true });

        const failedWebhookPayload = {
          robot_id: run.robotMetaId,
          run_id: data.runId,
          robot_name: recording ? recording.recording_meta.name : 'Unknown Robot',
          status: 'failed',
          started_at: run.startedAt,
          finished_at: new Date().toLocaleString(),
          error: {
            message: errorMessage,
          },
          metadata: {
            browser_id: run.browserId,
            user_id: data.userId,
          }
        };

        try {
          await sendWebhook(run.robotMetaId, 'run_failed', failedWebhookPayload);
          logger.log('info', `Failure webhooks sent successfully for run ${data.runId}`);
        } catch (webhookError: any) {
          logger.log('error', `Failed to send failure webhooks for run ${data.runId}: ${webhookError.message}`);
        }

        try {
          const failureSocketData = {
            runId: data.runId,
            robotMetaId: run.robotMetaId,
            robotName: recording ? recording.recording_meta.name : 'Unknown Robot',
            status: 'failed',
            finishedAt: new Date().toLocaleString()
          };

          serverIo.of(run.browserId).emit('run-completed', failureSocketData);
          serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', failureSocketData);
        } catch (socketError: any) {
          logger.log('warn', `Failed to emit failure event in main catch: ${socketError.message}`);
        }
      }
    } catch (updateError: any) {
      logger.log('error', `Failed to update run status: ${updateError.message}`);
    }

    return { success: false };
  }
}

/**
 * Aborts a run: marks it 'aborting' and then 'aborted', pushes any data already
 * extracted to the integrations, emits 'run-aborted' on the browser's socket
 * namespace and destroys the browser.
 *
 * @param runId Identifier of the run to abort.
 * @param userId User requesting the abort; used for logging and browser cleanup.
 *   NOTE(review): ownership of the run is not verified here - confirm the caller checks it.
 * @returns `true` when the run was marked aborted, `false` when it was not found
 *   or an error occurred.
 */
async function abortRun(runId: string, userId: string): Promise {
  try {
    const run = await Run.findOne({ where: { runId: runId } });

    if (!run) {
      logger.log('warn', `Run ${runId} not found or does not belong to user ${userId}`);
      return false;
    }

    await run.update({ status: 'aborting' });

    const plainRun = run.toJSON();

    const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
    const robotName = recording?.recording_meta?.name || 'Unknown Robot';

    let browser;
    try {
      browser = browserPool.getRemoteBrowser(plainRun.browserId);
    } catch (browserError) {
      logger.log('warn', `Could not get browser for run ${runId}: ${browserError}`);
      browser = null;
    }

    if (!browser) {
      await run.update({
        status: 'aborted',
        finishedAt: new Date().toLocaleString(),
        log: 'Aborted: Browser not found or already closed'
      });

      try {
        serverIo.of(plainRun.browserId).emit('run-aborted', {
          runId,
          robotName: robotName,
          status: 'aborted',
          finishedAt: new Date().toLocaleString()
        });
      } catch (socketError) {
        logger.log('warn', `Failed to emit run-aborted event: ${socketError}`);
      }

      logger.log('warn', `Browser not found for run ${runId}`);
      return true;
    }

    await run.update({
      status: 'aborted',
      finishedAt: new Date().toLocaleString(),
      log: 'Run aborted by user'
    });

    // Forward whatever was extracted before the abort.
    if (runHasExtractedData(run)) {
      await triggerIntegrationUpdates(runId, plainRun.robotMetaId);
    }

    try {
      serverIo.of(plainRun.browserId).emit('run-aborted', {
        runId,
        robotName: robotName,
        status: 'aborted',
        finishedAt: new Date().toLocaleString()
      });
    } catch (socketError) {
      logger.log('warn', `Failed to emit run-aborted event: ${socketError}`);
    }

    try {
      // Short pause before tearing the browser down.
      await new Promise(resolve => setTimeout(resolve, 500));

      await destroyRemoteBrowser(plainRun.browserId, userId);
      logger.log('info', `Browser ${plainRun.browserId} destroyed successfully after abort`);
    } catch (cleanupError) {
      logger.log('warn', `Failed to clean up browser for aborted run ${runId}: ${cleanupError}`);
    }

    return true;
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    logger.log('error', `Failed to abort run ${runId}: ${errorMessage}`);
    return false;
  }
}

// Track registered queues globally for individual queue registration
const registeredUserQueues = new Map();
const registeredAbortQueues = new Map();
const workerIntervals: NodeJS.Timeout[] = [];

async function registerWorkerForQueue(queueName: string) {
  if (!registeredUserQueues.has(queueName)) {
    await pgBoss.work(queueName, async (job: Job | Job[]) => {
      try {
        const singleJob = Array.isArray(job) ?
job[0] : job; return await processRunExecution(singleJob); } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); logger.log('error', `Run execution job failed in ${queueName}: ${errorMessage}`); throw error; } }); registeredUserQueues.set(queueName, true); logger.log('info', `Registered worker for queue: ${queueName}`); } } async function registerAbortWorkerForQueue(queueName: string) { if (!registeredAbortQueues.has(queueName)) { await pgBoss.work(queueName, async (job: Job | Job[]) => { try { const data = extractJobData(job); const { userId, runId } = data; logger.log('info', `Processing abort request for run ${runId} by user ${userId}`); const success = await abortRun(runId, userId); return { success }; } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); logger.log('error', `Abort run job failed in ${queueName}: ${errorMessage}`); throw error; } }); registeredAbortQueues.set(queueName, true); logger.log('info', `Registered abort worker for queue: ${queueName}`); } } async function registerRunExecutionWorker() { try { // Worker for executing runs (Legacy) await pgBoss.work('execute-run', async (job: Job | Job[]) => { try { const singleJob = Array.isArray(job) ? job[0] : job; return await processRunExecution(singleJob); } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); logger.log('error', `Run execution job failed: ${errorMessage}`); throw error; } }); const checkForNewUserQueues = async () => { try { const activeQueues = await pgBoss.getQueues(); const userQueues = activeQueues.filter(q => q.name.startsWith('execute-run-user-')); for (const queue of userQueues) { await registerWorkerForQueue(queue.name); } } catch (error: unknown) { const errorMessage = error instanceof Error ? 
/**
 * Discover per-user abort queues and keep workers attached to them.
 *
 * Runs one scan immediately, then repeats it every 10 seconds. The timer
 * handle is pushed onto `workerIntervals`. Errors are logged, never thrown.
 */
async function registerAbortRunWorker() {
  const abortQueuePrefix = 'abort-run-user-';
  const scanPeriodMs = 10000;

  // One scan: register a worker for every queue whose name carries the prefix.
  const scanAbortQueues = async () => {
    try {
      const queues = await pgBoss.getQueues();
      for (const queue of queues) {
        if (!queue.name.startsWith(abortQueuePrefix)) {
          continue;
        }
        await registerAbortWorkerForQueue(queue.name);
      }
    } catch (error: unknown) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      logger.log('error', `Failed to check for new abort queues: ${errorMessage}`);
    }
  };

  try {
    await scanAbortQueues();

    const scanTimer = setInterval(async () => {
      try {
        await scanAbortQueues();
      } catch (error: any) {
        logger.log('error', `Error checking abort queues: ${error.message}`);
      }
    }, scanPeriodMs);
    workerIntervals.push(scanTimer);

    logger.log('info', 'Abort run worker registration system initialized');
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    logger.log('error', `Failed to initialize abort run worker system: ${errorMessage}`);
  }
}
error.message : String(error); logger.log('error', `Destroy browser job failed: ${errorMessage}`); throw error; } }); // Worker for interpreting workflow await pgBoss.work('interpret-workflow', async (job: Job | Job[]) => { try { const data = extractJobData(job); const userId = data.userId; logger.log('info', 'Starting workflow interpretation job'); await interpretWholeWorkflow(userId); logger.log('info', 'Workflow interpretation job completed'); return { success: true }; } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); logger.log('error', `Interpret workflow job failed: ${errorMessage}`); throw error; } }); // Worker for stopping workflow interpretation await pgBoss.work('stop-interpretation', async (job: Job | Job[]) => { try { const data = extractJobData(job); const userId = data.userId; logger.log('info', 'Starting stop interpretation job'); await stopRunningInterpretation(userId); logger.log('info', 'Stop interpretation job completed'); return { success: true }; } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); logger.log('error', `Stop interpretation job failed: ${errorMessage}`); throw error; } }); // Register the run execution worker await registerRunExecutionWorker(); // Register the abort run worker await registerAbortRunWorker(); logger.log('info', 'All recording workers registered successfully'); } catch (error: unknown) { const errorMessage = error instanceof Error ? 
pgBoss.on('error', (error) => {
  logger.log('error', `PgBoss error: ${error.message}`);
});

/**
 * Shared shutdown routine for termination signals.
 *
 * Clears every timer stored in `workerIntervals`, then stops PgBoss. A
 * failure while stopping is logged instead of surfacing as an unhandled
 * promise rejection from the signal listener.
 *
 * @param signal Name of the signal that triggered the shutdown (used in logs).
 */
async function shutdownPgBoss(signal: 'SIGTERM' | 'SIGINT'): Promise<void> {
  logger.log('info', `${signal} received, shutting down PgBoss...`);
  logger.log('info', `Clearing ${workerIntervals.length} worker intervals...`);
  workerIntervals.forEach((timer) => clearInterval(timer));

  try {
    await pgBoss.stop();
    logger.log('info', 'PgBoss stopped, waiting for main process cleanup...');
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    logger.log('error', `Failed to stop PgBoss on ${signal}: ${errorMessage}`);
  }
}

// Handle graceful shutdown
process.on('SIGTERM', () => {
  void shutdownPgBoss('SIGTERM');
});

process.on('SIGINT', () => {
  void shutdownPgBoss('SIGINT');
});
/**
 * POST /login — authenticate with email and password.
 *
 * On success sets an httpOnly `token` cookie holding a JWT with the user id
 * and responds with the user record minus its password field.
 *
 * Responses:
 * - 400 `VALIDATION_ERROR` when credentials are missing, not strings, or the
 *   password is shorter than 6 characters
 * - 404 `USER_NOT_FOUND`, 401 `INVALID_CREDENTIALS`
 * - 500 `SERVER_ERROR` when `JWT_SECRET` is not configured or anything throws
 */
router.post("/login", async (req, res) => {
  try {
    const { email, password } = req.body;

    // Non-string values must not reach `.length`, the DB query or the
    // password comparison, so they are rejected like missing fields.
    if (
      !email ||
      !password ||
      typeof email !== "string" ||
      typeof password !== "string"
    ) {
      return res.status(400).json({
        error: "VALIDATION_ERROR",
        code: "login.validation.required_fields"
      });
    }
    if (password.length < 6) {
      return res.status(400).json({
        error: "VALIDATION_ERROR",
        code: "login.validation.password_length"
      });
    }

    // Same guard the /register route applies before signing a token.
    const jwtSecret = process.env.JWT_SECRET;
    if (!jwtSecret) {
      console.log("JWT_SECRET is not defined in the environment");
      return res.status(500).json({
        error: "SERVER_ERROR",
        code: "login.error.server_error"
      });
    }

    const user = await User.findOne({ raw: true, where: { email } });
    if (!user) {
      return res.status(404).json({
        error: "USER_NOT_FOUND",
        code: "login.error.user_not_found"
      });
    }

    const match = await comparePassword(password, user.password);
    if (!match) {
      return res.status(401).json({
        error: "INVALID_CREDENTIALS",
        code: "login.error.invalid_credentials"
      });
    }

    const token = jwt.sign({ id: user.id }, jwtSecret);

    // Drop the stored password from the payload sent back to the client.
    const { password: _storedPassword, ...safeUser } = user;

    res.cookie("token", token, {
      httpOnly: true,
    });
    capture("maxun-oss-user-login", {
      email: user.email,
      userId: user.id,
      loggedInAt: new Date().toISOString(),
    });
    res.json(safeUser);
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error(`Login error: ${errorMessage}`);
    res.status(500).json({
      error: "SERVER_ERROR",
      code: "login.error.server_error"
    });
  }
});
/**
 * POST /generate-api-key — create an API key for the signed-in user.
 *
 * Responds 400 when the user already has a key; otherwise stores a freshly
 * generated key with its creation time and returns both.
 */
router.post(
  "/generate-api-key",
  requireSignIn,
  async (req: AuthenticatedRequest, res) => {
    try {
      if (!req.user) {
        return res.status(401).json({ ok: false, error: "Unauthorized" });
      }

      const user = await User.findByPk(req.user.id, {
        attributes: { exclude: ["password"] },
      });
      if (!user) {
        return res.status(404).json({ message: "User not found" });
      }
      if (user.api_key) {
        return res.status(400).json({ message: "API key already exists" });
      }

      const apiKey = genAPIKey();
      const createdAt = new Date();
      await user.update({ api_key: apiKey, api_key_created_at: createdAt });

      capture("maxun-oss-api-key-created", {
        user_id: user.id,
        created_at: createdAt.toISOString(),
      });

      return res.status(200).json({
        message: "API key generated successfully",
        api_key: apiKey,
        api_key_created_at: createdAt,
      });
    } catch (error: unknown) {
      // An Error instance serialises to `{}` in JSON, so send its message
      // (as the delete-api-key route does) and keep a server-side trace.
      const errorMessage = error instanceof Error ? error.message : String(error);
      console.error("API key generation error:", error);
      return res
        .status(500)
        .json({ message: "Error generating API key", error: errorMessage });
    }
  }
);
// Step 2: Handle Google OAuth callback
/**
 * GET /google/callback — finish the Google OAuth flow for a robot.
 *
 * `state` carries the robot id and `code` the authorization code. The tokens
 * and the Google account email are stored on the robot, a fresh session JWT
 * is issued, short-lived status cookies are set and the browser is redirected
 * to `${PUBLIC_URL}/robots/`.
 *
 * A dedicated OAuth2 client is created per request: setting credentials on a
 * module-level client across several awaits lets concurrent callbacks
 * overwrite each other's tokens.
 */
router.get(
  "/google/callback",
  requireSignIn,
  async (req: AuthenticatedRequest, res) => {
    const { code, state } = req.query;
    try {
      if (!state || typeof state !== "string") {
        return res.status(400).json({ message: "Robot ID is required" });
      }
      const robotId = state;

      if (typeof code !== "string") {
        return res.status(400).json({ message: "Invalid code" });
      }

      // Check the session before spending the one-time authorization code.
      if (!req.user) {
        return res.status(401).send({ error: "Unauthorized" });
      }

      const requestOAuthClient = new google.auth.OAuth2(
        process.env.GOOGLE_CLIENT_ID,
        process.env.GOOGLE_CLIENT_SECRET,
        process.env.GOOGLE_REDIRECT_URI
      );

      // Get access and refresh tokens
      const { tokens } = await requestOAuthClient.getToken(code);
      requestOAuthClient.setCredentials(tokens);

      // Get user profile from Google
      const oauth2 = google.oauth2({ version: "v2", auth: requestOAuthClient });
      const {
        data: { email },
      } = await oauth2.userinfo.get();
      if (!email) {
        return res.status(400).json({ message: "Email not found" });
      }

      // Get the currently authenticated user (from `requireSignIn`)
      const user = await User.findOne({ where: { id: req.user.id } });
      if (!user) {
        return res.status(400).json({ message: "User not found" });
      }

      let robot = await Robot.findOne({
        where: { "recording_meta.id": robotId },
      });
      if (!robot) {
        return res.status(400).json({ message: "Robot not found" });
      }

      robot = await robot.update({
        google_sheet_email: email,
        google_access_token: tokens.access_token,
        google_refresh_token: tokens.refresh_token,
      });

      capture("maxun-oss-google-sheet-integration-created", {
        user_id: user.id,
        robot_id: robot.recording_meta.id,
        created_at: new Date().toISOString(),
      });

      // List user's Google Sheets from their Google Drive
      const drive = google.drive({ version: "v3", auth: requestOAuthClient });
      const response = await drive.files.list({
        q: "mimeType='application/vnd.google-apps.spreadsheet'", // Sheets only
        fields: "files(id, name)",
      });
      const files = response.data.files || [];
      // NOTE(review): the tokens are already saved at this point, yet an
      // account without spreadsheets ends the browser flow on a JSON 404.
      if (files.length === 0) {
        return res.status(404).json({ message: "No spreadsheets found." });
      }

      // Generate JWT token for session
      const jwtToken = jwt.sign(
        { id: user.id },
        process.env.JWT_SECRET as string
      );
      res.cookie("token", jwtToken, { httpOnly: true });

      // 1-minute, script-readable cookies
      res.cookie("robot_auth_status", "success", {
        httpOnly: false,
        maxAge: 60000,
      });
      res.cookie("robot_auth_robotId", robotId, {
        httpOnly: false,
        maxAge: 60000,
      });

      const baseUrl = process.env.PUBLIC_URL || "http://localhost:5173";
      res.redirect(`${baseUrl}/robots/`);
    } catch (error: unknown) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      res.status(500).json({ message: `Google OAuth error: ${errorMessage}` });
    }
  }
);
// Step 4: Get user's Google Sheets files
/**
 * GET /gsheets/files?robotId=… — list the spreadsheets reachable with the
 * Google tokens stored on the given robot.
 *
 * Responds with an array of `{ id, name }`, 400 when the robot id is missing
 * or unknown, and 404 when Drive returns no spreadsheets.
 */
router.get("/gsheets/files", requireSignIn, async (req, res) => {
  try {
    const robotId = req.query.robotId;
    // Without this guard a missing id reaches the query and ends as a 500.
    if (!robotId || typeof robotId !== "string") {
      return res.status(400).json({ message: "Robot ID is required" });
    }

    const robot = await Robot.findOne({
      where: { "recording_meta.id": robotId },
      raw: true,
    });
    if (!robot) {
      return res.status(400).json({ message: "Robot not found" });
    }

    // Per-request client: credentials set on a shared client could be
    // replaced by a concurrent request before the Drive call is sent.
    const requestOAuthClient = new google.auth.OAuth2(
      process.env.GOOGLE_CLIENT_ID,
      process.env.GOOGLE_CLIENT_SECRET,
      process.env.GOOGLE_REDIRECT_URI
    );
    requestOAuthClient.setCredentials({
      access_token: robot.google_access_token,
      refresh_token: robot.google_refresh_token,
    });

    // List user's Google Sheets files from their Google Drive
    const drive = google.drive({ version: "v3", auth: requestOAuthClient });
    const response = await drive.files.list({
      q: "mimeType='application/vnd.google-apps.spreadsheet'",
      fields: "files(id, name)",
    });

    const files = response.data.files || [];
    if (files.length === 0) {
      return res.status(404).json({ message: "No spreadsheets found." });
    }

    res.json(files);
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.log("Error fetching Google Sheets files:", error);
    res.status(500).json({
      message: `Error retrieving Google Sheets files: ${errorMessage}`,
    });
  }
});
// Airtable OAuth Routes
/**
 * GET /airtable?robotId=… — start the Airtable OAuth flow (PKCE, S256).
 *
 * Stores the PKCE verifier and the robot id in the session, then redirects
 * to Airtable's authorize endpoint with the robot id as `state`.
 *
 * Responds 400 without a robot id and 500 when the Airtable client id or
 * redirect URI is not configured.
 */
router.get("/airtable", requireSignIn, (req: Request, res) => {
  const authenticatedReq = req as AuthenticatedRequest;
  const { robotId } = authenticatedReq.query;
  if (!robotId) {
    return res.status(400).json({ message: "Robot ID is required" });
  }

  // Fail fast: an unset variable would otherwise be serialised into the
  // authorize URL as the literal string "undefined".
  const clientId = process.env.AIRTABLE_CLIENT_ID;
  const redirectUri = process.env.AIRTABLE_REDIRECT_URI;
  if (!clientId || !redirectUri) {
    console.error("AIRTABLE_CLIENT_ID or AIRTABLE_REDIRECT_URI is not defined in the environment");
    return res.status(500).json({ message: "Airtable OAuth is not configured" });
  }

  const robotIdValue = robotId.toString();

  // Generate PKCE codes
  const code_verifier = crypto.randomBytes(64).toString('base64url');
  const code_challenge = crypto
    .createHash('sha256')
    .update(code_verifier)
    .digest('base64url');

  // Store in session; the callback compares both against what comes back.
  authenticatedReq.session.code_verifier = code_verifier;
  authenticatedReq.session.robotId = robotIdValue;

  const params = new URLSearchParams({
    client_id: clientId,
    redirect_uri: redirectUri,
    response_type: 'code',
    state: robotIdValue,
    scope: 'data.records:read data.records:write schema.bases:read schema.bases:write',
    code_challenge: code_challenge,
    code_challenge_method: 'S256'
  });

  res.redirect(`https://airtable.com/oauth2/v1/authorize?${params}`);
});
authenticatedReq.query; if (error) { return res.redirect( `${baseUrl}/robots/${state}/integrate?error=${encodeURIComponent(error.toString())}` ); } if (!code || !state) { return res.status(400).json({ message: "Missing authorization code or state" }); } // Verify session data if (!authenticatedReq.session?.code_verifier || authenticatedReq.session.robotId !== state.toString()) { return res.status(400).json({ message: "Session expired - please restart the OAuth flow" }); } // Exchange code for tokens const tokenResponse = await fetch("https://airtable.com/oauth2/v1/token", { method: "POST", headers: { "Content-Type": "application/x-www-form-urlencoded", }, body: new URLSearchParams({ grant_type: "authorization_code", code: code.toString(), client_id: process.env.AIRTABLE_CLIENT_ID!, redirect_uri: process.env.AIRTABLE_REDIRECT_URI!, code_verifier: authenticatedReq.session.code_verifier }), }); if (!tokenResponse.ok) { const errorData = await tokenResponse.json(); console.error('Token exchange failed:', errorData); return res.redirect( `${baseUrl}/robots/${state}/integrate?error=${encodeURIComponent(errorData.error_description || 'Authentication failed')}` ); } const tokens = await tokenResponse.json(); // Update robot with credentials const robot = await Robot.findOne({ where: { "recording_meta.id": req.session.robotId } }); if (!robot) { return res.status(404).json({ message: "Robot not found" }); } await robot.update({ airtable_access_token: tokens.access_token, airtable_refresh_token: tokens.refresh_token, }); res.cookie("airtable_auth_status", "success", { httpOnly: false, maxAge: 60000, }); // 1-minute expiration // res.cookie("airtable_auth_message", "Robot successfully authenticated", { // httpOnly: false, // maxAge: 60000, // }); res.cookie('robot_auth_robotId', req.session.robotId, { httpOnly: false, maxAge: 60000, }); // Clear session data authenticatedReq.session.destroy((err) => { if (err) console.error('Session cleanup error:', err); }); const 
// Get Airtable bases
/**
 * GET /airtable/bases?robotId=… — list Airtable bases using the access
 * token stored on the robot.
 *
 * Responds with an array of `{ id, name }`; 400 when the robot id is missing
 * or the robot has no Airtable token; 500 with the upstream message otherwise.
 */
router.get("/airtable/bases", requireSignIn, async (req: Request, res) => {
  const authenticatedReq = req as AuthenticatedRequest;
  try {
    const { robotId } = authenticatedReq.query;
    if (!robotId) {
      return res.status(400).json({ message: "Robot ID is required" });
    }

    const robot = await Robot.findOne({
      where: { "recording_meta.id": robotId.toString() },
      raw: true,
    });
    if (!robot?.airtable_access_token) {
      return res.status(400).json({ message: "Robot not authenticated with Airtable" });
    }

    const response = await fetch('https://api.airtable.com/v0/meta/bases', {
      headers: {
        'Authorization': `Bearer ${robot.airtable_access_token}`
      }
    });

    if (!response.ok) {
      // The error body may be non-JSON or lack `error.message`; never let
      // that turn into a TypeError that hides the real failure.
      const errorData = await response.json().catch(() => null);
      throw new Error(errorData?.error?.message || 'Failed to fetch bases');
    }

    const data = await response.json();
    const bases = Array.isArray(data?.bases) ? data.bases : [];
    res.json(bases.map((base: any) => ({
      id: base.id,
      name: base.name
    })));
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ message });
  }
});
// Fetch tables from an Airtable base
/**
 * GET /airtable/tables?baseId=…&robotId=… — list the tables of an Airtable
 * base using the access token stored on the robot.
 *
 * Responds with an array of `{ id, name, fields }`; 400 when a parameter is
 * missing or the robot has no Airtable token; 500 with the upstream message
 * otherwise.
 */
router.get("/airtable/tables", requireSignIn, async (req: Request, res) => {
  const authenticatedReq = req as AuthenticatedRequest;
  try {
    const { baseId, robotId } = authenticatedReq.query;
    if (!baseId || !robotId) {
      return res.status(400).json({ message: "Base ID and Robot ID are required" });
    }

    const robot = await Robot.findOne({
      where: { "recording_meta.id": robotId.toString() },
      raw: true,
    });
    if (!robot?.airtable_access_token) {
      return res.status(400).json({ message: "Robot not authenticated with Airtable" });
    }

    // `baseId` is caller-controlled: encode it so it stays a single path
    // segment and cannot redirect the request to another API path.
    const encodedBaseId = encodeURIComponent(baseId.toString());
    const response = await fetch(`https://api.airtable.com/v0/meta/bases/${encodedBaseId}/tables`, {
      headers: {
        'Authorization': `Bearer ${robot.airtable_access_token}`
      }
    });

    if (!response.ok) {
      // Tolerate non-JSON bodies and bodies without `error.message`.
      const errorData = await response.json().catch(() => null);
      throw new Error(errorData?.error?.message || 'Failed to fetch tables');
    }

    const data = await response.json();
    const tables = Array.isArray(data?.tables) ? data.tables : [];
    res.json(tables.map((table: any) => ({
      id: table.id,
      name: table.name,
      fields: table.fields
    })));
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ message });
  }
});
/**
 * GET /test — check that the signed-in user's stored proxy can load a page.
 *
 * Decrypts the stored proxy settings, opens a page on the remote browser
 * through that proxy and navigates to https://example.com. Responds
 * `{ success: true }` on success and 500 `Proxy connection failed` on any
 * failure. The browser connection is closed on every path.
 */
router.get('/test', requireSignIn, async (req: Request, res: Response) => {
  const authenticatedReq = req as AuthenticatedRequest;
  try {
    if (!authenticatedReq.user) {
      return res.status(401).json({ ok: false, error: 'Unauthorized' });
    }

    const user = await User.findByPk(authenticatedReq.user.id, {
      attributes: ['proxy_url', 'proxy_username', 'proxy_password'],
      raw: true
    });
    if (!user) {
      return res.status(404).json({ message: 'User not found' });
    }

    const decryptedProxyUrl = user.proxy_url ? decrypt(user.proxy_url) : null;
    const decryptedProxyUsername = user.proxy_username ? decrypt(user.proxy_username) : null;
    const decryptedProxyPassword = user.proxy_password ? decrypt(user.proxy_password) : null;

    // Only build proxy options when a proxy URL is actually stored.
    const proxyOptions = decryptedProxyUrl
      ? {
          server: decryptedProxyUrl,
          ...(decryptedProxyUsername && decryptedProxyPassword && {
            username: decryptedProxyUsername,
            password: decryptedProxyPassword,
          }),
        }
      : undefined;

    const browser = await connectToRemoteBrowser();
    try {
      // The options were previously built but never passed on, so the test
      // page was loaded without the proxy.
      // NOTE(review): assumes connectToRemoteBrowser() returns a Playwright
      // Browser, whose newPage() accepts context options — confirm.
      const page = await browser.newPage(proxyOptions ? { proxy: proxyOptions } : undefined);
      await page.goto('https://example.com');
    } finally {
      // Release the connection even when navigation fails.
      await browser.close().catch(() => undefined);
    }

    res.status(200).send({ success: true });
  } catch (error) {
    res.status(500).send({ success: false, error: 'Proxy connection failed' });
  }
});
/**
 * Produce a display-safe version of a proxy URL.
 *
 * The scheme, any `user:password@` prefix and anything after the authority
 * (path, query, fragment) are dropped. Of the host only the first and last
 * three characters are kept, e.g. `http://proxy.example.com:8080` becomes
 * `pro****com:8080`. Hosts of six characters or fewer are masked entirely,
 * because the two three-character slices would otherwise reveal all of it.
 *
 * @param url Proxy URL, with or without a scheme.
 * @returns Masked `host` or `host:port`.
 */
const maskProxyUrl = (url: string): string => {
  // Strip any scheme (http, https, socks4, socks5, ...), not just a fixed few.
  const withoutScheme = url.trim().replace(/^[a-z][a-z0-9+.-]*:\/\//i, '');

  // Keep the authority only, then drop credentials embedded before '@' so a
  // password can never end up in the masked output.
  const authority = withoutScheme.split(/[/?#]/)[0] ?? '';
  const hostAndPort = authority.slice(authority.lastIndexOf('@') + 1);

  const [domain = '', port] = hostAndPort.split(':');
  const maskedDomain =
    domain.length > 6 ? `${domain.slice(0, 3)}****${domain.slice(-3)}` : '****';

  return port ? `${maskedDomain}:${port}` : maskedDomain;
};
/**
 * Polls pg-boss for a job until it reaches a terminal state.
 *
 * The job is looked up via `pgBossClient.getJobById` every 200 ms. The timeout
 * is checked before each lookup.
 *
 * @param jobId - Identifier of the job to watch.
 * @param queueName - Queue the job was sent to.
 * @param timeout - Maximum time to wait, in milliseconds (default 15000).
 * @returns The job's `output` once its state is `completed`.
 * @throws Error when the timeout elapses, the job cannot be found, the job
 *   state is `failed`, or the lookup itself throws.
 */
async function waitForJobCompletion(jobId: string, queueName: string, timeout = 15000): Promise<any> {
  const startedAt = Date.now();

  for (;;) {
    if (Date.now() - startedAt > timeout) {
      throw new Error(`Timeout waiting for job ${jobId} to complete`);
    }

    const job = await pgBossClient.getJobById(queueName, jobId);
    if (!job) {
      throw new Error(`Job ${jobId} not found`);
    }

    switch (job.state) {
      case 'completed':
        return job.output;
      case 'failed':
        throw new Error(`Job ${jobId} failed.`);
    }

    // Still pending: pause before the next lookup.
    await new Promise<void>((wake) => setTimeout(wake, 200));
  }
}
/**
 * GET endpoint for starting the remote browser recording session.
 *
 * Sends an `initialize-browser-recording` job and waits up to 15 s for it.
 * Responds with the `browserId` from the job result when there is one, and
 * with the job id when the result has no `browserId` or waiting fails.
 * When no job id is returned, or queueing throws, the browser is initialized
 * directly through `initializeRemoteBrowserForRecording`.
 */
router.get('/start', requireSignIn, async (req: AuthenticatedRequest, res: Response) => {
  if (!req.user) {
    return res.status(401).send('User not authenticated');
  }

  const queue = 'initialize-browser-recording';

  try {
    await pgBossClient.createQueue(queue);
    const jobId = await pgBossClient.send(queue, {
      userId: req.user.id,
      timestamp: new Date().toISOString(),
    });

    // No job id came back: initialize in-process instead.
    if (!jobId) {
      return res.send(initializeRemoteBrowserForRecording(req.user.id));
    }

    logger.log('info', `Queued browser initialization job: ${jobId}, waiting for completion...`);

    try {
      const outcome = await waitForJobCompletion(jobId, queue, 15000);
      return res.send(outcome && outcome.browserId ? outcome.browserId : jobId);
    } catch (waitError: any) {
      return res.send(jobId);
    }
  } catch (error: any) {
    logger.log('error', `Failed to queue browser initialization job: ${error.message}`);

    try {
      return res.send(initializeRemoteBrowserForRecording(req.user.id));
    } catch (directError: any) {
      logger.log('error', `Direct initialization also failed: ${directError.message}`);
      return res.status(500).send('Failed to start recording');
    }
  }
});
/**
 * GET endpoint for terminating the remote browser recording session.
 *
 * Sends a `destroy-browser` job and waits up to 15 s for it, then responds
 * with the `success` field of the job result. Responds `false` when the job
 * yields no result or waiting fails, and 500/`false` when queueing throws.
 *
 * NOTE(review): when no job id is returned the browser is destroyed directly,
 * yet the response is still `false` - confirm this is intended.
 */
router.get('/stop/:browserId', requireSignIn, async (req: AuthenticatedRequest, res) => {
  if (!req.user) {
    return res.status(401).send('User not authenticated');
  }

  const queue = 'destroy-browser';
  const { browserId } = req.params;

  try {
    await pgBossClient.createQueue(queue);
    const jobId = await pgBossClient.send(queue, {
      browserId,
      userId: req.user.id,
      timestamp: new Date().toISOString(),
    });

    if (!jobId) {
      await destroyRemoteBrowser(browserId, req.user.id);
      return res.send(false);
    }

    logger.log('info', `Queued browser destruction job: ${jobId}, waiting for completion...`);

    try {
      const outcome = await waitForJobCompletion(jobId, queue, 15000);
      return res.send(outcome ? outcome.success : false);
    } catch (waitError: any) {
      return res.send(false);
    }
  } catch (error: any) {
    logger.log('error', `Failed to stop browser: ${error.message}`);
    return res.status(500).send(false);
  }
});
/**
 * GET endpoint for starting an interpretation of the currently generated workflow.
 *
 * Sends an `interpret-workflow` job and waits for it (up to 1,000,000 ms).
 * Responds `'interpretation done'` when the job yields a truthy result and
 * `'interpretation failed'` otherwise. When no job id is returned, the
 * workflow is interpreted directly via `interpretWholeWorkflow`.
 */
router.get('/interpret', requireSignIn, async (req: AuthenticatedRequest, res) => {
  if (!req.user) {
    return res.status(401).send('User not authenticated');
  }

  try {
    await pgBossClient.createQueue('interpret-workflow');

    const jobId = await pgBossClient.send('interpret-workflow', {
      userId: req.user.id,
      timestamp: new Date().toISOString()
    });

    if (!jobId) {
      await interpretWholeWorkflow(req.user?.id);
      return res.send('interpretation done');
    }

    logger.log('info', `Queued interpret workflow job: ${jobId}, waiting for completion...`);

    try {
      const result = await waitForJobCompletion(jobId, 'interpret-workflow', 1000000);
      return res.send(result ? 'interpretation done' : 'interpretation failed');
    } catch (waitError: any) {
      // Record why waiting failed (timeout, missing or failed job) instead of
      // discarding the reason; the response is unchanged.
      logger.log('error', `Interpret workflow job ${jobId} did not complete: ${waitError?.message}`);
      return res.send('interpretation failed');
    }
  } catch (error: any) {
    // This route starts an interpretation; the message previously said "stop".
    logger.log('error', `Failed to interpret workflow: ${error.message}`);
    return res.status(500).send('interpretation failed');
  }
});
/**
 * Returns a processed deep copy of a workflow; the input is not mutated.
 *
 * - `scrapeList` actions: when `checkLimit` is true and the first argument is
 *   an object with a numeric `limit` above 5, the limit is lowered to 5.
 * - `type` / `press` actions with more than one argument: the second argument
 *   is decrypted. It is replaced by an empty string when it is not a string
 *   or when decryption throws.
 *
 * Pairs without a `what` array and empty action slots are left untouched.
 *
 * @param workflow - Workflow pairs, each expected to carry a `what` action list.
 * @param checkLimit - Whether to cap `scrapeList` limits at 5.
 * @returns The processed copy of the workflow.
 */
export const processWorkflowActions = async (workflow: any[], checkLimit: boolean = false): Promise<any[]> => {
  // JSON round-trip gives an independent copy so the stored workflow is untouched.
  const processedWorkflow = JSON.parse(JSON.stringify(workflow));

  for (const pair of processedWorkflow) {
    // A pair may lack `what`; skip it rather than throw.
    if (!Array.isArray(pair?.what)) continue;

    for (const action of pair.what) {
      if (!action) continue;

      // Handle limit validation for scrapeList action
      if (action.action === 'scrapeList' && checkLimit && Array.isArray(action.args) && action.args.length > 0) {
        const scrapeConfig = action.args[0];
        if (scrapeConfig && typeof scrapeConfig === 'object' && 'limit' in scrapeConfig) {
          if (typeof scrapeConfig.limit === 'number' && scrapeConfig.limit > 5) {
            scrapeConfig.limit = 5;
          }
        }
      }

      // Handle decryption for type and press actions
      if ((action.action === 'type' || action.action === 'press') && Array.isArray(action.args) && action.args.length > 1) {
        try {
          const encryptedValue = action.args[1];
          if (typeof encryptedValue === 'string') {
            action.args[1] = decrypt(encryptedValue);
          } else {
            logger.log('error', 'Encrypted value is not a string');
            action.args[1] = '';
          }
        } catch (error: unknown) {
          const errorMessage = error instanceof Error ? error.message : String(error);
          logger.log('error', `Failed to decrypt input value: ${errorMessage}`);
          action.args[1] = '';
        }
      }
    }
  }

  return processedWorkflow;
}
/**
 * Maps a raw run record to the public run shape.
 *
 * `robotMetaId` is exposed as `robotId`. `data` is the `'item-0'` entry of
 * `serializableOutput` when that entry is truthy, otherwise `{}`. `screenshot`
 * is the `'item-0'` entry of `binaryOutput`, but only when no serializable
 * entry was found; otherwise it is `null`.
 *
 * @param run - Raw run record.
 * @returns The formatted run object.
 */
function formatRunResponse(run: any) {
  const firstItem = run.serializableOutput && run.serializableOutput['item-0'];
  // The screenshot is only considered when there is no serializable item.
  const firstBinary = !firstItem && run.binaryOutput && run.binaryOutput['item-0'];

  const data: any = firstItem ? firstItem : {};
  const screenshot: any = firstBinary ? firstBinary : null;

  return {
    id: run.id,
    status: run.status,
    name: run.name,
    robotId: run.robotMetaId, // Renaming robotMetaId to robotId
    startedAt: run.startedAt,
    finishedAt: run.finishedAt,
    runId: run.runId,
    runByUserId: run.runByUserId,
    runByScheduleId: run.runByScheduleId,
    runByAPI: run.runByAPI,
    data,
    screenshot,
  };
}
/**
 * PUT endpoint to update a robot's name, target URL, credentials, scrape
 * limits and/or workflow.
 *
 * Body fields (at least one is required, otherwise 400): `name`, `limits`,
 * `credentials`, `targetUrl`, `workflow`.
 *
 * The working workflow is a deep copy of the incoming `workflow` when that is
 * an array, else a shallow copy of the stored workflow, else `[]`. The
 * target URL rewrite, credential replacement and limit updates are applied to
 * it in that order before it is persisted together with the updated meta.
 *
 * Responds 404 when the robot does not exist and 500 with the error message
 * when anything throws.
 */
router.put('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    const { id } = req.params;
    const { name, limits, credentials, targetUrl, workflow: incomingWorkflow } = req.body;

    // NOTE(review): the message below omits "workflow", which also satisfies this check.
    if (!name && !limits && !credentials && !targetUrl && !incomingWorkflow) {
      return res.status(400).json({ error: 'Either "name", "limits", "credentials" or "target_url" must be provided.' });
    }

    const robot = await Robot.findOne({ where: { 'recording_meta.id': id } });

    if (!robot) {
      return res.status(404).json({ error: 'Robot not found.' });
    }

    let workflow: any[] = Array.isArray(incomingWorkflow)
      ? JSON.parse(JSON.stringify(incomingWorkflow))
      : (Array.isArray(robot.recording?.workflow) ? [...robot.recording.workflow] : []);

    if (targetUrl) {
      if (robot.recording_meta?.type === 'scrape') {
        // 'scrape' robots: point every non-blank `where.url`, every `goto`
        // and every `scrape` config at the new URL.
        workflow = workflow.map((step: any) => {
          const updatedWhere = step.where?.url && step.where.url !== 'about:blank'
            ? { ...step.where, url: targetUrl }
            : step.where;

          const updatedWhat = (step.what || []).map((action: any) => {
            if (action.action === 'goto' && action.args?.length) {
              return { ...action, args: [targetUrl, ...action.args.slice(1)] };
            }
            if (action.action === 'scrape' && action.args?.[0] && typeof action.args[0] === 'object') {
              return { ...action, args: [{ ...action.args[0], url: targetUrl }, ...action.args.slice(1)] };
            }
            return action;
          });

          return { ...step, where: updatedWhere, what: updatedWhat };
        });
      } else {
        // Other robots: the entry step is the last pair whose `where.url` is
        // 'about:blank'; the first `goto` with arguments inside it supplies
        // the URL being replaced.
        const entryStep = [...workflow].reverse().find((s: any) => s.where?.url === 'about:blank');
        const originalEntryUrl: string | null = entryStep?.what?.find(
          (action: any) => action.action === 'goto' && action.args?.length
        )?.args?.[0] ?? null;

        let gotoUpdated = false;
        let whereUpdateStopped = false;

        // Walk the pairs from last to first, then restore the original order.
        workflow = [...workflow].reverse().map((step: any) => {
          let updatedWhere = step.where;
          // Rewrite `where.url` while pairs keep matching the old entry URL;
          // the first non-blank pair with another URL ends the rewriting.
          if (originalEntryUrl && step.where?.url !== 'about:blank' && !whereUpdateStopped) {
            if (step.where?.url === originalEntryUrl) {
              updatedWhere = { ...step.where, url: targetUrl };
            } else {
              whereUpdateStopped = true;
            }
          }

          // Only the first matching `goto` met in this walk is retargeted.
          const updatedWhat = (step.what || []).map((action: any) => {
            if (!gotoUpdated && action.action === 'goto' && action.args?.[0] === originalEntryUrl) {
              gotoUpdated = true;
              return { ...action, args: [targetUrl, ...action.args.slice(1)] };
            }
            return action;
          });

          return { ...step, where: updatedWhere, what: updatedWhat };
        }).reverse();
      }
    }

    if (credentials) {
      workflow = handleWorkflowActions(workflow, credentials);
    }

    if (limits && Array.isArray(limits) && limits.length > 0) {
      // Each entry addresses one argument object by pair/action/argument
      // index; entries that do not resolve to an object are skipped.
      for (const limitInfo of limits) {
        const { pairIndex, actionIndex, argIndex, limit } = limitInfo;

        const pair = workflow[pairIndex];
        if (!pair || !pair.what) continue;

        const action = pair.what[actionIndex];
        if (!action || !action.args) continue;

        const arg = action.args[argIndex];
        if (!arg || typeof arg !== 'object') continue;

        (arg as { limit: number }).limit = limit;
      }
    }

    let updatedMeta = { ...robot.recording_meta };
    if (name) updatedMeta.name = name;
    if (targetUrl) updatedMeta.url = targetUrl;

    const updates: any = {
      recording: { ...robot.recording, workflow },
      recording_meta: updatedMeta,
    };

    await Robot.update(updates, { where: { 'recording_meta.id': id } });

    logger.log('info', `Robot with ID ${id} was updated successfully.`);

    // NOTE(review): `robot` was loaded before `Robot.update`; presumably the
    // response carries the pre-update values - confirm.
    return res.status(200).json({ message: 'Robot updated successfully', robot });
  } catch (error) {
    // Safely handle the error type
    if (error instanceof Error) {
      logger.log('error', `Error updating robot with ID ${req.params.id}: ${error.message}`);
      return res.status(500).json({ error: error.message });
    } else {
      logger.log('error', `Unknown error updating robot with ID ${req.params.id}`);
      return res.status(500).json({ error: 'An unknown error occurred.' });
    }
  }
});
/**
 * POST endpoint for creating an LLM-powered extraction robot.
 *
 * `prompt` is required. `url` is optional: when given it must parse as a URL
 * and the workflow is generated for it; when omitted the workflow is
 * generated with automatic URL detection. The provider defaults to `ollama`.
 *
 * Responds 400 on a missing prompt, an invalid URL or a failed generation,
 * 401 without a user, 201 with the created robot, and 500 on any error.
 */
router.post('/recordings/llm', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    const { url, prompt, llmProvider, llmModel, llmApiKey, llmBaseUrl, robotName } = req.body;

    if (!prompt) {
      return res.status(400).json({ error: 'The "prompt" field is required.' });
    }
    if (!req.user) {
      return res.status(401).send({ error: 'Unauthorized' });
    }

    // Validate URL format if provided
    if (url) {
      try {
        new URL(url);
      } catch (err) {
        return res.status(400).json({ error: 'Invalid URL format' });
      }
    }

    const llmConfig = {
      provider: llmProvider || 'ollama',
      model: llmModel,
      apiKey: llmApiKey,
      baseUrl: llmBaseUrl
    };

    let generated: any;
    let resolvedUrl: string;

    if (!url) {
      logger.log('info', `Starting LLM workflow generation with automatic URL detection for prompt: "${prompt}"`);
      generated = await WorkflowEnricher.generateWorkflowFromPromptWithSearch(prompt, req.user.id, llmConfig);
      resolvedUrl = generated.url || '';
      if (resolvedUrl) {
        logger.log('info', `Auto-detected URL: ${resolvedUrl}`);
      }
    } else {
      logger.log('info', `Starting LLM workflow generation for provided URL: ${url}`);
      generated = await WorkflowEnricher.generateWorkflowFromPrompt(url, prompt, req.user.id, llmConfig);
      resolvedUrl = generated.url || url;
    }

    const generationOk = generated.success && generated.workflow;
    if (!generationOk) {
      logger.log('error', `Failed to generate workflow: ${JSON.stringify(generated.errors)}`);
      return res.status(400).json({
        error: 'Failed to generate workflow from prompt',
        details: generated.errors
      });
    }

    const robotId = uuid();
    const currentTimestamp = new Date().toISOString();
    const finalRobotName = robotName || `LLM Extract: ${prompt.substring(0, 50)}`;

    const recordingMeta = {
      name: finalRobotName,
      id: robotId,
      createdAt: currentTimestamp,
      updatedAt: currentTimestamp,
      pairs: generated.workflow.length,
      params: [],
      type: 'extract',
      url: resolvedUrl,
      isLLM: true,
    };

    const newRobot = await Robot.create({
      id: uuid(),
      userId: req.user.id,
      recording_meta: recordingMeta,
      recording: { workflow: generated.workflow },
      google_sheet_email: null,
      google_sheet_name: null,
      google_sheet_id: null,
      google_access_token: null,
      google_refresh_token: null,
      schedule: null,
    });

    logger.log('info', `LLM robot created with id: ${newRobot.id}`);

    capture('maxun-oss-llm-robot-created', {
      robot_meta: newRobot.recording_meta,
      recording: newRobot.recording,
      llm_provider: llmProvider || 'ollama',
      prompt: prompt,
      urlAutoDetected: !url,
    });

    return res.status(201).json({
      message: 'LLM robot created successfully.',
      robot: newRobot,
    });
  } catch (error) {
    if (!(error instanceof Error)) {
      logger.log('error', 'Unknown error creating LLM robot');
      return res.status(500).json({ error: 'An unknown error occurred.' });
    }
    logger.log('error', `Error creating LLM robot: ${error.message}`);
    return res.status(500).json({ error: error.message });
  }
});
/**
 * POST endpoint to duplicate a robot with a new target URL.
 *
 * `targetUrl` must be an http(s) URL string. The copy keeps the original
 * workflow, except that the entry URL is replaced by `targetUrl`. The entry
 * URL is the first `goto` argument of the last `about:blank` pair.
 * Integration fields (Google Sheets, Airtable, webhooks) and the schedule are
 * reset to `null` on the copy.
 *
 * Responds 400 on an invalid `targetUrl`, 404 when the source robot does not
 * exist, 201 with the new robot, and 500 with the error message otherwise.
 */
router.post('/recordings/:id/duplicate', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    const { id } = req.params;
    const { targetUrl } = req.body;

    if (!targetUrl) {
      return res.status(400).json({ error: 'The "targetUrl" field is required.' });
    }

    // `new URL` stringifies its argument, so a non-string could pass the
    // parse below and then break the string operations further down.
    if (typeof targetUrl !== 'string') {
      return res.status(400).json({ error: 'The "targetUrl" must be a valid URL.' });
    }

    try {
      const parsed = new URL(targetUrl);
      if (!['http:', 'https:'].includes(parsed.protocol)) {
        return res.status(400).json({ error: 'The "targetUrl" must use http or https protocol.' });
      }
    } catch {
      return res.status(400).json({ error: 'The "targetUrl" must be a valid URL.' });
    }

    const originalRobot = await Robot.findOne({
      where: { 'recording_meta.id': id },
    });

    if (!originalRobot) {
      return res.status(404).json({ error: 'Original robot not found.' });
    }

    const lastWord = targetUrl.split('/').filter(Boolean).pop() || 'Unnamed';

    // Same tolerance as the update endpoint: a missing workflow is treated
    // as empty and a pair without `what` as having no actions.
    const steps: any[] = Array.isArray(originalRobot.recording?.workflow)
      ? originalRobot.recording.workflow
      : [];

    // Last pair whose `where.url` is 'about:blank'.
    const entryStep = [...steps].reverse().find((step: any) => step.where?.url === 'about:blank');
    const originalEntryUrl: string | null = entryStep?.what?.find(
      (action: any) => action.action === 'goto' && action.args?.length
    )?.args?.[0] ?? null;

    let gotoUpdated = false;
    let whereUpdateStopped = false;

    // Walk the pairs from last to first, then restore the original order.
    const workflow = [...steps].reverse().map((step: any) => {
      let updatedWhere = step.where;
      // Rewrite `where.url` while pairs keep matching the old entry URL; the
      // first non-blank pair with another URL ends the rewriting.
      if (originalEntryUrl && step.where?.url !== 'about:blank' && !whereUpdateStopped) {
        if (step.where?.url === originalEntryUrl) {
          updatedWhere = { ...step.where, url: targetUrl };
        } else {
          whereUpdateStopped = true;
        }
      }

      // Only the first matching `goto` met in this walk is retargeted.
      const updatedWhat = (step.what || []).map((action: any) => {
        if (!gotoUpdated && action.action === 'goto' && action.args?.[0] === originalEntryUrl) {
          gotoUpdated = true;
          return { ...action, args: [targetUrl, ...action.args.slice(1)] };
        }
        return action;
      });

      return { ...step, where: updatedWhere, what: updatedWhat };
    }).reverse();

    const currentTimestamp = new Date().toLocaleString();

    const newRobot = await Robot.create({
      id: uuid(),
      userId: originalRobot.userId,
      recording_meta: {
        ...originalRobot.recording_meta,
        id: uuid(),
        name: `${originalRobot.recording_meta.name} (${lastWord})`,
        url: targetUrl,
        createdAt: currentTimestamp,
        updatedAt: currentTimestamp,
      },
      recording: { ...originalRobot.recording, workflow },
      google_sheet_email: null,
      google_sheet_name: null,
      google_sheet_id: null,
      google_access_token: null,
      google_refresh_token: null,
      airtable_base_id: null,
      airtable_base_name: null,
      airtable_table_name: null,
      airtable_table_id: null,
      airtable_access_token: null,
      airtable_refresh_token: null,
      webhooks: null,
      schedule: null,
    });

    logger.log('info', `Robot with ID ${id} duplicated successfully as ${newRobot.id}.`);

    return res.status(201).json({
      message: 'Robot duplicated and target URL updated successfully.',
      robot: newRobot,
    });
  } catch (error) {
    if (error instanceof Error) {
      logger.log('error', `Error duplicating robot with ID ${req.params.id}: ${error.message}`);
      return res.status(500).json({ error: error.message });
    } else {
      logger.log('error', `Unknown error duplicating robot with ID ${req.params.id}`);
      return res.status(500).json({ error: 'An unknown error occurred.' });
    }
  }
});
/**
 * PUT endpoint that creates a run for the robot identified by `:id`.
 *
 * When `browserPool.hasAvailableBrowserSlots` reports a free "run" slot, a
 * remote browser is created, a `running` run record is stored and an
 * execution job is sent to the per-user queue `execute-run-user-<userId>`.
 * Otherwise a `queued` run record is stored with a freshly generated
 * browser id and nothing is sent to the queue here.
 *
 * The request body is stored as the run's `interpreterSettings`.
 *
 * Responses: 404 robot not found, 401 no user, 500 browser or run creation
 * failed (or any other error), 503 queueing failed, otherwise
 * `{ browserId, runId, robotMetaId, queued }`.
 */
router.put('/runs/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    const recording = await Robot.findOne({
      where: {
        'recording_meta.id': req.params.id
      },
      raw: true
    });

    if (!recording || !recording.recording_meta || !recording.recording_meta.id) {
      return res.status(404).send({ error: 'Recording not found' });
    }

    if (!req.user) {
      return res.status(401).send({ error: 'Unauthorized' });
    }

    // Generate runId first
    const runId = uuid();

    const canCreateBrowser = await browserPool.hasAvailableBrowserSlots(req.user.id, "run");

    if (canCreateBrowser) {
      let browserId: string;

      // Step 1: create the browser; nothing to clean up if this fails.
      try {
        browserId = await createRemoteBrowserForRun(req.user.id);

        if (!browserId || browserId.trim() === '') {
          throw new Error('Failed to generate valid browser ID');
        }

        logger.log('info', `Created browser ${browserId} for run ${runId}`);
      } catch (browserError: any) {
        logger.log('error', `Failed to create browser: ${browserError.message}`);
        return res.status(500).send({ error: 'Failed to create browser instance' });
      }

      // Step 2: store the run; on failure the browser from step 1 is destroyed.
      try {
        await Run.create({
          status: 'running',
          name: recording.recording_meta.name,
          robotId: recording.id,
          robotMetaId: recording.recording_meta.id,
          startedAt: new Date().toLocaleString(),
          finishedAt: '',
          browserId: browserId,
          interpreterSettings: req.body,
          log: '',
          runId,
          runByUserId: req.user.id,
          serializableOutput: {},
          binaryOutput: {},
        });

        logger.log('info', `Created run ${runId} with browser ${browserId}`);
      } catch (dbError: any) {
        logger.log('error', `Database error creating run: ${dbError.message}`);

        try {
          await destroyRemoteBrowser(browserId, req.user.id);
        } catch (cleanupError: any) {
          logger.log('warn', `Failed to cleanup browser after run creation failure: ${cleanupError.message}`);
        }

        return res.status(500).send({ error: 'Failed to create run record' });
      }

      // Step 3: send the execution job; on failure the run is marked failed
      // and the browser destroyed. A cleanup failure is only logged.
      try {
        const userQueueName = `execute-run-user-${req.user.id}`;
        await pgBossClient.createQueue(userQueueName);

        const jobId = await pgBossClient.send(userQueueName, {
          userId: req.user.id,
          runId: runId,
          browserId: browserId,
        });

        logger.log('info', `Queued run execution job with ID: ${jobId} for run: ${runId}`);
      } catch (queueError: any) {
        logger.log('error', `Failed to queue run execution: ${queueError.message}`);

        try {
          await Run.update({
            status: 'failed',
            finishedAt: new Date().toLocaleString(),
            log: 'Failed to queue execution job'
          }, { where: { runId: runId } });

          await destroyRemoteBrowser(browserId, req.user.id);
        } catch (cleanupError: any) {
          logger.log('warn', `Failed to cleanup after queue error: ${cleanupError.message}`);
        }

        return res.status(503).send({ error: 'Unable to queue run, please try again later' });
      }

      return res.send({
        browserId: browserId,
        runId: runId,
        robotMetaId: recording.recording_meta.id,
        queued: false
      });
    } else {
      // No free slot: store the run as queued under a generated browser id.
      const browserId = uuid();

      await Run.create({
        status: 'queued',
        name: recording.recording_meta.name,
        robotId: recording.id,
        robotMetaId: recording.recording_meta.id,
        startedAt: new Date().toLocaleString(),
        finishedAt: '',
        browserId,
        interpreterSettings: req.body,
        log: 'Run queued - waiting for available browser slot',
        runId,
        runByUserId: req.user.id,
        serializableOutput: {},
        binaryOutput: {},
      });

      return res.send({
        browserId: browserId,
        runId: runId,
        robotMetaId: recording.recording_meta.id,
        queued: true
      });
    }
  } catch (e) {
    const { message } = e as Error;
    logger.log('error', `Error while creating a run with robot id: ${req.params.id} - ${message}`);
    return res.status(500).send({ error: 'Internal server error' });
  }
});
/**
 * GET endpoint for getting a run from the storage.
 *
 * Looks the run up by the `:id` route parameter, matched against `runId`.
 * Responds 404 with `null` when no run matches. A lookup error is logged and
 * answered with `null`.
 */
router.get('/runs/run/:id', requireSignIn, async (req, res) => {
  try {
    // The route declares `:id`; `req.params.runId` is never populated, so the
    // lookup previously never used the requested id.
    const run = await Run.findOne({ where: { runId: req.params.id }, raw: true });
    if (!run) {
      return res.status(404).send(null);
    }
    return res.send(run);
  } catch (e) {
    const { message } = e as Error;
    logger.log('error', `Error ${message} while reading a run with id: ${req.params.id}.json`);
    return res.send(null);
  }
});
'maxun-oss-run-created', { runId: req.params.id, user_id: req.user?.id, created_at: new Date().toISOString(), status: 'failed', error_message: message, source: 'manual' } ); return res.send(false); } }); router.put('/schedule/:id/', requireSignIn, async (req: AuthenticatedRequest, res) => { try { const { id } = req.params; const { runEvery, runEveryUnit, startFrom, dayOfMonth, atTimeStart, atTimeEnd, timezone } = req.body; const robot = await Robot.findOne({ where: { 'recording_meta.id': id } }); if (!robot) { return res.status(404).json({ error: 'Robot not found' }); } // Validate required parameters if (!runEvery || !runEveryUnit || !startFrom || !atTimeStart || !atTimeEnd || !timezone) { return res.status(400).json({ error: 'Missing required parameters' }); } // Validate time zone if (!moment.tz.zone(timezone)) { return res.status(400).json({ error: 'Invalid timezone' }); } // Validate and parse start and end times const [startHours, startMinutes] = atTimeStart.split(':').map(Number); const [endHours, endMinutes] = atTimeEnd.split(':').map(Number); if (isNaN(startHours) || isNaN(startMinutes) || isNaN(endHours) || isNaN(endMinutes) || startHours < 0 || startHours > 23 || startMinutes < 0 || startMinutes > 59 || endHours < 0 || endHours > 23 || endMinutes < 0 || endMinutes > 59) { return res.status(400).json({ error: 'Invalid time format' }); } const days = ['SUNDAY', 'MONDAY', 'TUESDAY', 'WEDNESDAY', 'THURSDAY', 'FRIDAY', 'SATURDAY']; if (!days.includes(startFrom)) { return res.status(400).json({ error: 'Invalid start day' }); } // Build cron expression based on run frequency and starting day let cronExpression; const dayIndex = days.indexOf(startFrom); switch (runEveryUnit) { case 'MINUTES': cronExpression = `*/${runEvery} * * * *`; break; case 'HOURS': cronExpression = `${startMinutes} */${runEvery} * * *`; break; case 'DAYS': cronExpression = `${startMinutes} ${startHours} */${runEvery} * *`; break; case 'WEEKS': cronExpression = `${startMinutes} 
${startHours} * * ${dayIndex}`; break; case 'MONTHS': // todo: handle leap year cronExpression = `${startMinutes} ${startHours} ${dayOfMonth} */${runEvery} *`; if (startFrom !== 'SUNDAY') { cronExpression += ` ${dayIndex}`; } break; default: return res.status(400).json({ error: 'Invalid runEveryUnit' }); } // Validate cron expression if (!cronExpression || !cron.validate(cronExpression)) { return res.status(400).json({ error: 'Invalid cron expression generated' }); } if (!req.user) { return res.status(401).json({ error: 'Unauthorized' }); } try { await cancelScheduledWorkflow(id); } catch (cancelError) { logger.log('warn', `Failed to cancel existing schedule for robot ${id}: ${cancelError}`); } await scheduleWorkflow(id, req.user.id, cronExpression, timezone); const nextRunAt = computeNextRun(cronExpression, timezone); await robot.update({ schedule: { runEvery, runEveryUnit, startFrom, dayOfMonth, atTimeStart, atTimeEnd, timezone, cronExpression, lastRunAt: undefined, nextRunAt: nextRunAt || undefined, }, }); capture( 'maxun-oss-robot-scheduled', { robotId: id, user_id: req.user.id, scheduled_at: new Date().toISOString(), } ) // Fetch updated schedule details after setting it const updatedRobot = await Robot.findOne({ where: { 'recording_meta.id': id } }); res.status(200).json({ message: 'success', robot: updatedRobot, }); } catch (error) { console.error('Error scheduling workflow:', error); res.status(500).json({ error: 'Failed to schedule workflow' }); } }); // Endpoint to get schedule details router.get('/schedule/:id', requireSignIn, async (req, res) => { try { const robot = await Robot.findOne({ where: { 'recording_meta.id': req.params.id }, raw: true }); if (!robot) { return res.status(404).json({ error: 'Robot not found' }); } return res.status(200).json({ schedule: robot.schedule }); } catch (error) { console.error('Error getting schedule:', error); res.status(500).json({ error: 'Failed to get schedule' }); } }); // Endpoint to delete schedule 
router.delete('/schedule/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    const robotMetaId = req.params.id;

    if (!req.user) {
      return res.status(401).json({ error: 'Unauthorized' });
    }

    const robot = await Robot.findOne({ where: { 'recording_meta.id': robotMetaId } });
    if (!robot) {
      return res.status(404).json({ error: 'Robot not found' });
    }

    // Ask the scheduler to drop the job. A failure here is only logged so
    // that the schedule is still removed from the robot below.
    try {
      await cancelScheduledWorkflow(robotMetaId);
    } catch (cancelError) {
      logger.log('error', `Error cancelling scheduled job for robot ${robotMetaId}: ${cancelError}`);
    }

    // Clear the stored schedule on the robot record
    await robot.update({ schedule: null });

    const telemetry = {
      robotId: robotMetaId,
      user_id: req.user?.id,
      unscheduled_at: new Date().toISOString(),
    };
    capture('maxun-oss-robot-schedule-deleted', telemetry);

    res.status(200).json({ message: 'Schedule deleted successfully' });
  } catch (failure) {
    console.error('Error deleting schedule:', failure);
    res.status(500).json({ error: 'Failed to delete schedule' });
  }
});

/**
 * POST endpoint for aborting a current interpretation of the run.
*/ router.post('/runs/abort/:id', requireSignIn, async (req: AuthenticatedRequest, res) => { try { if (!req.user) { return res.status(401).send({ error: 'Unauthorized' }); } const run = await Run.findOne({ where: { runId: req.params.id } }); if (!run) { return res.status(404).send({ error: 'Run not found' }); } if (!['running', 'queued'].includes(run.status)) { return res.status(400).send({ error: `Cannot abort run with status: ${run.status}` }); } const isQueued = run.status === 'queued'; await run.update({ status: 'aborting' }); if (isQueued) { await run.update({ status: 'aborted', finishedAt: new Date().toLocaleString(), log: 'Run aborted while queued' }); return res.send({ success: true, message: 'Queued run aborted', isQueued: true }); } // Immediately stop interpreter like cloud version try { const browser = browserPool.getRemoteBrowser(run.browserId); if (browser && browser.interpreter) { logger.log('info', `Immediately stopping interpreter for run ${req.params.id}`); await browser.interpreter.stopInterpretation(); } } catch (immediateStopError: any) { logger.log('warn', `Failed to immediately stop interpreter: ${immediateStopError.message}`); } const userQueueName = `abort-run-user-${req.user.id}`; await pgBossClient.createQueue(userQueueName); const jobId = await pgBossClient.send(userQueueName, { userId: req.user.id, runId: req.params.id }); logger.log('info', `Abort signal sent for run ${req.params.id}, job ID: ${jobId}`); return res.send({ success: true, message: 'Run stopped immediately, cleanup queued', jobId, isQueued: false }); } catch (e) { const { message } = e as Error; logger.log('error', `Error aborting run ${req.params.id}: ${message}`); return res.status(500).send({ error: 'Failed to abort run' }); } }); // Circuit breaker for database connection issues let consecutiveDbErrors = 0; const MAX_CONSECUTIVE_ERRORS = 3; const CIRCUIT_BREAKER_COOLDOWN = 30000; let circuitBreakerOpenUntil = 0; async function processQueuedRuns() { try { if 
(Date.now() < circuitBreakerOpenUntil) { return; } const queuedRun = await Run.findOne({ where: { status: 'queued' }, order: [['startedAt', 'ASC']], }); consecutiveDbErrors = 0; if (!queuedRun) return; const userId = queuedRun.runByUserId; const canCreateBrowser = await browserPool.hasAvailableBrowserSlots(userId, "run"); if (canCreateBrowser) { logger.log('info', `Processing queued run ${queuedRun.runId} for user ${userId}`); const recording = await Robot.findOne({ where: { 'recording_meta.id': queuedRun.robotMetaId }, raw: true }); if (!recording) { await queuedRun.update({ status: 'failed', finishedAt: new Date().toLocaleString(), log: 'Recording not found' }); return; } try { const newBrowserId = await createRemoteBrowserForRun(userId); logger.log('info', `Created and initialized browser ${newBrowserId} for queued run ${queuedRun.runId}`); await queuedRun.update({ status: 'running', browserId: newBrowserId, log: 'Browser created and ready for execution' }); const userQueueName = `execute-run-user-${userId}`; await pgBossClient.createQueue(userQueueName); const jobId = await pgBossClient.send(userQueueName, { userId: userId, runId: queuedRun.runId, browserId: newBrowserId, }); logger.log('info', `Queued execution for run ${queuedRun.runId} with ready browser ${newBrowserId}, job ID: ${jobId}`); } catch (browserError: any) { logger.log('error', `Failed to create browser for queued run: ${browserError.message}`); await queuedRun.update({ status: 'failed', finishedAt: new Date().toLocaleString(), log: `Failed to create browser: ${browserError.message}` }); } } } catch (error: any) { consecutiveDbErrors++; if (consecutiveDbErrors >= MAX_CONSECUTIVE_ERRORS) { circuitBreakerOpenUntil = Date.now() + CIRCUIT_BREAKER_COOLDOWN; logger.log('error', `Circuit breaker opened after ${MAX_CONSECUTIVE_ERRORS} consecutive errors. 
Cooling down for ${CIRCUIT_BREAKER_COOLDOWN/1000}s`); } logger.log('error', `Error processing queued runs (${consecutiveDbErrors}/${MAX_CONSECUTIVE_ERRORS}): ${error.message}`); } } /** * Recovers orphaned runs that were left in "running" status due to instance crashes * This function runs on server startup to ensure data reliability */ export async function recoverOrphanedRuns() { try { logger.log('info', 'Starting recovery of orphaned runs...'); const orphanedRuns = await Run.findAll({ where: { status: ['running', 'scheduled'] }, order: [['startedAt', 'ASC']] }); if (orphanedRuns.length === 0) { logger.log('info', 'No orphaned runs found'); return; } logger.log('info', `Found ${orphanedRuns.length} orphaned runs to recover (including scheduled runs)`); for (const run of orphanedRuns) { try { const runData = run.toJSON(); logger.log('info', `Recovering orphaned run: ${runData.runId}`); const browser = browserPool.getRemoteBrowser(runData.browserId); if (!browser) { const retryCount = runData.retryCount || 0; if (retryCount < 3) { await run.update({ status: 'queued', retryCount: retryCount + 1, serializableOutput: {}, binaryOutput: {}, browserId: undefined, log: runData.log ? `${runData.log}\n[RETRY ${retryCount + 1}/3] Re-queuing due to server crash` : `[RETRY ${retryCount + 1}/3] Re-queuing due to server crash` }); logger.log('info', `Re-queued crashed run ${runData.runId} (retry ${retryCount + 1}/3)`); } else { const crashRecoveryMessage = `Max retries exceeded (3/3) - Run failed after multiple server crashes.`; await run.update({ status: 'failed', finishedAt: new Date().toLocaleString(), log: runData.log ? 
`${runData.log}\n${crashRecoveryMessage}` : crashRecoveryMessage }); logger.log('warn', `Max retries reached for run ${runData.runId}, marked as permanently failed`); } if (runData.browserId) { try { browserPool.deleteRemoteBrowser(runData.browserId); logger.log('info', `Cleaned up stale browser reference: ${runData.browserId}`); } catch (cleanupError: any) { logger.log('warn', `Failed to cleanup browser reference ${runData.browserId}: ${cleanupError.message}`); } } } else { logger.log('info', `Run ${runData.runId} browser still active, not orphaned`); } } catch (runError: any) { logger.log('error', `Failed to recover run ${run.runId}: ${runError.message}`); } } logger.log('info', `Orphaned run recovery completed. Processed ${orphanedRuns.length} runs.`); } catch (error: any) { logger.log('error', `Failed to recover orphaned runs: ${error.message}`); } } /** * POST endpoint for creating a crawl robot * @route POST /recordings/crawl * @auth requireSignIn - JWT authentication required */ router.post('/recordings/crawl', requireSignIn, async (req: AuthenticatedRequest, res) => { try { const { url, name, crawlConfig } = req.body; if (!url || !crawlConfig) { return res.status(400).json({ error: 'URL and crawl configuration are required.' 
}); } if (!req.user) { return res.status(401).send({ error: 'Unauthorized' }); } try { new URL(url); } catch (err) { return res.status(400).json({ error: 'Invalid URL format' }); } const robotName = name || `Crawl Robot - ${new URL(url).hostname}`; const currentTimestamp = new Date().toLocaleString('en-US'); const robotId = uuid(); const newRobot = await Robot.create({ id: uuid(), userId: req.user.id, recording_meta: { name: robotName, id: robotId, createdAt: currentTimestamp, updatedAt: currentTimestamp, pairs: 1, params: [], type: 'crawl', url: url, }, recording: { workflow: [ { where: { url }, what: [ { action: 'flag', args: ['generated'] }, { action: 'crawl', args: [crawlConfig], name: 'Crawl' } ] }, { where: { url: 'about:blank' }, what: [ { action: 'goto', args: [url] }, { action: 'waitForLoadState', args: ['networkidle'] } ] } ] }, google_sheet_email: null, google_sheet_name: null, google_sheet_id: null, google_access_token: null, google_refresh_token: null, airtable_base_id: null, airtable_base_name: null, airtable_table_name: null, airtable_table_id: null, airtable_access_token: null, airtable_refresh_token: null, schedule: null, webhooks: null }); logger.log('info', `Crawl robot created with id: ${newRobot.id}`); capture('maxun-oss-robot-created', { userId: req.user.id.toString(), robotId: robotId, robotName: robotName, url: url, robotType: 'crawl', crawlConfig: crawlConfig, robot_meta: newRobot.recording_meta, recording: newRobot.recording, }); return res.status(201).json({ message: 'Crawl robot created successfully.', robot: newRobot, }); } catch (error) { if (error instanceof Error) { logger.log('error', `Error creating crawl robot: ${error.message}`); return res.status(500).json({ error: error.message }); } else { logger.log('error', 'Unknown error creating crawl robot'); return res.status(500).json({ error: 'An unknown error occurred.' 
}); } } }); /** * POST endpoint for creating a search robot * @route POST /recordings/search * @auth requireSignIn - JWT authentication required */ router.post('/recordings/search', requireSignIn, async (req: AuthenticatedRequest, res) => { try { const { searchConfig, name } = req.body; if (!searchConfig || !searchConfig.query) { return res.status(400).json({ error: 'Search configuration with query is required.' }); } if (!req.user) { return res.status(401).send({ error: 'Unauthorized' }); } const robotName = name || `Search Robot - ${searchConfig.query.substring(0, 50)}`; const currentTimestamp = new Date().toLocaleString('en-US'); const robotId = uuid(); const newRobot = await Robot.create({ id: uuid(), userId: req.user.id, recording_meta: { name: robotName, id: robotId, createdAt: currentTimestamp, updatedAt: currentTimestamp, pairs: 1, params: [], type: 'search', }, recording: { workflow: [ { where: { url: 'about:blank' }, what: [{ action: 'search', args: [searchConfig], name: 'Search' }] } ] }, google_sheet_email: null, google_sheet_name: null, google_sheet_id: null, google_access_token: null, google_refresh_token: null, airtable_base_id: null, airtable_base_name: null, airtable_table_name: null, airtable_table_id: null, airtable_access_token: null, airtable_refresh_token: null, schedule: null, webhooks: null }); logger.log('info', `Search robot created with id: ${newRobot.id}`); capture('maxun-oss-robot-created', { userId: req.user.id.toString(), robotId: robotId, robotName: robotName, robotType: 'search', searchQuery: searchConfig.query, searchProvider: searchConfig.provider || 'duckduckgo', searchLimit: searchConfig.limit || 10, robot_meta: newRobot.recording_meta, recording: newRobot.recording, }); return res.status(201).json({ message: 'Search robot created successfully.', robot: newRobot, }); } catch (error) { if (error instanceof Error) { logger.log('error', `Error creating search robot: ${error.message}`); return res.status(500).json({ error: 
error.message }); } else { logger.log('error', 'Unknown error creating search robot'); return res.status(500).json({ error: 'An unknown error occurred.' }); } } }); export { processQueuedRuns }; ================================================ FILE: server/src/routes/webhook.ts ================================================ import { Router, Request, Response } from 'express'; import Robot from '../models/Robot'; import { requireSignIn } from '../middlewares/auth'; import axios from 'axios'; import { v4 as uuid } from "uuid"; export const router = Router(); interface AuthenticatedRequest extends Request { user?: { id: string }; } interface WebhookConfig { id: string; url: string; events: string[]; active: boolean; createdAt: string; updatedAt: string; lastCalledAt?: string | null; retryAttempts?: number; retryDelay?: number; timeout?: number; } const updateWebhookLastCalled = async (robotId: string, webhookId: string): Promise => { try { const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); if (!robot || !robot.webhooks) { return; } const updatedWebhooks = robot.webhooks.map((w: WebhookConfig) => { if (w.id === webhookId) { return { ...w, lastCalledAt: new Date().toISOString() }; } return w; }); await robot.update({ webhooks: updatedWebhooks }); } catch (error) { console.error('Error updating webhook lastCalledAt:', error); } }; // Add new webhook router.post('/add', requireSignIn, async (req: Request, res: Response) => { const { webhook, robotId } = req.body; const authenticatedReq = req as AuthenticatedRequest; try { if (!authenticatedReq.user) { return res.status(401).json({ ok: false, error: 'Unauthorized' }); } if (!webhook || !robotId) { return res.status(400).json({ ok: false, error: 'Webhook configuration and robot ID are required' }); } if (!webhook.url) { return res.status(400).json({ ok: false, error: 'Webhook URL is required' }); } // Validate URL format try { new URL(webhook.url); } catch (error) { return 
res.status(400).json({ ok: false, error: 'Invalid webhook URL format' }); } const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); if (!robot) { return res.status(404).json({ ok: false, error: 'Robot not found' }); } const currentWebhooks = robot.webhooks || []; const existingWebhook = currentWebhooks.find((w: WebhookConfig) => w.url === webhook.url); if (existingWebhook) { return res.status(400).json({ ok: false, error: 'Webhook with this url already exists' }); } const newWebhook: WebhookConfig = { ...webhook, id: webhook.id || uuid(), createdAt: new Date().toISOString(), updatedAt: new Date().toISOString(), lastCalledAt: null, retryAttempts: webhook.retryAttempts || 3, retryDelay: webhook.retryDelay || 5, timeout: webhook.timeout || 30, }; const updatedWebhooks = [...currentWebhooks, newWebhook]; await robot.update({ webhooks: updatedWebhooks }); res.status(200).json({ ok: true, message: 'Webhook added successfully', webhook: newWebhook }); } catch (error: any) { console.log(`Could not add webhook - ${error}`); res.status(500).json({ ok: false, error: 'Could not add webhook configuration' }); } }); // Update existing webhook router.post('/update', requireSignIn, async (req: Request, res: Response) => { const { webhook, robotId } = req.body; const authenticatedReq = req as AuthenticatedRequest; try { if (!authenticatedReq.user) { return res.status(401).json({ ok: false, error: 'Unauthorized' }); } if (!webhook || !robotId || !webhook.id) { return res.status(400).json({ ok: false, error: 'Webhook configuration, webhook ID, and robot ID are required' }); } // Validate URL format if provided if (webhook.url) { try { new URL(webhook.url); } catch (error) { return res.status(400).json({ ok: false, error: 'Invalid webhook URL format' }); } } const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); if (!robot) { return res.status(404).json({ ok: false, error: 'Robot not found' }); } const currentWebhooks = robot.webhooks 
|| []; const webhookIndex = currentWebhooks.findIndex((w: WebhookConfig) => w.id === webhook.id); if (webhookIndex === -1) { return res.status(404).json({ ok: false, error: 'Webhook not found' }); } // Check for duplicate URLs (excluding current webhook) const duplicateUrl = currentWebhooks.find((w: WebhookConfig, index: number) => w.url === webhook.url && index !== webhookIndex ); if (duplicateUrl) { return res.status(400).json({ ok: false, error: 'Webhook with this URL already exists' }); } const updatedWebhook: WebhookConfig = { ...currentWebhooks[webhookIndex], ...webhook, updatedAt: new Date().toISOString(), lastCalledAt: currentWebhooks[webhookIndex].lastCalledAt }; const updatedWebhooks = [...currentWebhooks]; updatedWebhooks[webhookIndex] = updatedWebhook; await robot.update({ webhooks: updatedWebhooks }); res.status(200).json({ ok: true, message: 'Webhook updated successfully', webhook: updatedWebhook }); } catch (error: any) { console.log(`Could not update webhook - ${error}`); res.status(500).json({ ok: false, error: 'Could not update webhook configuration' }); } }); // Remove webhook router.post('/remove', requireSignIn, async (req: Request, res: Response) => { const { webhookId, robotId } = req.body; const authenticatedReq = req as AuthenticatedRequest; try { if (!authenticatedReq.user) { return res.status(401).json({ ok: false, error: 'Unauthorized' }); } if (!webhookId || !robotId) { return res.status(400).json({ ok: false, error: 'Webhook ID and robot ID are required' }); } const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); if (!robot) { return res.status(404).json({ ok: false, error: 'Robot not found' }); } const currentWebhooks = robot.webhooks || []; const webhookExists = currentWebhooks.find((w: WebhookConfig) => w.id === webhookId); if (!webhookExists) { return res.status(404).json({ ok: false, error: 'Webhook not found' }); } const updatedWebhooks = currentWebhooks.filter((w: WebhookConfig) => w.id !== webhookId); 
await robot.update({ webhooks: updatedWebhooks }); res.status(200).json({ ok: true, message: 'Webhook removed successfully' }); } catch (error: any) { console.log(`Could not remove webhook - ${error}`); res.status(500).json({ ok: false, error: 'Could not remove webhook configuration' }); } }); // Get all webhooks for a robot router.get('/list/:robotId', requireSignIn, async (req: Request, res: Response) => { const { robotId } = req.params; const authenticatedReq = req as AuthenticatedRequest; try { if (!authenticatedReq.user) { return res.status(401).json({ ok: false, error: 'Unauthorized' }); } const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId }, attributes: ['webhooks'] }); if (!robot) { return res.status(404).json({ ok: false, error: 'Robot not found' }); } const webhooks = robot.webhooks || []; res.status(200).json({ ok: true, webhooks: webhooks }); } catch (error: any) { console.log(`Could not retrieve webhooks - ${error}`); res.status(500).json({ ok: false, error: 'Could not retrieve webhook configurations' }); } }); // Test webhook endpoint router.post('/test', requireSignIn, async (req: Request, res: Response) => { const { webhook, robotId } = req.body; const authenticatedReq = req as AuthenticatedRequest; try { if (!authenticatedReq.user) { return res.status(401).json({ ok: false, error: 'Unauthorized' }); } if (!webhook || !robotId) { return res.status(400).json({ ok: false, error: 'Webhook configuration and robot ID are required' }); } const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); if (!robot) { return res.status(404).json({ ok: false, error: 'Robot not found' }); } // Create test payload const testPayload = { event_type: "webhook_test", timestamp: new Date().toISOString(), webhook_id: webhook.id, data: { robot_id: robotId, run_id: "110c4dae-c39b-4b30-a932-eff1022e4bb0", robot_name: robot.recording_meta?.name || "E-commerce Product Scraper", status: "test", started_at: new Date(Date.now() - 
45000).toISOString(), finished_at: new Date().toISOString(), extracted_data: { captured_texts: [ { "Product Name": "MacBook Pro 16-inch M3 Max", "Price": "$3,999.00", "Rating": "4.8/5 stars", "Availability": "In Stock - Ships within 2-3 business days", "SKU": "MBPM3-16-1TB-SLV", "Description": "The most powerful MacBook Pro ever is here. With the blazing-fast M3 Max chip, pro-level performance has never been more portable." } ], captured_lists: { "list_1": [ { "Rank": "1", "Product": "MacBook Air M2", "Category": "Laptops", "Units Sold": "2,847", "Revenue": "$2,847,000" }, { "Rank": "2", "Product": "iPhone 15", "Category": "Smartphones", "Units Sold": "1,923", "Revenue": "$1,923,000" }, { "Rank": "3", "Product": "iPad Pro 12.9", "Category": "Tablets", "Units Sold": "1,456", "Revenue": "$1,456,000" } ], "list_0": [ { "Customer": "Sarah M.", "Rating": "5 stars", "Review": "Absolutely love my new MacBook! The battery life is incredible and the performance is outstanding.", "Date": "2024-12-15", "Verified Purchase": "Yes" }, { "Customer": "John D.", "Rating": "4 stars", "Review": "Great phone overall, but wish the battery lasted a bit longer with heavy usage.", "Date": "2024-12-14", "Verified Purchase": "Yes" }, { "Customer": "Emily R.", "Rating": "5 stars", "Review": "The camera quality is phenomenal! Perfect for my photography business.", "Date": "2024-12-13", "Verified Purchase": "Yes" } ], }, total_rows: 11, captured_texts_count: 5, captured_lists_count: 6, screenshots_count: 5 }, metadata: { test_mode: true, browser_id: "d27ace57-75cb-441c-8589-8ba34e52f7d1", user_id: 108, } } }; await updateWebhookLastCalled(robotId, webhook.id); const response = await axios.post(webhook.url, testPayload, { timeout: (webhook.timeout || 30) * 1000, validateStatus: (status) => status < 500 }); const success = response.status >= 200 && response.status < 300; res.status(200).json({ ok: true, message: success ? 
'Test webhook sent successfully' : 'Webhook endpoint responded with non-success status', details: { status: response.status, statusText: response.statusText, success: success } }); } catch (error: any) { console.log(`Could not test webhook - ${error}`); try { await updateWebhookLastCalled(robotId, webhook.id); } catch (updateError) { console.error('Failed to update lastCalledAt after webhook error:', updateError); } let errorMessage = 'Could not send test webhook'; if (error.code === 'ECONNREFUSED') { errorMessage = 'Connection refused - webhook URL is not accessible'; } else if (error.code === 'ETIMEDOUT') { errorMessage = 'Request timeout - webhook endpoint did not respond in time'; } else if (error.response) { errorMessage = `Webhook endpoint responded with error: ${error.response.status} ${error.response.statusText}`; } res.status(500).json({ ok: false, error: errorMessage, details: { code: error.code, message: error.message } }); } }); // Send webhook export const sendWebhook = async (robotId: string, eventType: string, data: any): Promise => { try { const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); if (!robot || !robot.webhooks) { return; } const activeWebhooks = robot.webhooks.filter((w: WebhookConfig) => w.active && w.events.includes(eventType) ); if (activeWebhooks.length === 0) { return; } const webhookPromises = activeWebhooks.map(async (webhook: WebhookConfig) => { const payload = { event_type: eventType, timestamp: new Date().toISOString(), webhook_id: webhook.id, data: data }; return sendWebhookWithRetry(robotId, webhook, payload); }); await Promise.allSettled(webhookPromises); } catch (error) { console.error('Error sending webhooks:', error); } }; // Helper function to send webhook with retry logic const sendWebhookWithRetry = async (robotId: string, webhook: WebhookConfig, payload: any, attempt: number = 1): Promise => { const maxRetries = webhook.retryAttempts || 3; const retryDelay = webhook.retryDelay || 5; const 
timeout = webhook.timeout || 30; try { await updateWebhookLastCalled(robotId, webhook.id); const response = await axios.post(webhook.url, payload, { timeout: timeout * 1000, validateStatus: (status) => status >= 200 && status < 300 }); console.log(`Webhook sent successfully to ${webhook.url}: ${response.status}`); } catch (error: any) { console.error(`Webhook failed for ${webhook.url} (attempt ${attempt}):`, error.message); if (attempt < maxRetries) { const delay = retryDelay * Math.pow(2, attempt - 1); console.log(`Retrying webhook ${webhook.url} in ${delay} seconds...`); setTimeout(async () => { await sendWebhookWithRetry(robotId, webhook, payload, attempt + 1); }, delay * 1000); } else { console.error(`Webhook ${webhook.url} failed after ${maxRetries} attempts`); } } }; // Clear all webhooks for a robot router.delete('/clear/:robotId', requireSignIn, async (req: Request, res: Response) => { const { robotId } = req.params; const authenticatedReq = req as AuthenticatedRequest; try { if (!authenticatedReq.user) { return res.status(401).json({ ok: false, error: 'Unauthorized' }); } const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); if (!robot) { return res.status(404).json({ ok: false, error: 'Robot not found' }); } await robot.update({ webhooks: [] }); res.status(200).json({ ok: true, message: 'All webhooks cleared successfully' }); } catch (error: any) { console.log(`Could not clear webhooks - ${error}`); res.status(500).json({ ok: false, error: 'Could not clear webhook configurations' }); } }); ================================================ FILE: server/src/routes/workflow.ts ================================================ /** * RESTful API endpoints handling currently generated workflow management. 
*/

import { Router } from 'express';
import logger from "../logger";
import { browserPool } from "../server";
import { requireSignIn } from '../middlewares/auth';
import Robot from '../models/Robot';
import { AuthenticatedRequest } from './record';

export const router = Router();

/**
 * Writes a debug log entry for every request to the root of the workflow API
 * and then passes control to the next matching handler.
 */
router.all('/', requireSignIn, (req, res, next) => {
  logger.log('debug', `The workflow API was invoked: ${req.url}`);
  next();
});

/**
 * GET endpoint returning the workflow file held by the generator of the given
 * remote browser. Sends null when the browser or its generator is missing.
 */
router.get('/:browserId', requireSignIn, (req, res) => {
  const generator = browserPool.getRemoteBrowser(req.params.browserId)?.generator;
  if (!generator) {
    return res.send(null);
  }
  return res.send(generator.getWorkflowFile());
});

/**
 * GET endpoint returning the parameter array of the recording generated in
 * the given remote browser. Sends null when the browser or its generator is
 * missing.
 */
router.get('/params/:browserId', requireSignIn, (req, res) => {
  const generator = browserPool.getRemoteBrowser(req.params.browserId)?.generator;
  if (!generator) {
    return res.send(null);
  }
  return res.send(generator.getParams());
});

/**
 * DELETE endpoint removing the pair at `:index` from the workflow generated in
 * the user's active recording browser. Sends the resulting workflow file, or
 * null when the user has no active recording browser.
 */
router.delete('/pair/:index', requireSignIn, (req: AuthenticatedRequest, res) => {
  if (!req.user) {
    return res.status(401).send('User not authenticated');
  }

  const recordingBrowserId = browserPool.getActiveBrowserId(req.user.id, "recording");
  if (!recordingBrowserId) {
    return res.send(null);
  }

  const recordingBrowser = browserPool.getRemoteBrowser(recordingBrowserId);
  if (!recordingBrowser) {
    return res.send(null);
  }

  recordingBrowser.generator?.removePairFromWorkflow(parseInt(req.params.index));
  return res.send(recordingBrowser.generator?.getWorkflowFile());
});

/**
 * POST endpoint for adding a pair to the generated workflow.
*/
router.post('/pair/:index', requireSignIn, (req: AuthenticatedRequest, res) => {
  if (!req.user) {
    return res.status(401).send('User not authenticated');
  }

  const recordingBrowserId = browserPool.getActiveBrowserId(req.user.id, "recording");
  if (!recordingBrowserId) {
    return res.send(null);
  }

  const recordingBrowser = browserPool.getRemoteBrowser(recordingBrowserId);
  logger.log('debug', `Adding pair to workflow`);
  if (!recordingBrowser) {
    return res.send(null);
  }

  logger.log('debug', `Adding pair to workflow: ${JSON.stringify(req.body)}`);
  // Without a pair in the body there is nothing to insert.
  if (!req.body.pair) {
    return res.send(null);
  }

  recordingBrowser.generator?.addPairToWorkflow(parseInt(req.params.index), req.body.pair);
  return res.send(recordingBrowser.generator?.getWorkflowFile());
});

/**
 * PUT endpoint replacing the pair at `:index` of the workflow generated in the
 * user's active recording browser with the pair sent in the request body.
 * Sends the resulting workflow file, or null when there is no active recording
 * browser or no pair was supplied.
 */
router.put('/pair/:index', requireSignIn, (req: AuthenticatedRequest, res) => {
  if (!req.user) {
    return res.status(401).send('User not authenticated');
  }

  const recordingBrowserId = browserPool.getActiveBrowserId(req.user.id, "recording");
  if (!recordingBrowserId) {
    return res.send(null);
  }

  const recordingBrowser = browserPool.getRemoteBrowser(recordingBrowserId);
  logger.log('debug', `Updating pair in workflow`);
  if (!recordingBrowser) {
    return res.send(null);
  }

  logger.log('debug', `New value: ${JSON.stringify(req.body)}`);
  // Without a pair in the body there is nothing to update.
  if (!req.body.pair) {
    return res.send(null);
  }

  recordingBrowser.generator?.updatePairInWorkflow(parseInt(req.params.index), req.body.pair);
  return res.send(recordingBrowser.generator?.getWorkflowFile());
});

/**
 * PUT endpoint for updating the currently generated workflow file from the one in the storage.
*/ router.put('/:browserId/:id', requireSignIn, async (req, res) => { try { const browser = browserPool.getRemoteBrowser(req.params.browserId); logger.log('debug', `Updating workflow for Robot: ${req.params.id}`); if (browser && browser.generator) { const robot = await Robot.findOne({ where: { 'recording_meta.id': req.params.id }, raw: true }); if (!robot) { logger.log('info', `Robot not found with ID: ${req.params.id}`); return res.status(404).send({ error: 'Robot not found' }); } const { recording, recording_meta } = robot; if (recording && recording.workflow) { browser.generator.updateWorkflowFile(recording, recording_meta); const workflowFile = browser.generator.getWorkflowFile(); return res.send(workflowFile); } else { logger.log('info', `Invalid recording data for Robot ID: ${req.params.id}`); return res.status(400).send({ error: 'Invalid recording data' }); } } logger.log('info', `Browser or generator not available for ID: ${req.params.id}`); return res.status(400).send({ error: 'Browser or generator not available' }); } catch (e) { const { message } = e as Error; logger.log('error', `Error while updating workflow for Robot ID: ${req.params.id}. 
Error: ${message}`); return res.status(500).send({ error: 'Internal server error' }); } }); export default router; ================================================ FILE: server/src/schedule-worker.ts ================================================ /** * Worker process focused solely on scheduling logic */ import PgBoss, { Job } from 'pg-boss'; import logger from './logger'; import Robot from './models/Robot'; import { handleRunRecording } from './workflow-management/scheduler'; import { computeNextRun } from './utils/schedule'; if (!process.env.DB_USER || !process.env.DB_PASSWORD || !process.env.DB_HOST || !process.env.DB_PORT || !process.env.DB_NAME) { throw new Error('One or more required environment variables are missing.'); } const pgBossConnectionString = `postgresql://${process.env.DB_USER}:${encodeURIComponent(process.env.DB_PASSWORD)}@${process.env.DB_HOST}:${process.env.DB_PORT}/${process.env.DB_NAME}`; export const pgBoss = new PgBoss({ connectionString: pgBossConnectionString, max: 3, expireInHours: 23, }); const registeredQueues = new Set(); interface ScheduledWorkflowData { id: string; runId: string; userId: string; } /** * Process a scheduled workflow job */ async function processScheduledWorkflow(job: Job) { const { id, runId, userId } = job.data; logger.log('info', `Processing scheduled workflow job for robotId: ${id}, runId: ${runId}, userId: ${userId}`); try { // Execute the workflow using the existing handleRunRecording function await handleRunRecording(id, userId); // Update the robot's schedule with last run and next run times const robot = await Robot.findOne({ where: { 'recording_meta.id': id } }); if (robot && robot.schedule && robot.schedule.cronExpression && robot.schedule.timezone) { // Update lastRunAt to the current time const lastRunAt = new Date(); // Compute the next run date const nextRunAt = computeNextRun(robot.schedule.cronExpression, robot.schedule.timezone) || undefined; await robot.update({ schedule: { ...robot.schedule, 
lastRunAt, nextRunAt, }, }); logger.log('info', `Updated robot ${id} schedule - next run at: ${nextRunAt}`); } else { logger.log('error', `Robot ${id} schedule, cronExpression, or timezone is missing.`); } return { success: true }; } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); logger.log('error', `Scheduled workflow job failed: ${errorMessage}`); return { success: false }; } } /** * Register a worker to handle scheduled workflow jobs */ async function registerScheduledWorkflowWorker() { try { const jobs = await pgBoss.getSchedules(); for (const job of jobs) { await pgBoss.createQueue(job.name); await registerWorkerForQueue(job.name); } logger.log('info', 'Scheduled workflow workers registered successfully'); } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); logger.log('error', `Failed to register scheduled workflow workers: ${errorMessage}`); } } /** * Register a worker for a specific queue * Exported to allow dynamic registration when new schedules are created */ export async function registerWorkerForQueue(queueName: string) { try { if (registeredQueues.has(queueName)) { return; } await pgBoss.work(queueName, async (job: Job | Job[]) => { try { const singleJob = Array.isArray(job) ? job[0] : job; return await processScheduledWorkflow(singleJob); } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); logger.log('error', `Scheduled workflow job failed in queue ${queueName}: ${errorMessage}`); throw error; } }); registeredQueues.add(queueName); logger.log('info', `Registered worker for queue: ${queueName}`); } catch (error: unknown) { const errorMessage = error instanceof Error ? 
error.message : String(error); logger.log('error', `Failed to register worker for queue ${queueName}: ${errorMessage}`); } } /** * Initialize PgBoss and register scheduling workers */ async function startScheduleWorker() { try { logger.log('info', 'Starting PgBoss scheduling worker...'); await pgBoss.start(); logger.log('info', 'PgBoss scheduling worker started successfully'); // Register the scheduled workflow worker await registerScheduledWorkflowWorker(); logger.log('info', 'Scheduling worker registered successfully'); } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); logger.log('error', `Failed to start PgBoss scheduling worker: ${errorMessage}`); process.exit(1); } } startScheduleWorker(); pgBoss.on('error', (error) => { logger.log('error', `PgBoss scheduler error: ${error.message}`); }); process.on('SIGTERM', async () => { logger.log('info', 'SIGTERM received, shutting down PgBoss scheduler...'); await pgBoss.stop(); logger.log('info', 'PgBoss scheduler stopped, ready for termination'); }); process.on('SIGINT', async () => { logger.log('info', 'SIGINT received, shutting down PgBoss scheduler...'); await pgBoss.stop(); logger.log('info', 'PgBoss scheduler stopped, waiting for main process cleanup...'); }); ================================================ FILE: server/src/sdk/browserSide/pageAnalyzer.js ================================================ /** * Page Analyzer for pagination auto-detection, selector generation and grouping */ (function () { 'use strict'; /** * Helper function to evaluate both CSS and XPath selectors * Returns array of matching elements */ function evaluateSelector(selector, doc) { try { const isXPath = selector.startsWith('//') || selector.startsWith('(//'); if (isXPath) { const result = doc.evaluate( selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null ); const elements = []; for (let i = 0; i < result.snapshotLength; i++) { const node = result.snapshotItem(i); if 
(node && node.nodeType === Node.ELEMENT_NODE) { elements.push(node); } } return elements; } else { return Array.from(doc.querySelectorAll(selector)); } } catch (err) { return []; } } /** * Convert CSS selector to XPath */ function cssToXPath(cssSelector) { if (cssSelector.startsWith('//') || cssSelector.startsWith('/')) { return cssSelector; } try { let xpath = ''; const parts = cssSelector.split(/\s+(?![^[]*])/); for (let i = 0; i < parts.length; i++) { const part = parts[i].trim(); if (!part) continue; if (part === '>') continue; const xpathPart = convertCssPart(part); if (i === 0) { xpath = '//' + xpathPart; } else if (parts[i - 1] === '>') { xpath += '/' + xpathPart; } else { xpath += '//' + xpathPart; } } return xpath || `//*`; } catch (error) { return `//*`; } } /** * Convert a single CSS selector part to XPath */ function convertCssPart(cssPart) { const tagMatch = cssPart.match(/^([a-zA-Z][\w-]*|\*)/); const tag = tagMatch ? tagMatch[1] : '*'; const predicates = []; const idMatch = cssPart.match(/#([\w-]+)/); if (idMatch) { predicates.push(`@id='${idMatch[1]}'`); } const classMatches = cssPart.match(/\.((?:\\.|[^.#[\s])+)/g); if (classMatches) { classMatches.forEach(cls => { let className = cls.substring(1).replace(/\\/g, ''); predicates.push(`contains(@class, '${className}')`); }); } const attrMatches = cssPart.match(/\[([^\]]+)\]/g); if (attrMatches) { attrMatches.forEach(attr => { const content = attr.slice(1, -1); const eqMatch = content.match(/([^=]+)="([^"]+)"/); if (eqMatch) { predicates.push(`@${eqMatch[1]}='${eqMatch[2]}'`); } else { predicates.push(`@${content}`); } }); } if (predicates.length > 0) { return `${tag}[${predicates.join(' and ')}]`; } return tag; } /** * Main entry point for SDK - auto-converts CSS to XPath */ window.autoDetectListFields = function (selector) { try { let xpathSelector = cssToXPath(selector); const testElements = evaluateXPath(xpathSelector, document); if (testElements.length === 0) { console.error('No elements matched 
the XPath selector!'); return { fields: {}, listSelector: xpathSelector, listFallbackSelector: null, error: 'Selector did not match any elements on the page' }; } if (testElements.length > 0 && !xpathSelector.includes('count(*)')) { const childCounts = testElements.slice(0, 5).map(el => el.children.length); const uniqueCounts = [...new Set(childCounts)]; if (uniqueCounts.length > 1 && childCounts.filter(c => c === 1).length > childCounts.length / 2) { if (xpathSelector.includes('[') && xpathSelector.endsWith(']')) { xpathSelector = xpathSelector.slice(0, -1) + ' and count(*)=1]'; } else if (xpathSelector.includes('[')) { xpathSelector = xpathSelector.replace(/\]$/, ' and count(*)=1]'); } else { const lastSlash = xpathSelector.lastIndexOf('/'); if (lastSlash !== -1) { const beforeTag = xpathSelector.substring(0, lastSlash + 1); const tag = xpathSelector.substring(lastSlash + 1); xpathSelector = beforeTag + tag + '[count(*)=1]'; } else { xpathSelector = xpathSelector + '[count(*)=1]'; } } } } const fields = window.getChildSelectors(xpathSelector); return { fields: fields, listSelector: xpathSelector, listFallbackSelector: null, error: Object.keys(fields).length === 0 ? 
'No valid fields could be auto-detected from the list items' : null }; } catch (error) { console.error('Exception:', error); return { fields: {}, error: error.message || 'Failed to auto-detect fields' }; } }; const pathCache = new WeakMap(); const descendantsCache = new WeakMap(); const meaningfulCache = new WeakMap(); const classCache = new Map(); /** * Main entry point - returns detected fields for a list selector */ window.getChildSelectors = function (parentSelector) { try { const parentElements = evaluateXPath(parentSelector, document); if (parentElements.length === 0) { console.error('No parent elements found!'); return {}; } const maxItems = 10; const limitedParents = parentElements.slice(0, Math.min(maxItems, parentElements.length)); const allChildSelectors = []; for (let i = 0; i < limitedParents.length; i++) { const parent = limitedParents[i]; const otherListElements = limitedParents.filter((_, index) => index !== i); const selectors = generateOptimizedChildXPaths( parent, parentSelector, otherListElements ); allChildSelectors.push(...selectors); } const childSelectors = Array.from(new Set(allChildSelectors)).sort() const fields = createFieldsFromSelectors( childSelectors, limitedParents, parentSelector ); return fields; } catch (error) { console.error('Exception:', error); return {}; } }; /** * Generate optimized XPath selectors for all meaningful children */ function generateOptimizedChildXPaths(parentElement, listSelector, otherListElements) { const selectors = []; const processedElements = new Set(); const allDescendants = getAllDescendantsIncludingShadow(parentElement); const batchSize = 25; for (let i = 0; i < allDescendants.length; i += batchSize) { const batch = allDescendants.slice(i, i + batchSize); for (const descendant of batch) { if (processedElements.has(descendant)) continue; processedElements.add(descendant); const xpath = buildOptimizedAbsoluteXPath( descendant, listSelector, parentElement, otherListElements ); if (xpath.primary) { 
selectors.push({
            primary: xpath.primary,
            fallback: xpath.fallback,
            element: descendant
          });
        }

        // Hard cap on the number of selectors produced per list item.
        if (selectors.length >= 250) {
          break;
        }
      }

      // Repeat the cap check so the outer (batch) loop stops as well.
      if (selectors.length >= 250) {
        break;
      }
    }

    return selectors;
  }

  /**
   * Get all meaningful descendants including shadow DOM
   *
   * Breadth-first walk starting at `parentElement`. The walk stops once more
   * than 1200 nodes were visited, 300 meaningful elements were collected, or
   * a node deeper than 20 levels is dequeued. At most 30 light-DOM children
   * and 20 shadow-root children are queued per element. `parentElement`
   * itself is never part of the result. Results are memoised per
   * `parentElement` in `descendantsCache`.
   */
  function getAllDescendantsIncludingShadow(parentElement) {
    if (descendantsCache.has(parentElement)) {
      return descendantsCache.get(parentElement);
    }

    const meaningfulDescendants = [];
    const queue = [parentElement];
    const visited = new Set();
    visited.add(parentElement);

    const MAX_MEANINGFUL_ELEMENTS = 300;
    const MAX_NODES_TO_CHECK = 1200;
    const MAX_DEPTH = 20;
    let nodesChecked = 0;

    // depths[i] is the depth of queue[i]; the queue is read through an index
    // instead of being shifted.
    const depths = [0];
    let queueIndex = 0;

    while (queueIndex < queue.length) {
      const element = queue[queueIndex];
      const currentDepth = depths[queueIndex];
      queueIndex++;
      nodesChecked++;

      if (
        nodesChecked > MAX_NODES_TO_CHECK ||
        meaningfulDescendants.length >= MAX_MEANINGFUL_ELEMENTS ||
        currentDepth > MAX_DEPTH
      ) {
        break;
      }

      if (element !== parentElement && isMeaningfulElement(element)) {
        meaningfulDescendants.push(element);
      }

      // Elements at the depth limit are recorded above but not expanded.
      if (currentDepth >= MAX_DEPTH) {
        continue;
      }

      const children = element.children;
      const childLimit = Math.min(children.length, 30);
      for (let i = 0; i < childLimit; i++) {
        const child = children[i];
        if (!visited.has(child)) {
          visited.add(child);
          queue.push(child);
          depths.push(currentDepth + 1);
        }
      }

      // Process shadow DOM
      if (element.shadowRoot && currentDepth < MAX_DEPTH - 1) {
        const shadowChildren = element.shadowRoot.children;
        const shadowLimit = Math.min(shadowChildren.length, 20);
        for (let i = 0; i < shadowLimit; i++) {
          const child = shadowChildren[i];
          if (!visited.has(child)) {
            visited.add(child);
            queue.push(child);
            depths.push(currentDepth + 1);
          }
        }
      }
    }

    descendantsCache.set(parentElement, meaningfulDescendants);
    return meaningfulDescendants;
  }

  /**
   * Check if element has meaningful content for extraction
   *
   * True for an <img> with a `src`, an <a> with an `href`, any element whose
   * trimmed textContent is non-empty, and any element containing an <svg>.
   * The verdict is memoised per element in `meaningfulCache`.
   */
  function isMeaningfulElement(element) {
    if (meaningfulCache.has(element)) {
      return meaningfulCache.get(element);
    }

    const tagName = element.tagName.toLowerCase();

    if (tagName === 'img' && element.hasAttribute('src')) {
      meaningfulCache.set(element, true);
      return true;
    }

    if (tagName === 'a' && element.hasAttribute('href')) {
      meaningfulCache.set(element, true);
      return true;
    }

    const text = (element.textContent || '').trim();
    const hasVisibleText = text.length > 0;

    if (hasVisibleText || element.querySelector('svg')) {
      meaningfulCache.set(element, true);
      return true;
    }

    // Both remaining paths give the same answer: not meaningful.
    if (element.children.length > 0) {
      meaningfulCache.set(element, false);
      return false;
    }

    meaningfulCache.set(element, false);
    return false;
  }

  /**
   * Build optimized absolute XPath
   *
   * Returns `{ primary, fallback }`. `primary` is `listSelector` followed by
   * the structural path from `listElement` to `targetElement`, or null when
   * no such path exists. `fallback` is the data-mx-id based XPath. If
   * building the primary throws, only the fallback is returned.
   */
  function buildOptimizedAbsoluteXPath(targetElement, listSelector, listElement, otherListElements) {
    try {
      let primary = null;
      const pathFromList = getOptimizedStructuralPath(
        targetElement,
        listElement,
        otherListElements
      );

      if (pathFromList) {
        primary = listSelector + pathFromList;
      }

      const fallback = generateMandatoryChildFallbackXPath(targetElement, listElement);

      return { primary, fallback };
    } catch (error) {
      const fallback = generateMandatoryChildFallbackXPath(targetElement, listElement);
      return { primary: null, fallback };
    }
  }

  /**
   * Get optimized structural path from element to root
   *
   * Climbs from `targetElement` towards `rootElement` (crossing shadow hosts)
   * for at most 20 levels, prepending one XPath step per level. Returns the
   * steps joined as `/step/step...`, or null when the target is the root
   * itself, is not inside the root, or the root was not reached. Results
   * computed by the climb are memoised per target element in `pathCache`.
   */
  function getOptimizedStructuralPath(targetElement, rootElement, otherListElements) {
    if (pathCache.has(targetElement)) {
      return pathCache.get(targetElement);
    }

    if (!elementContains(rootElement, targetElement) || targetElement === rootElement) {
      return null;
    }

    const pathParts = [];
    let current = targetElement;
    let pathDepth = 0;
    const MAX_PATH_DEPTH = 20;

    while (current && current !== rootElement && pathDepth < MAX_PATH_DEPTH) {
      const classes = getCommonClassesAcrossLists(current, otherListElements);

      // True when another element with the same tag inside the root carries
      // every one of these classes; passed below as addPositionToAll.
      const hasConflictingElement = classes.length > 0 && rootElement ?
queryElementsInScope(rootElement, current.tagName.toLowerCase()) .filter(el => el !== current) .some(el => classes.every(cls => normalizeClasses(el.classList).split(' ').includes(cls) )) : false; const pathPart = generateOptimizedStructuralStep( current, rootElement, hasConflictingElement, otherListElements ); if (pathPart) { pathParts.unshift(pathPart); } current = current.parentElement || ((current.getRootNode()).host); pathDepth++; } if (current !== rootElement) { pathCache.set(targetElement, null); return null; } const result = pathParts.length > 0 ? '/' + pathParts.join('/') : null; pathCache.set(targetElement, result); return result; } /** * Generate optimized structural step for XPath */ function generateOptimizedStructuralStep(element, rootElement, addPositionToAll, otherListElements) { const tagName = element.tagName.toLowerCase(); const parent = element.parentElement || ((element.getRootNode()).host); if (!parent) { return tagName; } const classes = getCommonClassesAcrossLists(element, otherListElements); if (classes.length > 0 && !addPositionToAll) { const classSelector = classes .map(cls => `contains(@class, '${cls}')`) .join(' and '); const hasConflictingElement = rootElement ? 
queryElementsInScope(rootElement, element.tagName.toLowerCase()) .filter(el => el !== element) .some(el => classes.every(cls => normalizeClasses(el.classList).split(' ').includes(cls) )) : false; if (!hasConflictingElement) { return `${tagName}[${classSelector}]`; } else { const position = getSiblingPosition(element, parent); return `${tagName}[${classSelector}][${position}]`; } } if (!addPositionToAll) { const meaningfulAttrs = ['role', 'type']; for (const attrName of meaningfulAttrs) { if (element.hasAttribute(attrName)) { const value = element.getAttribute(attrName).replace(/'/g, "\\'"); const isCommon = isAttributeCommonAcrossLists( element, attrName, value, otherListElements ); if (isCommon) { return `${tagName}[@${attrName}='${value}']`; } } } } const position = getSiblingPosition(element, parent); if (addPositionToAll || classes.length === 0) { return `${tagName}[${position}]`; } return tagName; } /** * Get common classes across list items */ function getCommonClassesAcrossLists(targetElement, otherListElements) { if (otherListElements.length === 0) { return normalizeClasses(targetElement.classList).split(' ').filter(Boolean); } const targetClasses = normalizeClasses(targetElement.classList).split(' ').filter(Boolean); if (targetClasses.length === 0) { return []; } const cacheKey = `${targetElement.tagName}_${targetClasses.join(',')}_${otherListElements.length}`; if (classCache.has(cacheKey)) { return classCache.get(cacheKey); } const targetClassSet = new Set(targetClasses); const similarElements = []; const maxElementsToCheck = 100; let checkedElements = 0; for (const listEl of otherListElements) { if (checkedElements >= maxElementsToCheck) break; const descendants = getAllDescendantsIncludingShadow(listEl); for (const child of descendants) { if (checkedElements >= maxElementsToCheck) break; if (child.tagName === targetElement.tagName) { similarElements.push(child); checkedElements++; } } } if (similarElements.length === 0) { classCache.set(cacheKey, 
targetClasses); return targetClasses; } const exactMatches = similarElements.filter(el => { const elClasses = normalizeClasses(el.classList).split(' ').filter(Boolean); if (elClasses.length !== targetClasses.length) return false; return elClasses.every(cls => targetClassSet.has(cls)); }); if (exactMatches.length > 0) { classCache.set(cacheKey, targetClasses); return targetClasses; } const commonClasses = []; for (const targetClass of targetClasses) { const existsInAllOtherLists = otherListElements.every(listEl => { const elementsInThisList = getAllDescendantsIncludingShadow(listEl).filter(child => child.tagName === targetElement.tagName ); return elementsInThisList.some(el => normalizeClasses(el.classList).split(' ').includes(targetClass) ); }); if (existsInAllOtherLists) { commonClasses.push(targetClass); } } classCache.set(cacheKey, commonClasses); return commonClasses; } /** * Normalize class names by removing dynamic parts */ function normalizeClasses(classList) { return Array.from(classList) .filter(cls => { return ( !cls.match(/\d{3,}|uuid|hash|id-|_\d+$/i) && !cls.startsWith('_ngcontent-') && !cls.startsWith('_nghost-') && !cls.match(/^ng-tns-c\d+-\d+$/) ); }) .sort() .join(' '); } /** * Check if attribute is common across lists */ function isAttributeCommonAcrossLists(targetElement, attrName, attrValue, otherListElements) { if (otherListElements.length === 0) { return true; } const targetPath = getElementPath(targetElement); for (const otherListElement of otherListElements) { const correspondingElement = findCorrespondingElement(otherListElement, targetPath); if (correspondingElement) { const otherValue = correspondingElement.getAttribute(attrName); if (otherValue !== attrValue) { return false; } } } return true; } /** * Get element path as indices */ function getElementPath(element) { const path = []; let current = element; while (current && current.parentElement) { const siblings = Array.from(current.parentElement.children); 
path.unshift(siblings.indexOf(current)); current = current.parentElement; } return path; } /** * Find corresponding element in another list */ function findCorrespondingElement(rootElement, path) { let current = rootElement; for (const index of path) { const children = Array.from(current.children); if (index >= children.length) { return null; } current = children[index]; } return current; } /** * Get sibling position */ function getSiblingPosition(element, parent) { const siblings = Array.from(parent.children || []).filter( child => child.tagName === element.tagName ); return siblings.indexOf(element) + 1; } /** * Query elements in scope (handles shadow DOM) */ function queryElementsInScope(rootElement, tagName) { if (rootElement.shadowRoot || isInShadowDOM(rootElement)) { return deepQuerySelectorAll(rootElement, tagName); } else { return Array.from(rootElement.querySelectorAll(tagName)); } } /** * Check if element is in shadow DOM */ function isInShadowDOM(element) { return element.getRootNode() instanceof ShadowRoot; } /** * Deep query selector for shadow DOM */ function deepQuerySelectorAll(root, selector) { const elements = []; function process(node) { if (node instanceof Element && node.matches(selector)) { elements.push(node); } for (const child of node.children) { process(child); } if (node instanceof HTMLElement && node.shadowRoot) { process(node.shadowRoot); } } process(root); return elements; } /** * Check if container contains element (works with shadow DOM) */ function elementContains(container, element) { if (container.contains(element)) { return true; } let current = element; while (current) { if (current === container) { return true; } current = current.parentElement || ((current.getRootNode()).host); } return false; } /** * Generate fallback XPath using data-mx-id */ function generateMandatoryChildFallbackXPath(childElement, parentElement) { try { const parentMxId = parentElement.getAttribute('data-mx-id'); const childMxId = 
childElement.getAttribute('data-mx-id');

      // Without an id on the list item there is nothing to anchor on.
      if (!parentMxId) {
        return null;
      }

      const parentTagName = parentElement.tagName.toLowerCase();
      const childTagName = childElement.tagName.toLowerCase();

      if (childMxId) {
        return `//${parentTagName}[@data-mx-id='${parentMxId}']//${childTagName}[@data-mx-id='${childMxId}']`;
      } else {
        const pathElements = getMandatoryFallbackPath(childElement, parentElement);
        if (pathElements.length > 0) {
          const parentPath = `//${parentTagName}[@data-mx-id='${parentMxId}']`;
          const childPath = pathElements.join('/');
          return `${parentPath}/${childPath}`;
        }
      }

      return null;
    } catch (error) {
      return null;
    }
  }

  /**
   * Build mandatory fallback path using data-mx-id
   *
   * Climbs from `targetElement` towards `rootElement` through parentElement
   * links and returns one XPath step per level, outermost first. A level
   * with a data-mx-id becomes `tag[@data-mx-id='...']`; any other level
   * becomes `tag[n]`, with n its 1-based position among same-tag siblings.
   */
  function getMandatoryFallbackPath(targetElement, rootElement) {
    const pathParts = [];
    let current = targetElement;

    while (current && current !== rootElement && current.parentElement) {
      const mxId = current.getAttribute('data-mx-id');
      const tagName = current.tagName.toLowerCase();

      if (mxId) {
        pathParts.unshift(`${tagName}[@data-mx-id='${mxId}']`);
      } else {
        const position = Array.from(current.parentElement.children)
          .filter(child => child.tagName === current.tagName)
          .indexOf(current) + 1;
        pathParts.unshift(`${tagName}[${position}]`);
      }

      current = current.parentElement;
    }

    return pathParts;
  }

  /**
   * Evaluate XPath and return elements
   *
   * Returns the element nodes of an ordered snapshot, or an empty array when
   * evaluation throws. `evaluate` is called on the owner document of the
   * host when `contextNode` is a ShadowRoot, and on `contextNode` itself
   * otherwise.
   */
  function evaluateXPath(xpath, contextNode) {
    try {
      const doc = contextNode instanceof ShadowRoot ?
        contextNode.host.ownerDocument : contextNode;

      const result = doc.evaluate(
        xpath,
        contextNode,
        null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null
      );

      const elements = [];
      for (let i = 0; i < result.snapshotLength; i++) {
        const node = result.snapshotItem(i);
        if (node && node.nodeType === Node.ELEMENT_NODE) {
          elements.push(node);
        }
      }

      return elements;
    } catch (error) {
      return [];
    }
  }

  /**
   * Create fields from selectors by evaluating them and extracting data
   *
   * For every selector object the first element matched by `primary` in the
   * document is turned into candidate fields: an <a> can contribute both an
   * innerText and an href candidate, any other element at most one.
   * Candidates are then filtered for parent/child overlap, ordered
   * top-to-bottom (rows are told apart by more than 5px of vertical
   * distance) and left-to-right, and finally de-duplicated and formatted.
   * `listElements` and `parentSelector` are not used by this function.
   */
  function createFieldsFromSelectors(selectorObjects, listElements, parentSelector) {
    const candidates = [];

    for (const selectorObj of selectorObjects) {
      try {
        const elements = evaluateXPath(selectorObj.primary, document);

        if (elements.length === 0) continue;

        const element = elements[0];
        const tagName = element.tagName.toLowerCase();

        if (tagName === 'a') {
          const href = element.getAttribute('href');
          const text = (element.textContent || '').trim();

          if (text) {
            const textField = createFieldData(element, selectorObj.primary, 'innerText');
            if (textField && textField.data) {
              candidates.push({
                field: textField,
                element: element,
                position: getElementPosition(element)
              });
            }
          }

          // Placeholder and script links are not worth extracting.
          if (href && href !== '#' && !href.startsWith('javascript:')) {
            const hrefField = createFieldData(element, selectorObj.primary, 'href');
            if (hrefField && hrefField.data) {
              candidates.push({
                field: hrefField,
                element: element,
                position: getElementPosition(element)
              });
            }
          }
        } else {
          const field = createFieldData(element, selectorObj.primary);

          if (field && field.data) {
            candidates.push({
              field: field,
              element: element,
              position: getElementPosition(element)
            });
          }
        }
      } catch (error) {
        // A selector that fails is skipped; the remaining ones still run.
      }
    }

    const filtered = removeParentChildDuplicates(candidates);

    filtered.sort((a, b) => {
      if (Math.abs(a.position.y - b.position.y) > 5) {
        return a.position.y - b.position.y;
      }
      return a.position.x - b.position.x;
    });

    return removeDuplicateContentAndFormat(filtered);
  }

  /**
   * Create field data from element
   *
   * Returns `{ data, selectorObj: { selector, attribute, tag, isShadow } }`,
   * or null when no data could be read.
   * With `forceAttribute` set to 'href' or 'innerText' exactly that source
   * is read. Otherwise the source depends on the tag: `src` for <img>, a
   * usable `href` (falling back to text) for <a>, trimmed textContent for
   * everything else.
   */
  function createFieldData(element, selector, forceAttribute) {
    const tagName
      = element.tagName.toLowerCase();
    let data = '';
    let attribute = forceAttribute || 'innerText';

    if (forceAttribute) {
      if (forceAttribute === 'href') {
        data = element.getAttribute('href') || '';
      } else if (forceAttribute === 'innerText') {
        data = (element.textContent || '').trim();
      }
    } else if (tagName === 'img') {
      data = element.getAttribute('src') || '';
      attribute = 'src';
    } else if (tagName === 'a') {
      const href = element.getAttribute('href') || '';
      const text = (element.textContent || '').trim();

      if (href && href !== '#' && !href.startsWith('javascript:')) {
        data = href;
        attribute = 'href';
      } else if (text) {
        data = text;
        attribute = 'innerText';
      }
    } else {
      data = (element.textContent || '').trim();
      attribute = 'innerText';
    }

    if (!data) {
      return null;
    }

    const isShadow = element.getRootNode() instanceof ShadowRoot;

    return {
      data: data,
      selectorObj: {
        selector: selector,
        attribute: attribute,
        tag: tagName.toUpperCase(),
        isShadow: isShadow
      }
    };
  }

  /**
   * Get element position
   *
   * Left/top of the element's bounding client rect, as `{ x, y }`.
   */
  function getElementPosition(element) {
    const rect = element.getBoundingClientRect();
    return {
      x: rect.left,
      y: rect.top
    };
  }

  /**
   * Remove parent-child duplicates
   *
   * Processes candidates in order. A candidate whose element contains an
   * already kept element is dropped; a candidate contained in an already
   * kept element replaces that element. Only the first related kept element
   * is considered per candidate. <a> and <img> candidates are always kept.
   */
  function removeParentChildDuplicates(candidates) {
    const filtered = [];

    for (const candidate of candidates) {
      let shouldInclude = true;
      const tagName = candidate.element.tagName.toLowerCase();

      for (const existing of filtered) {
        if (candidate.element.contains(existing.element)) {
          shouldInclude = false;
          break;
        } else if (existing.element.contains(candidate.element)) {
          // Safe although `filtered` is being iterated: the loop is left
          // right after the splice.
          const existingIndex = filtered.indexOf(existing);
          filtered.splice(existingIndex, 1);
          break;
        }
      }

      if (tagName === 'a' || tagName === 'img') {
        shouldInclude = true;
      }

      if (shouldInclude) {
        filtered.push(candidate);
      }
    }

    return filtered;
  }

  /**
   * Remove duplicate content and format for workflow
   */
  function removeDuplicateContentAndFormat(candidates) {
    const finalFields = {};
    const seenContent = new Set();
    const seenSelectors = new Set();
    let labelCounter = 1;

    for (const
candidate of candidates) { const content = candidate.field.data.trim().toLowerCase(); const selectorKey = `${candidate.field.selectorObj.selector}::${candidate.field.selectorObj.attribute}`; if (!seenContent.has(content) && !seenSelectors.has(selectorKey)) { seenContent.add(content); seenSelectors.add(selectorKey); const fieldName = `Label ${labelCounter}`; finalFields[fieldName] = { selector: candidate.field.selectorObj.selector, attribute: candidate.field.selectorObj.attribute, tag: candidate.field.selectorObj.tag, isShadow: candidate.field.selectorObj.isShadow }; labelCounter++; } } return finalFields; } /** * Auto-detect pagination type and selector * Returns: { type: string, selector: string | null } * Types: 'scrollDown', 'scrollUp', 'clickNext', 'clickLoadMore', '' */ window.autoDetectPagination = function (listSelector, options) { try { var MAX_BUTTON_TEXT_LENGTH = 50; var nextButtonTextPatterns = [ /^\s*next\s*$/i, /\bnext\s+page\b/i, /\bpage\s+suivante\b/i, /\bsiguiente\b/i, /\bweiter\b/i, /\bnächste\b/i, /\bvolgende\b/i, /\bpróximo\b/i, /\bavanti\b/i, ]; var nextButtonArrowPatterns = [ /^[>\s›→»⟩]+$/, /^>>$/, ]; var loadMorePatterns = [ /^\s*load\s+more\s*$/i, /^\s*show\s+more\s*$/i, /^\s*view\s+more\s*$/i, /^\s*see\s+more\s*$/i, /^\s*more\s+results\s*$/i, /^\s*plus\s+de\s+résultats\s*$/i, /^\s*más\s+resultados\s*$/i, /^\s*weitere\s+ergebnisse\s*$/i, /^\s*meer\s+laden\s*$/i, /^\s*carica\s+altri\s*$/i, /^\s*carregar\s+mais\s*$/i, ]; var paginationContainerPattern = /paginat|page-nav|pager|page-numbers|page-list/i; // --- Utility functions --- function matchesAnyPattern(text, patterns) { return patterns.some(function (pattern) { return pattern.test(text); }); } function isVisible(element) { try { var style = window.getComputedStyle(element); return style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0' && element.offsetWidth > 0 && element.offsetHeight > 0; } catch (e) { return false; } } function getClickableElements(root) { 
/**
 * Decide whether an element sits close enough to the list container to be a
 * plausible pagination control.
 *
 * "Near" means one of:
 *  - its top edge is at or below the list's bottom edge, within 300px;
 *  - its bottom edge is at or above the list's top edge, within 200px;
 *  - it overlaps the list vertically and the smaller of the two
 *    left/right edge gaps is under 150px.
 * Any exception while measuring yields `false`.
 *
 * @param element  - Candidate element.
 * @param listCont - The list container element.
 * @returns `true` when the element is near the list, otherwise `false`.
 */
function isNearList(element, listCont) {
  try {
    var list = listCont.getBoundingClientRect();
    var box = element.getBoundingClientRect();

    var justBelow = box.top >= list.bottom && box.top <= list.bottom + 300;
    if (justBelow) return true;

    var justAbove = box.bottom <= list.top && box.bottom >= list.top - 200;
    if (justAbove) return true;

    var sharesRows = !(box.bottom < list.top || box.top > list.bottom);
    if (!sharesRows) return false;

    var gapToRightEdge = Math.abs(box.left - list.right);
    var gapToLeftEdge = Math.abs(box.right - list.left);
    return Math.min(gapToRightEdge, gapToLeftEdge) < 150;
  } catch (e) {
    return false;
  }
}

/**
 * Tell whether a clickable element should be ignored as a pagination control:
 * either it lives inside the list container itself, or it is disabled
 * (`disabled` attribute present, or `aria-disabled="true"`).
 *
 * @param element  - Candidate element.
 * @param listCont - The list container element.
 * @returns `true` when the element must be skipped.
 */
function isSkippable(element, listCont) {
  if (listCont.contains(element)) {
    return true;
  }
  return Boolean(
    element.hasAttribute('disabled') || element.getAttribute('aria-disabled') === 'true'
  );
}
selectorChain = []; if (result.primary) { if (result.primary.id) selectorChain.push(result.primary.id); if (result.primary.testIdSelector) selectorChain.push(result.primary.testIdSelector); if (result.primary.relSelector) selectorChain.push(result.primary.relSelector); if (result.primary.accessibilitySelector) selectorChain.push(result.primary.accessibilitySelector); if (result.primary.hrefSelector) selectorChain.push(result.primary.hrefSelector); if (result.primary.formSelector) selectorChain.push(result.primary.formSelector); if (result.primary.attrSelector) selectorChain.push(result.primary.attrSelector); if (result.primary.generalSelector) selectorChain.push(result.primary.generalSelector); } return selectorChain.length > 0 ? selectorChain.join(',') : element.tagName.toLowerCase(); } /** * Comprehensive selector generator (based on @medv/finder) * Supports shadow DOM, iframes, and multiple selector strategies */ function getSelectors(iframeDoc, coordinates) { try { // ===== FINDER ALGORITHM ===== // Based on @medv/finder by Anton Medvedev // https://github.com/antonmedv/finder/blob/master/finder.ts const Limit = { All: 0, Two: 1, One: 2 }; let config; let rootDocument; function finder(input, options) { if (input.nodeType !== Node.ELEMENT_NODE) { throw new Error("Can't generate CSS selector for non-element node type."); } if ('html' === input.tagName.toLowerCase()) { return 'html'; } const defaults = { root: iframeDoc.body, idName: function (name) { return true; }, className: function (name) { return true; }, tagName: function (name) { return true; }, attr: function (name, value) { return false; }, seedMinLength: 1, optimizedMinLength: 2, threshold: 900, maxNumberOfTries: 9000 }; config = Object.assign({}, defaults, options || {}); rootDocument = findRootDocument(config.root, defaults); let path = bottomUpSearch(input, Limit.All, function () { return bottomUpSearch(input, Limit.Two, function () { return bottomUpSearch(input, Limit.One); }); }); if (path) { const 
/**
 * Choose the node to use as the root for selector lookups.
 *
 * - A document node is returned as is.
 * - The default root (`defaults.root`) is swapped for its owner document.
 * - Any other, caller-supplied root is returned unchanged.
 *
 * @param rootNode - The configured root node.
 * @param defaults - Default finder options; only `defaults.root` is read.
 * @returns The document or node described above.
 */
function findRootDocument(rootNode, defaults) {
  const isDocumentNode = rootNode.nodeType === Node.DOCUMENT_NODE;
  const isDefaultRoot = rootNode === defaults.root;
  return !isDocumentNode && isDefaultRoot ? rootNode.ownerDocument : rootNode;
}
/**
 * Render a finder path as a CSS selector string.
 *
 * `path[0]` is the target node; each following entry is an ancestor further
 * up the tree. Two consecutive entries are joined with the child combinator
 * (` > `) when the ancestor's `level` is exactly one more than the previous
 * entry's `level`, and with the descendant combinator (a space) otherwise.
 * A missing `level` on an ancestor entry is treated as 0.
 *
 * @param path - Non-empty array of `{ name, level? }` nodes.
 * @returns The selector, outermost ancestor first.
 */
function selector(path) {
  let inner = path[0];
  return path.slice(1).reduce(function (query, outer) {
    const outerLevel = outer.level || 0;
    const combinator = inner.level === outerLevel - 1 ? ' > ' : ' ';
    inner = outer;
    return outer.name + combinator + query;
  }, inner.name);
}

/**
 * Sum the `penalty` values of every node in a path (0 for an empty path).
 *
 * @param path - Array of nodes carrying a numeric `penalty`.
 * @returns Total penalty; lower means a more desirable selector.
 */
function penalty(path) {
  let total = 0;
  for (const node of path) {
    total = total + node.penalty;
  }
  return total;
}
/**
 * Collect the arguments that are neither `null` nor `undefined`.
 *
 * @returns An array of the usable arguments, or `null` when there are none.
 */
function maybe(...candidates) {
  const usable = candidates.filter(notEmpty);
  return usable.length > 0 ? usable : null;
}

/**
 * @param value - Any value.
 * @returns `false` for `null` and `undefined`, `true` for everything else.
 */
function notEmpty(value) {
  return !(value === null || value === undefined);
}

/**
 * Build every path that picks exactly one node from each level of `stack`
 * (a cartesian product), each prefixed with `path`.
 *
 * Results are ordered with the first level varying slowest. An empty `stack`
 * yields a single result, the prefix itself; an empty level yields no results.
 *
 * @param stack - Array of levels, each an array of candidate nodes.
 * @param path  - Optional prefix for every generated path (defaults to `[]`).
 * @returns Array of paths.
 */
function combinations(stack, path) {
  let partials = [path || []];

  for (const level of stack) {
    const extended = [];
    for (const prefix of partials) {
      for (const node of level) {
        extended.push(prefix.concat(node));
      }
    }
    partials = extended;
  }

  return partials;
}
scope.visited.set(newPathKey, true); yield* optimize(newPath, input, scope); } } catch (e) { continue; } } } } function same(path, input) { return rootDocument.querySelector(selector(path)) === input; } // ===== CSSESC UTILITY ===== const regexAnySingleEscape = /[ -,\.\/:-@\[-\^`\{-~]/; const regexSingleEscape = /[ -,\.\/:-@\[\]\^`\{-~]/; const regexExcessiveSpaces = /(^|\\+)?(\\[A-F0-9]{1,6})\x20(?![a-fA-F0-9\x20])/g; const defaultCssEscOptions = { escapeEverything: false, isIdentifier: false, quotes: 'single', wrap: false }; function cssesc(string, opt) { const options = Object.assign({}, defaultCssEscOptions, opt || {}); if (options.quotes != 'single' && options.quotes != 'double') { options.quotes = 'single'; } const quote = options.quotes == 'double' ? '"' : "'"; const isIdentifier = options.isIdentifier; const firstChar = string.charAt(0); let output = ''; let counter = 0; const length = string.length; while (counter < length) { const character = string.charAt(counter++); let codePoint = character.charCodeAt(0); let value = undefined; if (codePoint < 0x20 || codePoint > 0x7e) { if (codePoint >= 0xd800 && codePoint <= 0xdbff && counter < length) { const extra = string.charCodeAt(counter++); if ((extra & 0xfc00) == 0xdc00) { codePoint = ((codePoint & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; } else { counter--; } } value = '\\' + codePoint.toString(16).toUpperCase() + ' '; } else { if (options.escapeEverything) { if (regexAnySingleEscape.test(character)) { value = '\\' + character; } else { value = '\\' + codePoint.toString(16).toUpperCase() + ' '; } } else if (/[\t\n\f\r\x0B]/.test(character)) { value = '\\' + codePoint.toString(16).toUpperCase() + ' '; } else if ( character == '\\' || (!isIdentifier && ((character == '"' && quote == character) || (character == "'" && quote == character))) || (isIdentifier && regexSingleEscape.test(character)) ) { value = '\\' + character; } else { value = character; } } output += value; } if (isIdentifier) { if 
/**
 * Resolve the element that should be treated as the target at the viewport
 * point (x, y) inside `iframeDoc`.
 *
 * Resolution order:
 *  1. If any element under the point has `role="dialog"`, only that dialog and
 *     its descendants are considered, and the most deeply nested one wins.
 *  2. Otherwise, among the three top-most elements, an absolutely/fixed
 *     positioned element with z-index above 50 wins immediately, as does an
 *     `<svg>` root found in the first two entries (so an icon is targeted as a
 *     whole rather than one of its inner shapes).
 *  3. Otherwise the element with the longest ancestor chain wins.
 * The winner is then followed into open shadow roots (at most 4 levels).
 *
 * @param x - Horizontal viewport coordinate.
 * @param y - Vertical viewport coordinate.
 * @returns The resolved element, or `null` when nothing is under the point.
 */
function getDeepestElementFromPoint(x, y) {
  const elements = iframeDoc.elementsFromPoint(x, y);
  if (!elements || elements.length === 0) return null;

  const dialogElement = elements.find(function (el) {
    return el.getAttribute('role') === 'dialog';
  });

  if (dialogElement) {
    const dialogElements = elements.filter(function (el) {
      return el === dialogElement || dialogElement.contains(el);
    });

    const findDeepestInDialog = function (elems) {
      if (!elems.length) return null;
      if (elems.length === 1) return elems[0];

      let deepestElement = elems[0];
      let maxDepth = 0;

      for (let i = 0; i < elems.length; i++) {
        // Depth is measured relative to the dialog, not the document root.
        let depth = 0;
        let current = elems[i];
        while (current && current.parentElement && current !== dialogElement.parentElement) {
          depth++;
          current = current.parentElement;
        }
        if (depth > maxDepth) {
          maxDepth = depth;
          deepestElement = elems[i];
        }
      }
      return deepestElement;
    };

    return findDeepestInDialog(dialogElements);
  }

  const findDeepestElement = function (elems) {
    if (!elems.length) return null;
    if (elems.length === 1) return elems[0];

    for (let i = 0; i < Math.min(3, elems.length); i++) {
      const element = elems[i];
      const style = window.getComputedStyle(element);
      // zIndex may be "auto"; parseInt then yields NaN, which falls back to 0.
      const zIndex = parseInt(style.zIndex, 10) || 0;

      if ((style.position === 'fixed' || style.position === 'absolute') && zIndex > 50) {
        return element;
      }

      // tagName is only upper-cased for HTML-namespace elements; a real inline
      // SVG root reports "svg", so compare case-insensitively.
      if (i < 2 && element.tagName.toLowerCase() === 'svg') {
        return element;
      }
    }

    let deepestElement = elems[0];
    let maxDepth = 0;

    for (let i = 0; i < elems.length; i++) {
      let depth = 0;
      let current = elems[i];
      while (current) {
        depth++;
        if (current.parentElement) {
          current = current.parentElement;
        } else {
          break;
        }
      }
      if (depth > maxDepth) {
        maxDepth = depth;
        deepestElement = elems[i];
      }
    }
    return deepestElement;
  };

  let deepestElement = findDeepestElement(elements);
  if (!deepestElement) return null;

  const traverseShadowDOM = function (element) {
    const MAX_SHADOW_DEPTH = 4;
    let current = element;
    let shadowRoot = current.shadowRoot;
    let deepest = current;
    let depth = 0;

    while (shadowRoot && depth < MAX_SHADOW_DEPTH) {
      const shadowElement = shadowRoot.elementFromPoint(x, y);
      // Stop when the shadow root has nothing new to offer at this point.
      if (!shadowElement || shadowElement === current) break;
      deepest = shadowElement;
      current = shadowElement;
      shadowRoot = current.shadowRoot;
      depth++;
    }
    return deepest;
  };

  deepestElement = traverseShadowDOM(deepestElement);
  return deepestElement;
}
/**
 * Check whether the argument is a single ASCII digit character.
 *
 * Always returns a strict boolean: empty strings, `null`, `undefined`,
 * non-strings and multi-character strings all yield `false` (rather than
 * leaking the falsy input back to the caller).
 *
 * @param char - Value to test, normally a one-character string.
 * @returns `true` only for '0' through '9'.
 */
function isCharacterNumber(char) {
  return typeof char === 'string' && char.length === 1 && /[0-9]/.test(char);
}
/**
 * Detect a run of numbered page links inside a container.
 *
 * Looks at every `a`, `button` and `[role="button"]` descendant, keeps those
 * whose trimmed text consists solely of digits, and reports whether at least
 * two of the resulting numbers are consecutive (e.g. 3 and 4).
 *
 * @param container - Element (or anything exposing `querySelectorAll`).
 * @returns `true` when two consecutive page numbers are present.
 */
function containsNumericPageLinks(container) {
  var pageNumbers = Array.from(container.querySelectorAll('a, button, [role="button"]'))
    .map(function (control) { return (control.textContent || '').trim(); })
    .filter(function (label) { return /^\d+$/.test(label); })
    .map(function (label) { return parseInt(label, 10); });

  if (pageNumbers.length < 2) return false;

  pageNumbers.sort(function (a, b) { return a - b; });

  // After sorting, consecutive page numbers must be neighbours in the array.
  return pageNumbers.some(function (value, idx) {
    return idx + 1 < pageNumbers.length && pageNumbers[idx + 1] - value === 1;
  });
}
/**
 * Find the control that follows the currently active page link.
 *
 * Scans the container's `a`, `button` and `[role="button"]` descendants in
 * document order. A control counts as "active" when it has
 * `aria-current="page"`, carries one of the classes `active`, `current` or
 * `selected`, or has an ancestor (or is itself) matching
 * `[aria-current="page"]`. The control immediately after the first active one
 * that has a successor is returned.
 *
 * @param container - Pagination wrapper element.
 * @returns The control following the active page, or `null` if there is none.
 */
function findLastPageLink(container) {
  var controls = Array.from(container.querySelectorAll('a, button, [role="button"]'));

  var marksCurrentPage = function (control) {
    if (control.getAttribute('aria-current') === 'page') return true;
    var markerClasses = ['active', 'current', 'selected'];
    for (var c = 0; c < markerClasses.length; c++) {
      if (control.classList.contains(markerClasses[c])) return true;
    }
    return control.closest('[aria-current="page"]') !== null;
  };

  var activeIndex = controls.findIndex(function (control, position) {
    return marksCurrentPage(control) && position + 1 < controls.length;
  });

  return activeIndex === -1 ? null : controls[activeIndex + 1];
}
(element.hasAttribute('disabled') || element.getAttribute('aria-disabled') === 'true') continue; var text = (element.textContent || '').trim(); var ariaLabel = element.getAttribute('aria-label') || ''; var title = element.getAttribute('title') || ''; if (text.length > MAX_BUTTON_TEXT_LENGTH) continue; var combinedText = text + ' ' + ariaLabel + ' ' + title; if (matchesAnyPattern(combinedText, loadMorePatterns)) { if (20 > lmScore) { lmScore = 20; loadMoreBtn = element; } } if (isNextButton(text, ariaLabel, combinedText)) { if (20 > nextScore) { nextScore = 20; nextBtn = element; } } } var hasNumberedPages = containsNumericPageLinks(wrapper); if (loadMoreBtn) { return { type: 'clickLoadMore', selector: generatePaginationSelector(loadMoreBtn), confidence: 'high' }; } if (nextBtn) { return { type: 'clickNext', selector: generatePaginationSelector(nextBtn), confidence: 'high' }; } if (hasNumberedPages) { var lastLink = findLastPageLink(wrapper); if (lastLink) { return { type: 'clickNext', selector: generatePaginationSelector(lastLink), confidence: 'medium' }; } } return null; } function detectFromNearbyElements(listCont) { var clickables = getClickableElements(document); var nextBtn = null; var nextScore = 0; var loadMoreBtn = null; var lmScore = 0; for (var i = 0; i < clickables.length; i++) { var element = clickables[i]; if (!isVisible(element)) continue; if (isSkippable(element, listCont)) continue; var text = (element.textContent || '').trim(); var ariaLabel = element.getAttribute('aria-label') || ''; var title = element.getAttribute('title') || ''; if (text.length > MAX_BUTTON_TEXT_LENGTH) continue; var combinedText = text + ' ' + ariaLabel + ' ' + title; if (!isNearList(element, listCont)) continue; if (matchesAnyPattern(combinedText, loadMorePatterns)) { var score = 15; if (element.tagName === 'BUTTON') score += 2; var className = element.className || ''; if (paginationContainerPattern.test(className)) score += 3; if (score > lmScore) { lmScore = score; 
/**
 * Score how likely the current page is to use infinite scrolling.
 *
 * Returns 0 straight away when the document is no taller than the viewport.
 * Otherwise points are added for:
 *  - a scroll sentinel / trigger element being present (+6);
 *  - an infinite-scroll library marker being present (+6);
 *  - a visible "back to top" style control (+2);
 *  - the document being more than five viewports tall (+2).
 * Selectors in the "back to top" group that the browser rejects are skipped;
 * any other failure makes the whole function return 0.
 *
 * @returns The accumulated score (0 when nothing matched or on error).
 */
function detectInfiniteScrollScore() {
  try {
    var pageHeight = document.documentElement.scrollHeight;
    var viewport = window.innerHeight;
    if (pageHeight <= viewport) return 0;

    var anyPresent = function (selectors) {
      return selectors.some(function (css) { return document.querySelector(css); });
    };

    var total = 0;

    var hasSentinel = anyPresent([
      '[data-infinite]',
      '[data-scroll-trigger]',
      '#infinite-scroll-trigger',
      '[class*="infinite-scroll"]',
      '[id*="infinite-scroll"]',
    ]);
    if (hasSentinel) total += 6;

    var hasLibraryMarker = anyPresent([
      '.infinite-scroll',
      '[data-infinite-scroll]',
      '[class*="infinite-scroll"]',
    ]);
    if (hasLibraryMarker) total += 6;

    var hasVisibleBackToTop = [
      '[aria-label*="scroll to top" i]',
      '[title*="back to top" i]',
      '.back-to-top',
      '#back-to-top',
      '[class*="scrolltop"]',
      '[class*="backtotop"]',
    ].some(function (css) {
      try {
        var control = document.querySelector(css);
        return control && isVisible(control);
      } catch (e) {
        // Selector not supported here: try the next one.
        return false;
      }
    });
    if (hasVisibleBackToTop) total += 2;

    if (pageHeight > viewport * 5) total += 2;

    return total;
  } catch (e) {
    return 0;
  }
}
/**
 * Reduce a class list to a stable, order-independent string.
 *
 * Class names are discarded when they match the generated-token pattern
 * (three or more consecutive digits, the substrings "uuid", "hash" or "id-"
 * in any letter case, or a trailing "_<digits>"), when they start with
 * "_ngcontent-" or "_nghost-", or when they have the form "ng-tns-c<N>-<N>".
 * The survivors are sorted with the default string ordering and joined with
 * single spaces.
 *
 * @param classList - Iterable or array-like of class name strings.
 * @returns Space-separated, sorted class names ('' when none remain).
 */
const normalizeClasses = (classList) => {
  const generatedToken = /\d{3,}|uuid|hash|id-|_\d+$/i;
  const scopedAnimationClass = /^ng-tns-c\d+-\d+$/;

  const isVolatile = (name) =>
    generatedToken.test(name) ||
    name.startsWith('_ngcontent-') ||
    name.startsWith('_nghost-') ||
    scopedAnimationClass.test(name);

  const stable = [];
  for (const name of Array.from(classList)) {
    if (!isVolatile(name)) {
      stable.push(name);
    }
  }

  stable.sort();
  return stable.join(' ');
};
isCustomElement = tagName.includes('-'); const standardExcludeSelectors = ['script', 'style', 'meta', 'link', 'title', 'head']; if (!isCustomElement && standardExcludeSelectors.includes(tagName)) { return null; } const children = Array.from(element.children); let childrenStructureString; if (tagName === 'table') { const thead = element.querySelector('thead'); const representativeRow = thead ? thead.querySelector('tr') : element.querySelector('tr'); if (representativeRow) { const structure = Array.from(representativeRow.children).map(child => ({ tag: child.tagName.toLowerCase(), classes: normalizeClasses(child.classList), })); childrenStructureString = JSON.stringify(structure); } else { childrenStructureString = JSON.stringify([]); } } else if (tagName === 'tr') { const structure = children.map((child) => ({ tag: child.tagName.toLowerCase(), classes: normalizeClasses(child.classList), })); childrenStructureString = JSON.stringify(structure); } else { const structure = children.map((child) => ({ tag: child.tagName.toLowerCase(), classes: normalizeClasses(child.classList), hasText: (child.textContent ?? '').trim().length > 0, })); childrenStructureString = JSON.stringify(structure); } const normalizedClasses = normalizeClasses(element.classList); const relevantAttributes = Array.from(element.attributes) .filter((attr) => { if (isCustomElement) { return !['id', 'style', 'data-reactid', 'data-react-checksum'].includes(attr.name.toLowerCase()); } else { return ( !['id', 'style', 'data-reactid', 'data-react-checksum'].includes(attr.name.toLowerCase()) && (!attr.name.startsWith('data-') || attr.name === 'data-type' || attr.name === 'data-role') ); } }) .map((attr) => `${attr.name}=${attr.value}`) .sort(); let depth = 0; let parent = element.parentElement; while (parent && depth < 20) { depth++; parent = parent.parentElement; } const textContent = (element.textContent ?? 
'').trim(); const textCharacteristics = { hasText: textContent.length > 0, textLength: Math.floor(textContent.length / 20) * 20, hasLinks: element.querySelectorAll('a').length, hasImages: element.querySelectorAll('img').length, hasButtons: element.querySelectorAll('button, input[type="button"], input[type="submit"]').length, }; const signature = `${tagName}::${normalizedClasses}::${children.length}::${childrenStructureString}::${relevantAttributes.join('|')}`; return { tagName, normalizedClasses, childrenCount: children.length, childrenStructure: childrenStructureString, attributes: relevantAttributes.join('|'), depth, textCharacteristics, signature, }; }; const calculateSimilarity = (fp1, fp2) => { if (!fp1 || !fp2) return 0; let score = 0; let maxScore = 0; maxScore += 10; if (fp1.tagName === fp2.tagName) score += 10; else return 0; maxScore += 8; if (fp1.normalizedClasses === fp2.normalizedClasses) score += 8; else if (fp1.normalizedClasses && fp2.normalizedClasses) { const classes1 = fp1.normalizedClasses.split(' ').filter((c) => c); const classes2 = fp2.normalizedClasses.split(' ').filter((c) => c); const commonClasses = classes1.filter((c) => classes2.includes(c)); if (classes1.length > 0 && classes2.length > 0) { score += (commonClasses.length / Math.max(classes1.length, classes2.length)) * 8; } } maxScore += 8; if (fp1.childrenStructure === fp2.childrenStructure) score += 8; else if (fp1.childrenCount === fp2.childrenCount) score += 4; maxScore += 5; if (fp1.attributes === fp2.attributes) score += 5; else if (fp1.attributes && fp2.attributes) { const attrs1 = fp1.attributes.split('|').filter((a) => a); const attrs2 = fp2.attributes.split('|').filter((a) => a); const commonAttrs = attrs1.filter((a) => attrs2.includes(a)); if (attrs1.length > 0 && attrs2.length > 0) { score += (commonAttrs.length / Math.max(attrs1.length, attrs2.length)) * 5; } } maxScore += 2; if (Math.abs(fp1.depth - fp2.depth) <= 1) score += 2; else if (Math.abs(fp1.depth - fp2.depth) <= 
/**
 * Tell whether an element has at least one descendant worth extracting.
 *
 * A descendant is "meaningful" when it is an `<img>` with a `src`, an `<a>`
 * with an `href`, has non-whitespace text content, or contains an `<svg>`.
 * Children that are not meaningful themselves are searched recursively when
 * they have element children. For every element that is searched, the
 * children of its open shadow root (if any) are examined the same way.
 * Recursion stops once the depth exceeds 5 (the element's own children are
 * depth 0).
 *
 * The search stops at the first meaningful descendant.
 *
 * @param element - Root element to inspect (the root itself is not tested).
 * @returns `true` when a meaningful descendant exists, otherwise `false`.
 */
const hasAnyMeaningfulChildren = (element) => {
  const MAX_DEPTH = 5;

  // Single definition of "meaningful", shared by light-DOM and shadow-DOM children.
  const isMeaningful = (node) => {
    const tagName = node.tagName.toLowerCase();
    if (tagName === 'img' && node.hasAttribute('src')) return true;
    if (tagName === 'a' && node.hasAttribute('href')) return true;
    if ((node.textContent || '').trim().length > 0) return true;
    return Boolean(node.querySelector('svg'));
  };

  const containsMeaningful = (host, depth) => {
    if (depth > MAX_DEPTH) return false;

    const childLists = [host.children];
    if (host.shadowRoot) {
      childLists.push(host.shadowRoot.children);
    }

    for (const list of childLists) {
      for (const child of Array.from(list)) {
        if (isMeaningful(child)) return true;
        if (child.children.length > 0 && containsMeaningful(child, depth + 1)) return true;
      }
    }
    return false;
  };

  return containsMeaningful(element, 0);
};
el.getBoundingClientRect(); return rect.width > 0 && rect.height > 0; }); elements.forEach((element) => { if (!visited.has(element)) { visited.add(element); allElements.push(element); if (element.shadowRoot) { traverseContainer(element.shadowRoot); } } }); } catch (error) { console.warn('Error traversing container:', error); } }; traverseContainer(document); return allElements; }; const allElements = getAllVisibleElements(); const processedInTables = new Set(); const elementGroups = new Map(); const groupedElements = new Set(); const tables = allElements.filter(el => el.tagName === 'TABLE'); tables.forEach(table => { const rows = Array.from(table.querySelectorAll('tbody > tr')).filter(row => { const parent = row.parentElement; if (!parent || !table.contains(parent)) return false; const rect = row.getBoundingClientRect(); return rect.width > 0 && rect.height > 0; }); if (rows.length >= 2) { const representativeFingerprint = getStructuralFingerprint(rows[0]); if (!representativeFingerprint) return; const group = { elements: rows, fingerprint: representativeFingerprint, representative: rows[0], }; rows.forEach(row => { elementGroups.set(row, group); groupedElements.add(row); processedInTables.add(row); }); } }); const remainingElements = allElements.filter(el => !processedInTables.has(el)); const elementFingerprints = new Map(); remainingElements.forEach((element) => { const fingerprint = getStructuralFingerprint(element); if (fingerprint) { elementFingerprints.set(element, fingerprint); } }); const processedElements = new Set(); const similarityThreshold = 0.7; const minGroupSize = 2; const maxParentLevels = 5; elementFingerprints.forEach((fingerprint, element) => { if (processedElements.has(element)) return; const currentGroup = [element]; processedElements.add(element); elementFingerprints.forEach((otherFingerprint, otherElement) => { if (processedElements.has(otherElement)) return; const similarity = calculateSimilarity(fingerprint, otherFingerprint); if 
(similarity >= similarityThreshold) { currentGroup.push(otherElement); processedElements.add(otherElement); } }); if (currentGroup.length >= minGroupSize && hasAnyMeaningfulChildren(element)) { let grouped = false; for (let level = 1; level <= maxParentLevels && !grouped; level++) { let ancestor = currentGroup[0]; for (let i = 0; i < level && ancestor; i++) { ancestor = ancestor.parentElement; } if (!ancestor) break; const allShareAncestor = currentGroup.every(el => { let elAncestor = el; for (let i = 0; i < level && elAncestor; i++) { elAncestor = elAncestor.parentElement; } return elAncestor === ancestor; }); if (allShareAncestor) { const group = { elements: currentGroup, fingerprint, representative: element, }; currentGroup.forEach((el) => { elementGroups.set(el, group); groupedElements.add(el); }); grouped = true; } } if (!grouped) { currentGroup.forEach((el, idx) => { if (idx > 0) processedElements.delete(el); }); } } }); const uniqueGroups = new Map(); elementGroups.forEach((group) => { const signature = group.fingerprint.signature; if (!uniqueGroups.has(signature)) { const tagName = group.fingerprint.tagName; const classes = group.fingerprint.normalizedClasses.split(' ').filter(Boolean); let xpath = `//${tagName}`; if (classes.length > 0) { const classConditions = classes.map(cls => `contains(@class, '${cls}')`).join(' and '); xpath += `[${classConditions}]`; } const sampleTexts = group.elements.slice(0, 3).map((el) => { return (el.textContent || '').trim().substring(0, 200); }); const sampleHTML = group.representative.outerHTML.substring(0, 500); uniqueGroups.set(signature, { fingerprint: group.fingerprint, count: group.elements.length, xpath: xpath, sampleTexts: sampleTexts, sampleHTML: sampleHTML, }); } }); return Array.from(uniqueGroups.values()); } catch (error) { console.error('[analyzeElementGroups] Error:', error); return []; } }; })(); ================================================ FILE: server/src/sdk/selectorValidator.ts 
/**
 * Initialize with an existing Page instance and navigate to URL.
 *
 * Navigation first waits for "networkidle". If that attempt fails, the reason is
 * logged and the navigation is retried once with the less strict
 * "domcontentloaded". An error from the retry propagates to the caller.
 *
 * @param page Page instance from RemoteBrowser
 * @param url URL to navigate to
 */
async initialize(page: Page, url: string): Promise {
  this.page = page;
  const timeout = 100000;

  try {
    await page.goto(url, { waitUntil: "networkidle", timeout });
  } catch (err) {
    // Report why the strict wait failed instead of discarding the error silently.
    const reason = err instanceof Error ? err.message : String(err);
    logger.warn(`Navigation to ${url} did not reach networkidle (${reason}); retrying with domcontentloaded`);
    await page.goto(url, { waitUntil: "domcontentloaded", timeout });
  }

  logger.info(`Navigated to ${url} using RemoteBrowser page`);
}
/**
 * Validate and enrich every selector of a schema.
 *
 * Fields are checked one after another through validateSelector. A field given as a
 * plain string is treated as `{ selector }`. Every failing field is collected before
 * the result is reported, so the caller sees all problems at once.
 *
 * @param fields Map of field name to a selector string or a selector input object
 * @returns `{ valid: true, enriched }` when every field validated, otherwise
 *          `{ valid: false, errors }` with one message per failing field
 */
async validateSchemaFields(
  fields: Record
): Promise<{ valid: boolean; enriched?: Record; errors?: string[] }> {
  const failures: string[] = [];
  const enrichedByField: Record = {};

  for (const [fieldName, fieldInput] of Object.entries(fields)) {
    let candidate: SelectorInput;
    if (typeof fieldInput === 'string') {
      candidate = { selector: fieldInput };
    } else {
      candidate = fieldInput;
    }

    const outcome = await this.validateSelector(candidate);
    if (!outcome.valid || !outcome.enriched) {
      failures.push(`Field "${fieldName}": ${outcome.error}`);
      continue;
    }
    enrichedByField[fieldName] = outcome.enriched;
  }

  return failures.length > 0
    ? { valid: false, errors: failures }
    : { valid: true, enriched: enrichedByField };
}
/**
 * Detect input type for a given selector.
 *
 * @param selector CSS selector, or an XPath expression starting with `//` or `(//`
 * @returns The `type` of an `<input>` ('text' when it is empty), 'textarea' for a
 *          `<textarea>`, 'select' for a `<select>`, and 'text' for any other element
 * @throws Error 'Browser not initialized' when no page is set; otherwise an Error
 *         prefixed with 'Failed to detect input type: ' when the lookup fails or the
 *         selector matches nothing
 */
async detectInputType(selector: string): Promise {
  if (!this.page) {
    throw new Error('Browser not initialized');
  }

  try {
    const looksLikeXPath = ['//', '(//'].some((prefix) => selector.startsWith(prefix));
    const target = await this.page
      .locator(looksLikeXPath ? `xpath=${selector}` : selector)
      .first();

    const matches = await target.count();
    if (matches === 0) {
      throw new Error(`Selector "${selector}" did not match any elements`);
    }

    // Awaited inside the try block so that evaluation errors are wrapped below as well.
    const detectedType = await target.evaluate((node) => {
      if (node instanceof HTMLInputElement) return node.type || 'text';
      if (node instanceof HTMLTextAreaElement) return 'textarea';
      return node instanceof HTMLSelectElement ? 'select' : 'text';
    });
    return detectedType;
  } catch (error: any) {
    throw new Error(`Failed to detect input type: ${error.message}`);
  }
}
/**
 * Auto-detect pagination type and selector from list selector.
 *
 * Injects browserSide/pageAnalyzer.js into the page and calls the
 * `autoDetectPagination` function it is expected to define on `window`. A detected
 * 'clickLoadMore' button is verified by actually clicking it. When the click loads
 * nothing, a scroll probe runs instead and 'scrollDown' is reported if that probe
 * sees new content.
 *
 * @param listSelector Selector of the list items
 * @returns `{ success: false, error }` on failure; otherwise `success: true` with the
 *          detected `type` (an empty string when nothing was detected) and `selector`
 */
async autoDetectPagination(listSelector: string): Promise<{
  success: boolean;
  type?: string;
  selector?: string | null;
  error?: string;
}> {
  if (!this.page) {
    return { success: false, error: 'Browser not initialized' };
  }

  try {
    const fs = require('fs');
    const path = require('path');
    const analyzerPath = path.join(__dirname, 'browserSide/pageAnalyzer.js');
    const analyzerSource = fs.readFileSync(analyzerPath, 'utf8');

    // Make the analyzer's functions available in the page.
    await this.page.evaluate((script) => {
      eval(script);
    }, analyzerSource);

    const detection = await this.page.evaluate((selector) => {
      const win = window as any;
      if (typeof win.autoDetectPagination !== 'function') {
        console.error('autoDetectPagination function not found!');
        return { type: '', selector: null, error: 'Pagination auto-detection function not loaded' };
      }
      const result = win.autoDetectPagination(selector);
      return result;
    }, listSelector);

    if (detection.debug) {
      logger.info(`Pagination debug info: ${JSON.stringify(detection.debug)}`);
    }

    if (detection.error) {
      logger.error(`Button detection error: ${detection.error}`);
      return { success: false, error: detection.error };
    }

    // Nothing detected by the page analyzer.
    if (!detection.type) {
      return { success: true, type: '', selector: null };
    }

    // Only a Load More button that comes with a selector needs a live click test.
    const needsClickTest = detection.type === 'clickLoadMore' && detection.selector;
    if (!needsClickTest) {
      logger.info(`Detected pagination type: ${detection.type}${detection.selector ? ` with selector: ${detection.selector}` : ''}`);
      return { success: true, type: detection.type, selector: detection.selector };
    }

    logger.info('Testing Load More button by clicking...');
    const loadMoreVerified = await this.testLoadMoreButton(detection.selector, listSelector);
    if (loadMoreVerified) {
      logger.info(`Verified Load More button works`);
      return { success: true, type: detection.type, selector: detection.selector };
    }

    logger.warn('Load More button did not load content, falling back to scroll detection');
    const scrollProbe = await this.testInfiniteScrollByScrolling(listSelector);
    if (scrollProbe.detected) {
      return { success: true, type: 'scrollDown', selector: null };
    }

    return { success: true, type: '', selector: null };
  } catch (error: any) {
    logger.error('Pagination auto-detection error:', error);
    return { success: false, error: `Pagination auto-detection failed: ${error.message}` };
  }
}
/**
 * Test for infinite scroll by actually scrolling and checking if content loads.
 *
 * Records the number of list items and the document height, scrolls to the bottom,
 * waits two seconds, records both values again, and then scrolls back to the
 * original vertical offset. Content counts as loaded when more items match the
 * selector or the document grew by more than 100 pixels.
 *
 * @param listSelector CSS selector, or an XPath expression starting with `//` or `(//`
 * @returns `{ detected: true, details }` when content was added, `{ detected: false }`
 *          otherwise (including when no page is set or the probe throws)
 */
private async testInfiniteScrollByScrolling(listSelector: string): Promise<{
  detected: boolean;
  details?: string;
}> {
  if (!this.page) {
    return { detected: false };
  }

  // Executed inside the browser by page.evaluate, so it must stay self-contained
  // (no references to variables from this method). Used for both snapshots.
  const captureListState = (selector: string) => {
    const isXPath = selector.startsWith('//') || selector.startsWith('(//');
    const itemCount = isXPath
      ? document.evaluate(selector, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null).snapshotLength
      : document.querySelectorAll(selector).length;

    return {
      itemCount,
      scrollHeight: document.documentElement.scrollHeight,
      scrollY: window.scrollY
    };
  };

  try {
    const initialState = await this.page.evaluate(captureListState, listSelector);
    logger.info(`Initial state: ${initialState.itemCount} items, scrollHeight: ${initialState.scrollHeight}`);

    await this.page.evaluate(() => {
      window.scrollTo(0, document.documentElement.scrollHeight);
    });
    // Give lazily loaded content time to arrive.
    await this.page.waitForTimeout(2000);

    const afterScrollState = await this.page.evaluate(captureListState, listSelector);

    // Put the viewport back where it was before the probe.
    await this.page.evaluate((originalY) => {
      window.scrollTo(0, originalY);
    }, initialState.scrollY);

    const itemsAdded = afterScrollState.itemCount > initialState.itemCount;
    const heightIncreased = afterScrollState.scrollHeight > initialState.scrollHeight + 100;

    if (itemsAdded || heightIncreased) {
      const details = `Items: ${initialState.itemCount} → ${afterScrollState.itemCount}, Height: ${initialState.scrollHeight} → ${afterScrollState.scrollHeight}`;
      logger.info(`Content changed: ${details}`);
      return { detected: true, details };
    }

    logger.info('No content change detected');
    return { detected: false };
  } catch (error: any) {
    logger.error('Error during scroll test:', error.message);
    return { detected: false };
  }
}
/**
 * Enrich a simplified workflow with full metadata.
 *
 * Opens a RemoteBrowser, navigates to the first step URL that is set and is not
 * 'about:blank', and walks every action of every step against that one page:
 *  - `type`: the value is passed through encrypt() and the input type is detected
 *    when the caller did not supply one.
 *  - `scrapeSchema`: every field selector is validated and enriched.
 *  - `scrapeList`: fields are auto-detected from `itemSelector`; pagination is taken
 *    from the config or auto-detected.
 *  - any other string action is copied through unchanged; actions whose `action` is
 *    not a string are dropped.
 * Problems are collected rather than thrown, so the caller receives all of them.
 *
 * @param simplifiedWorkflow Steps in the simplified SDK format
 * @param userId User on whose behalf the RemoteBrowser is created and destroyed
 * @returns `{ success: true, workflow, url }` when no error was recorded, otherwise
 *          `{ success: false, errors }`
 */
static async enrichWorkflow(
  simplifiedWorkflow: SimplifiedWorkflowPair[],
  userId: string
): Promise<{ success: boolean; workflow?: any[]; errors?: string[]; url?: string }> {
  const errors: string[] = [];
  const enrichedWorkflow: any[] = [];

  if (simplifiedWorkflow.length === 0) {
    return { success: false, errors: ['Workflow is empty'] };
  }

  // The first usable URL in the workflow decides which page is opened for validation.
  let url: string | undefined;
  for (const step of simplifiedWorkflow) {
    const rawUrl = step.where.url;
    if (rawUrl && rawUrl !== 'about:blank') {
      // A regex-style URL is used as-is (its pattern string) for navigation.
      url = typeof rawUrl === 'string' ? rawUrl : rawUrl.$regex;
      break;
    }
  }

  if (!url) {
    return { success: false, errors: ['No valid URL found in workflow'] };
  }

  let browserId: string | null = null;
  const validator = new SelectorValidator();

  try {
    logger.info('Creating RemoteBrowser for validation');
    const { browserId: id, page } = await createRemoteBrowserForValidation(userId);
    browserId = id;

    await validator.initialize(page, url);

    for (const step of simplifiedWorkflow) {
      const enrichedStep: any = { where: { ...step.where }, what: [] };
      // Selectors gathered from this step's actions; attached to `where` below.
      const selectors: string[] = [];

      for (const action of step.what) {
        // Non-string actions (the Symbol variant of SimplifiedAction) are not carried over.
        if (typeof action.action !== 'string') {
          continue;
        }

        if (action.action === 'type') {
          if (!action.args || action.args.length < 2) {
            errors.push('type action missing selector or value');
            continue;
          }

          const selector = action.args[0];
          const value = action.args[1];
          const providedInputType = action.args[2];
          selectors.push(selector);
          const encryptedValue = encrypt(value);

          if (!providedInputType) {
            try {
              const inputType = await validator.detectInputType(selector);
              enrichedStep.what.push({ ...action, args: [selector, encryptedValue, inputType] });
            } catch (error: any) {
              // The action is skipped entirely, including the wait that would follow it.
              errors.push(`type action: ${error.message}`);
              continue;
            }
          } else {
            enrichedStep.what.push({ ...action, args: [selector, encryptedValue, providedInputType] });
          }

          enrichedStep.what.push({ action: 'waitForLoadState', args: ['networkidle'] });
          continue;
        }

        // Everything except the two scrape actions is passed through untouched.
        if (action.action !== 'scrapeSchema' && action.action !== 'scrapeList') {
          enrichedStep.what.push(action);
          continue;
        }

        if (action.action === 'scrapeSchema') {
          if (!action.args || !action.args[0]) {
            errors.push('scrapeSchema action missing fields argument');
            continue;
          }

          const fields = action.args[0];
          const result = await validator.validateSchemaFields(fields);

          if (!result.valid) {
            errors.push(...(result.errors || []));
            continue;
          }

          const enrichedFields: Record = {};
          for (const [fieldName, enrichedData] of Object.entries(result.enriched!)) {
            enrichedFields[fieldName] = {
              tag: enrichedData.tag,
              isShadow: enrichedData.isShadow,
              selector: enrichedData.selector,
              attribute: enrichedData.attribute
            };
            selectors.push(enrichedData.selector);
          }

          const enrichedAction: any = {
            action: 'scrapeSchema',
            actionId: `text-${uuid()}`,
            args: [enrichedFields]
          };
          if (action.name) {
            enrichedAction.name = action.name;
          }
          enrichedStep.what.push(enrichedAction);

          enrichedStep.what.push({ action: 'waitForLoadState', args: ['networkidle'] });
        } else if (action.action === 'scrapeList') {
          if (!action.args || !action.args[0]) {
            errors.push('scrapeList action missing config argument');
            continue;
          }

          const config = action.args[0];

          // Fields always come from auto-detection on the item selector;
          // no field definitions are read from `config` here.
          let enrichedFields: Record = {};
          let listSelector: string;

          try {
            const autoDetectResult = await validator.autoDetectListFields(config.itemSelector);

            if (!autoDetectResult.success || !autoDetectResult.fields || Object.keys(autoDetectResult.fields).length === 0) {
              errors.push(autoDetectResult.error || 'Failed to auto-detect fields from list selector');
              continue;
            }

            enrichedFields = autoDetectResult.fields;
            listSelector = autoDetectResult.listSelector!;
          } catch (error: any) {
            errors.push(`Field auto-detection failed: ${error.message}`);
            continue;
          }

          // Explicit pagination config wins; otherwise probe the page. A failed or
          // empty detection leaves the defaults ('none' / '') in place.
          let paginationType = 'none';
          let paginationSelector = '';

          if (config.pagination && config.pagination.type) {
            paginationType = config.pagination.type;
            paginationSelector = config.pagination.selector || '';
          } else {
            try {
              const paginationResult = await validator.autoDetectPagination(config.itemSelector);

              if (paginationResult.success && paginationResult.type) {
                paginationType = paginationResult.type;
                paginationSelector = paginationResult.selector || '';
              }
            } catch (error: any) {
              logger.warn('Pagination auto-detection failed, using default (none):', error.message);
            }
          }

          const enrichedListAction: any = {
            action: 'scrapeList',
            actionId: `list-${uuid()}`,
            args: [{
              fields: enrichedFields,
              listSelector: listSelector,
              pagination: {
                type: paginationType,
                selector: paginationSelector
              },
              // Falls back to 100 items when maxItems is missing or falsy.
              limit: config.maxItems || 100
            }]
          };
          if (action.name) {
            enrichedListAction.name = action.name;
          }
          enrichedStep.what.push(enrichedListAction);

          enrichedStep.what.push({ action: 'waitForLoadState', args: ['networkidle'] });
        }
      }

      if (selectors.length > 0) {
        enrichedStep.where.selectors = selectors;
      }

      enrichedWorkflow.push(enrichedStep);
    }

    await validator.close();

    if (browserId) {
      await destroyRemoteBrowser(browserId, userId);
      logger.info('RemoteBrowser cleaned up successfully');
    }

    // Any recorded problem fails the whole enrichment; the partial workflow is discarded.
    if (errors.length > 0) {
      return { success: false, errors };
    }

    return { success: true, workflow: enrichedWorkflow, url };
  } catch (error: any) {
    await validator.close();

    // Best-effort cleanup: a failure here is only logged so the original error is reported.
    if (browserId) {
      try {
        await destroyRemoteBrowser(browserId, userId);
        logger.info('RemoteBrowser cleaned up after error');
      } catch (cleanupError) {
        logger.warn('Failed to cleanup RemoteBrowser:', cleanupError);
      }
    }

    logger.error('Error enriching workflow:', error);
    return { success: false, errors: [error.message] };
  }
}
screenshotBuffer.toString('base64'); const elementGroups = await this.analyzePageGroups(validator); logger.info(`Found ${elementGroups.length} element groups`); const pageHTML = await validatorPage.content(); const llmDecision = await this.getLLMDecisionWithVision( prompt, screenshotBase64, elementGroups, pageHTML, llmConfig ); logger.info(`LLM decided action type: ${llmDecision.actionType}`); const workflow = await this.buildWorkflowFromLLMDecision(llmDecision, url, validator, prompt, llmConfig); await validator.close(); if (browserId) { await destroyRemoteBrowser(browserId, userId); logger.info('RemoteBrowser cleaned up after LLM workflow generation'); } return { success: true, workflow, url }; } catch (error: any) { await validator.close(); if (browserId) { try { await destroyRemoteBrowser(browserId, userId); logger.info('RemoteBrowser cleaned up after LLM generation error'); } catch (cleanupError) { logger.warn('Failed to cleanup RemoteBrowser:', cleanupError); } } logger.error('Error generating workflow from prompt:', error); return { success: false, errors: [error.message] }; } } /** * Analyze page groups using browser-side script */ private static async analyzePageGroups(validator: SelectorValidator): Promise { try { const page = (validator as any).page; const fs = require('fs'); const path = require('path'); const scriptPath = path.join(__dirname, 'browserSide/pageAnalyzer.js'); const scriptContent = fs.readFileSync(scriptPath, 'utf8'); await page.evaluate((script: string) => { eval(script); }, scriptContent); const groups = await page.evaluate(() => { const win = window as any; if (typeof win.analyzeElementGroups === 'function') { return win.analyzeElementGroups(); } return []; }); return groups; } catch (error: any) { logger.error('Error analyzing page groups:', error); return []; } } /** * Use LLM (with or without vision) to decide action and select best element/group */ private static async getLLMDecisionWithVision( prompt: string, screenshotBase64: 
string, elementGroups: any[], pageHTML: string, llmConfig?: { provider?: 'anthropic' | 'openai' | 'ollama'; model?: string; apiKey?: string; baseUrl?: string; } ): Promise { try { const provider = llmConfig?.provider || 'ollama'; const axios = require('axios'); const groupsDescription = elementGroups.map((group, index) => { const sampleText = group.sampleTexts.slice(0, 2).filter((t: string) => t && t.trim().length > 0).join(' | '); const hasContent = sampleText.length > 0; const contentPreview = hasContent ? sampleText : '(no text content - likely images/icons)'; return `Group ${index}: - Tag: ${group.fingerprint.tagName} - Count: ${group.count} similar elements - Has text content: ${hasContent ? 'YES' : 'NO'} - Sample content: ${contentPreview.substring(0, 300)}`; }).join('\n\n'); const systemPrompt = `You are a request classifier for list extraction. Your job is to: 1. Identify that the user wants to extract a list of items 2. Select the BEST element group that matches what they want 3. Extract any numeric limit from their request CRITICAL GROUP SELECTION RULES: - Match the sample content to what the user is asking for - this is the PRIMARY criterion - Groups with text content are often easier to match, but image galleries, icon grids, or data-attribute based groups can also be correct - Analyze the keywords in the user's request and find the group whose sample content or structure best matches - Consider the context: product sites may have image grids, job sites have text listings, etc. 
- The group with the most relevant content should be selected, NOT just the first group or the group with most text LIMIT EXTRACTION: - Look for numbers in the request that indicate quantity (e.g., "50", "25", "100", "first 30", "top 10") - If no limit specified, use null Must return valid JSON: {"actionType": "captureList", "reasoning": "...", "selectedGroupIndex": NUMBER, "limit": NUMBER_OR_NULL}`; const userPrompt = `User's request: "${prompt}" Available element groups on page: ${groupsDescription} TASK: 1. Identify the key terms from the user's request 2. Look through ALL the groups above 3. Find the group whose "Sample content" best matches the key terms from the request 4. Prefer groups with "Has text content: YES" over "NO" 5. Extract any numeric limit from the request if present Return JSON: { "actionType": "captureList", "reasoning": "Brief explanation of why this group was selected", "selectedGroupIndex": INDEX_NUMBER, "limit": NUMBER_OR_NULL } Note: selectedGroupIndex must be between 0 and ${elementGroups.length - 1}`; let llmResponse: string; if (provider === 'ollama') { const ollamaBaseUrl = llmConfig?.baseUrl || process.env.OLLAMA_BASE_URL || 'http://localhost:11434'; const ollamaModel = llmConfig?.model || 'llama3.2-vision'; const jsonSchema = { type: 'object', required: ['actionType', 'reasoning', 'selectedGroupIndex'], properties: { actionType: { type: 'string', enum: ['captureList'] }, reasoning: { type: 'string' }, selectedGroupIndex: { type: 'integer' }, limit: { type: ['integer', 'null'] } } }; const response = await axios.post(`${ollamaBaseUrl}/api/chat`, { model: ollamaModel, messages: [ { role: 'system', content: systemPrompt }, { role: 'user', content: userPrompt, images: [screenshotBase64] } ], stream: false, format: jsonSchema, options: { temperature: 0.1 } }); llmResponse = response.data.message.content; } else if (provider === 'anthropic') { const anthropic = new Anthropic({ apiKey: llmConfig?.apiKey || process.env.ANTHROPIC_API_KEY }); 
const anthropicModel = llmConfig?.model || 'claude-3-5-sonnet-20241022'; const response = await anthropic.messages.create({ model: anthropicModel, max_tokens: 1024, messages: [{ role: 'user', content: [ { type: 'image', source: { type: 'base64', media_type: 'image/png', data: screenshotBase64 } }, { type: 'text', text: userPrompt } ] }], system: systemPrompt }); const textContent = response.content.find((c: any) => c.type === 'text'); llmResponse = textContent?.type === 'text' ? textContent.text : ''; } else if (provider === 'openai') { const openaiBaseUrl = llmConfig?.baseUrl || 'https://api.openai.com/v1'; const openaiModel = llmConfig?.model || 'gpt-4-vision-preview'; const response = await axios.post(`${openaiBaseUrl}/chat/completions`, { model: openaiModel, messages: [ { role: 'system', content: systemPrompt }, { role: 'user', content: [ { type: 'text', text: userPrompt }, { type: 'image_url', image_url: { url: `data:image/png;base64,${screenshotBase64}` } } ] } ], max_tokens: 1024, temperature: 0.1 }, { headers: { 'Authorization': `Bearer ${llmConfig?.apiKey || process.env.OPENAI_API_KEY}`, 'Content-Type': 'application/json' } }); llmResponse = response.data.choices[0].message.content; } else { throw new Error(`Unsupported LLM provider: ${provider}`); } logger.info(`LLM Response: ${llmResponse}`); let jsonStr = llmResponse.trim(); const jsonMatch = jsonStr.match(/```json\s*([\s\S]*?)\s*```/) || jsonStr.match(/```\s*([\s\S]*?)\s*```/); if (jsonMatch) { jsonStr = jsonMatch[1].trim(); } const objectMatch = jsonStr.match(/\{[\s\S]*"actionType"[\s\S]*\}/); if (objectMatch) { jsonStr = objectMatch[0]; } const decision = JSON.parse(jsonStr); if (!decision.actionType || decision.actionType !== 'captureList') { throw new Error('LLM response must have actionType: "captureList"'); } if (decision.selectedGroupIndex === undefined || decision.selectedGroupIndex < 0 || decision.selectedGroupIndex >= elementGroups.length) { throw new Error(`Invalid selectedGroupIndex: 
/**
 * Fallback heuristic decision when the LLM call or its response parsing fails.
 *
 * Each group is scored with +2 for every (sample text, prompt keyword) pair
 * where the sample contains the keyword, plus a size bonus of `count / 10`
 * capped at 5. Keywords are the whitespace-separated prompt words longer than
 * three characters. The highest-scoring group wins; ties keep the earliest group.
 *
 * @param prompt - The user's extraction request.
 * @param elementGroups - Candidate groups; `sampleTexts`, `count` and `xpath` are read from each.
 * @returns A decision object shaped like the LLM path's result
 *          (`actionType`, `selectedGroup`, `itemSelector`, `reasoning`, `limit`).
 * @throws Error when `elementGroups` is empty.
 */
private static fallbackHeuristicDecision(prompt: string, elementGroups: any[]): any {
  if (elementGroups.length === 0) {
    throw new Error('No element groups found on page for list extraction');
  }

  // Computed once: the keyword list does not depend on the group or sample being scored.
  const keywords = prompt
    .toLowerCase()
    .split(/\s+/)
    .filter((word: string) => word.length > 3);

  let bestGroup = elementGroups[0];
  let bestScore = -Infinity;

  for (const group of elementGroups) {
    // A group without usable samples scores on size only instead of throwing.
    const sampleTexts: unknown[] = Array.isArray(group.sampleTexts) ? group.sampleTexts : [];
    let score = 0;

    for (const sampleText of sampleTexts) {
      if (typeof sampleText !== 'string') continue;
      const haystack = sampleText.toLowerCase();
      for (const keyword of keywords) {
        if (haystack.includes(keyword)) score += 2;
      }
    }

    // A missing or non-numeric count must not turn the score into NaN.
    const count = Number(group.count);
    if (Number.isFinite(count)) {
      score += Math.min(count / 10, 5);
    }

    // Strict comparison keeps the earliest group on ties.
    if (score > bestScore) {
      bestScore = score;
      bestGroup = group;
    }
  }

  return {
    actionType: 'captureList',
    selectedGroup: bestGroup,
    itemSelector: bestGroup.xpath,
    reasoning: 'Selected by keyword-match heuristic because the LLM decision was unavailable',
    limit: null
  };
}
/**
 * Ask the configured LLM for semantic names for one batch of generic field labels.
 *
 * The returned mapping contains exactly the keys of `fieldSamplesBatch`. Any
 * label the model omits, or returns as something other than a non-empty
 * string, falls back to the generic label itself. Extra keys the model makes
 * up are dropped so they cannot overwrite labels a caller has already
 * collected from earlier batches.
 *
 * @param allFields - All detected fields, keyed by generic label; `tag` and `attribute` are read.
 * @param fieldSamplesBatch - Sample values for the fields in this batch, keyed by generic label.
 * @param prompt - The user's extraction request.
 * @param url - URL of the page the fields were detected on.
 * @param llmConfig - Provider selection and credentials; provider defaults to `ollama`.
 * @param previousLabels - Labels assigned by earlier batches, passed to the model as context.
 * @returns Mapping from generic label to semantic label. Never rejects: on any
 *          failure every field in the batch keeps its generic label.
 */
private static async generateFieldLabelsBatch(
  allFields: Record<string, any>,
  fieldSamplesBatch: Record<string, string[]>,
  prompt: string,
  url: string,
  llmConfig?: {
    provider?: 'anthropic' | 'openai' | 'ollama';
    model?: string;
    apiKey?: string;
    baseUrl?: string;
  },
  previousLabels?: Record<string, string>
): Promise<Record<string, string>> {
  try {
    const provider = llmConfig?.provider || 'ollama';
    const axios = require('axios');

    const fieldDescriptions = Object.entries(fieldSamplesBatch).map(([genericLabel, samples]) => {
      const fieldInfo = allFields[genericLabel];
      const tagType = fieldInfo?.tag?.toLowerCase() || 'unknown';
      const attribute = fieldInfo?.attribute || 'innerText';

      let typeHint = '';
      if (attribute === 'href') typeHint = '(link/URL)';
      else if (attribute === 'src') typeHint = '(image)';
      else if (tagType === 'img') typeHint = '(image)';
      else if (tagType === 'a') typeHint = '(link)';

      return `${genericLabel}: Type: ${tagType} ${typeHint} Attribute: ${attribute} Sample values: ${samples.slice(0, 3).map((s, i) => ` ${i + 1}. "${s}"`).join('\n')}`;
    }).join('\n\n');

    // Only the semantic names are shown to the model; the generic keys are not needed.
    const earlierLabels = previousLabels ? Object.values(previousLabels) : [];
    const previousLabelsText = earlierLabels.length > 0
      ? `\n\nPREVIOUSLY ASSIGNED LABELS (from earlier batches):\n${earlierLabels.map((sem) => `- "${sem}"`).join('\n')}\n\nIMPORTANT: DO NOT reuse these exact labels. Use them as context to maintain consistent naming patterns and avoid duplicates. Add qualifiers like "Secondary", "Alternative", numbers, or additional context to distinguish similar fields.`
      : '';

    const systemPrompt = `You are a data field labeling assistant. Your job is to generate clear, semantic field names for extracted data based on the user's request and the actual field content. RULES FOR FIELD NAMING: 1. Use clear, descriptive names that match the content and context 2. Keep names concise (2-4 words maximum) 3. Use Title Case for field names 4. Match the user's terminology when possible 5. Be specific - include context when needed (e.g., "Product Name", "Job Title", "Article Headline", "Company Name") 6. For images, include "Image" or "Photo" in the name (e.g., "Product Image", "Profile Photo", "Thumbnail") 7. For links/URLs, you can use "URL" or "Link" (e.g., "Details Link", "Company Website") 8. Avoid generic terms like "Text", "Field", "Data" unless absolutely necessary 9. If you can't determine the meaning, use a descriptive observation based on the content type 10. Adapt to the domain: e-commerce (Product, Price), jobs (Title, Company), articles (Headline, Author), etc. 11. CRITICAL: Check previously assigned labels to avoid duplicates and maintain consistent naming patterns${previousLabelsText} You must return a JSON object mapping each generic label to its semantic name.`;

    const userPrompt = `URL: ${url} User's extraction request: "${prompt}" Detected fields with sample data: ${fieldDescriptions} TASK: Generate a semantic name for each field that accurately describes what it contains. Consider: - What the user is trying to extract (from their request) - The actual content in the sample values - The HTML element type and attribute being extracted - Common naming conventions for this type of data Return a JSON object with this exact structure: { "Label 1": "Semantic Field Name 1", "Label 2": "Semantic Field Name 2", ... }`;

    let llmResponse: string;

    if (provider === 'ollama') {
      const ollamaBaseUrl = llmConfig?.baseUrl || process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
      const ollamaModel = llmConfig?.model || 'llama3.2-vision';
      logger.info(`Using Ollama at ${ollamaBaseUrl} with model ${ollamaModel}`);

      const jsonSchema = {
        type: 'object',
        required: ['fieldLabels'],
        properties: {
          fieldLabels: {
            type: 'object',
            description: 'Mapping of generic labels to semantic field names',
            patternProperties: {
              '^Label \\d+$': {
                type: 'string',
                description: 'Semantic field name in Title Case'
              }
            }
          }
        }
      };

      try {
        const response = await axios.post(`${ollamaBaseUrl}/api/chat`, {
          model: ollamaModel,
          messages: [
            { role: 'system', content: systemPrompt },
            { role: 'user', content: userPrompt }
          ],
          stream: false,
          format: jsonSchema,
          options: { temperature: 0.1, top_p: 0.9 }
        });
        llmResponse = response.data.message.content;
      } catch (ollamaError: any) {
        logger.error(`Ollama request failed: ${ollamaError.message}`);
        if (ollamaError.response) {
          logger.error(`Ollama response status: ${ollamaError.response.status}`);
          logger.error(`Ollama response data: ${JSON.stringify(ollamaError.response.data)}`);
        }
        throw new Error(`Ollama API error: ${ollamaError.message}. Make sure Ollama is running at ${ollamaBaseUrl}`);
      }
    } else if (provider === 'anthropic') {
      const anthropic = new Anthropic({
        apiKey: llmConfig?.apiKey || process.env.ANTHROPIC_API_KEY
      });
      const anthropicModel = llmConfig?.model || 'claude-3-5-sonnet-20241022';

      const response = await anthropic.messages.create({
        model: anthropicModel,
        max_tokens: 2048,
        temperature: 0.1,
        messages: [{ role: 'user', content: userPrompt }],
        system: systemPrompt
      });

      const textContent = response.content.find((c: any) => c.type === 'text');
      llmResponse = textContent?.type === 'text' ? textContent.text : '';
    } else if (provider === 'openai') {
      const openaiBaseUrl = llmConfig?.baseUrl || 'https://api.openai.com/v1';
      const openaiModel = llmConfig?.model || 'gpt-4o-mini';

      const response = await axios.post(`${openaiBaseUrl}/chat/completions`, {
        model: openaiModel,
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: userPrompt }
        ],
        max_tokens: 2048,
        temperature: 0.1,
        response_format: { type: 'json_object' }
      }, {
        headers: {
          'Authorization': `Bearer ${llmConfig?.apiKey || process.env.OPENAI_API_KEY}`,
          'Content-Type': 'application/json'
        }
      });

      llmResponse = response.data.choices[0].message.content;
    } else {
      throw new Error(`Unsupported LLM provider: ${provider}`);
    }

    // Unwrap a fenced code block if present, then isolate the outermost {...} span.
    let jsonStr = llmResponse.trim();
    const jsonMatch = jsonStr.match(/```json\s*([\s\S]*?)\s*```/) || jsonStr.match(/```\s*([\s\S]*?)\s*```/);
    if (jsonMatch) {
      jsonStr = jsonMatch[1].trim();
    }
    const objectMatch = jsonStr.match(/\{[\s\S]*\}/);
    if (objectMatch) {
      jsonStr = objectMatch[0];
    }

    const parsedResponse = JSON.parse(jsonStr);

    // The Ollama schema nests the mapping under `fieldLabels`; the prompt asks for a flat object.
    const rawMapping: any = parsedResponse && parsedResponse.fieldLabels
      ? parsedResponse.fieldLabels
      : parsedResponse;

    // Build the result from the batch keys only, so invented keys and
    // non-string values never reach the caller.
    const labelMapping: Record<string, string> = {};
    const missingLabels: string[] = [];

    for (const genericLabel of Object.keys(fieldSamplesBatch)) {
      const candidate = rawMapping ? rawMapping[genericLabel] : undefined;
      if (typeof candidate === 'string' && candidate.trim().length > 0) {
        labelMapping[genericLabel] = candidate.trim();
      } else {
        missingLabels.push(genericLabel);
        labelMapping[genericLabel] = genericLabel;
      }
    }

    if (missingLabels.length > 0) {
      logger.warn(`LLM did not provide labels for: ${missingLabels.join(', ')}`);
    }

    return labelMapping;
  } catch (error: any) {
    logger.error(`Error in batch field labeling: ${error.message}`);
    const fallbackLabels: Record<string, string> = {};
    Object.keys(fieldSamplesBatch).forEach(label => {
      fallbackLabels[label] = label;
    });
    return fallbackLabels;
  }
}
Return low confidence (<0.6) if uncertain You must return a JSON object with selectedFields, confidence, and reasoning.`; const userPrompt = `User's extraction request: "${prompt}" Available labeled fields: ${fieldDescriptions} TASK: Determine which fields the user wants to extract based on their request. Return a JSON object with this exact structure: { "selectedFields": ["Field Name 1", "Field Name 2"], "confidence": 0.95, "reasoning": "Brief explanation of why these fields were selected and confidence level" } Rules: - selectedFields: Array of field names that match the user's intent - confidence: Number between 0 and 1 (1.0 = exact match, 0.8+ = semantic match, <0.7 = uncertain) - reasoning: Explain which keywords from the user's request matched which fields`; let llmResponse: string; if (provider === 'ollama') { const ollamaBaseUrl = llmConfig?.baseUrl || process.env.OLLAMA_BASE_URL || 'http://localhost:11434'; const ollamaModel = llmConfig?.model || 'llama3.2-vision'; const jsonSchema = { type: 'object', required: ['selectedFields', 'confidence', 'reasoning'], properties: { selectedFields: { type: 'array', items: { type: 'string' }, description: 'Array of field names that match user intent' }, confidence: { type: 'number', minimum: 0, maximum: 1, description: 'Confidence score from 0 to 1' }, reasoning: { type: 'string', description: 'Explanation of field selection and confidence' } } }; const response = await axios.post(`${ollamaBaseUrl}/api/chat`, { model: ollamaModel, messages: [ { role: 'system', content: systemPrompt }, { role: 'user', content: userPrompt } ], stream: false, format: jsonSchema, options: { temperature: 0.1, top_p: 0.9 } }); llmResponse = response.data.message.content; } else if (provider === 'anthropic') { const anthropic = new Anthropic({ apiKey: llmConfig?.apiKey || process.env.ANTHROPIC_API_KEY }); const anthropicModel = llmConfig?.model || 'claude-3-5-sonnet-20241022'; const response = await anthropic.messages.create({ model: 
anthropicModel, max_tokens: 1024, temperature: 0.1, messages: [{ role: 'user', content: userPrompt }], system: systemPrompt }); const textContent = response.content.find((c: any) => c.type === 'text'); llmResponse = textContent?.type === 'text' ? textContent.text : ''; } else if (provider === 'openai') { const openaiBaseUrl = llmConfig?.baseUrl || 'https://api.openai.com/v1'; const openaiModel = llmConfig?.model || 'gpt-4o-mini'; const response = await axios.post(`${openaiBaseUrl}/chat/completions`, { model: openaiModel, messages: [ { role: 'system', content: systemPrompt }, { role: 'user', content: userPrompt } ], max_tokens: 1024, temperature: 0.1, response_format: { type: 'json_object' } }, { headers: { 'Authorization': `Bearer ${llmConfig?.apiKey || process.env.OPENAI_API_KEY}`, 'Content-Type': 'application/json' } }); llmResponse = response.data.choices[0].message.content; } else { throw new Error(`Unsupported LLM provider: ${provider}`); } logger.info(`LLM Field Filtering Response: ${llmResponse}`); // Parse JSON response let jsonStr = llmResponse.trim(); const jsonMatch = jsonStr.match(/```json\s*([\s\S]*?)\s*```/) || jsonStr.match(/```\s*([\s\S]*?)\s*```/); if (jsonMatch) { jsonStr = jsonMatch[1].trim(); } const objectMatch = jsonStr.match(/\{[\s\S]*\}/); if (objectMatch) { jsonStr = objectMatch[0]; } const filterResult = JSON.parse(jsonStr); if (!Array.isArray(filterResult.selectedFields)) { throw new Error('Invalid response: selectedFields must be an array'); } if (typeof filterResult.confidence !== 'number' || filterResult.confidence < 0 || filterResult.confidence > 1) { throw new Error('Invalid response: confidence must be a number between 0 and 1'); } const filteredFields: Record = {}; for (const fieldName of filterResult.selectedFields) { if (labeledFields[fieldName]) { filteredFields[fieldName] = labeledFields[fieldName]; } else { logger.warn(`LLM selected field "${fieldName}" but it doesn't exist in labeled fields`); } } const needsUserConfirmation 
/**
 * Extract sample data from fields for LLM labeling.
 *
 * Runs in the page: takes the first five elements matched by `listSelector`
 * and, for every field, collects the value of the first element matched by
 * the field's selector that sits inside each of those list items. Values are
 * trimmed text content (attribute `innerText`, the default) or the named
 * attribute, truncated to 200 characters and de-duplicated per field.
 *
 * @param fields - Detected fields keyed by label; `selector` and `attribute` are read.
 * @param listSelector - CSS selector, or XPath when it starts with `//` or `(//`.
 * @param validator - Validator whose `page` property is used to run the in-page script.
 * @returns Sample values per field label. Never rejects: if the page is
 *          missing or the script fails, every field maps to an empty array.
 */
private static async extractFieldSamples(
  fields: Record<string, any>,
  listSelector: string,
  validator: SelectorValidator
): Promise<Record<string, string[]>> {
  try {
    const page = (validator as any).page;
    if (!page) {
      throw new Error('Page not available');
    }

    // The callback is executed inside the page, so it must be self-contained.
    const samples = await page.evaluate((args: { fieldsData: any; listSel: string }) => {
      const MAX_SAMPLE_LENGTH = 200;
      const results: Record<string, string[]> = {};

      function evaluateSelector(selector: string, doc: Document): Element[] {
        const isXPath = selector.startsWith('//') || selector.startsWith('(//');
        if (!isXPath) {
          return Array.from(doc.querySelectorAll(selector));
        }
        const snapshot = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
        const elements: Element[] = [];
        for (let i = 0; i < snapshot.snapshotLength; i++) {
          const node = snapshot.snapshotItem(i);
          if (node && node.nodeType === Node.ELEMENT_NODE) {
            elements.push(node as Element);
          }
        }
        return elements;
      }

      const listItems = evaluateSelector(args.listSel, document).slice(0, 5);

      Object.entries(args.fieldsData).forEach(([fieldLabel, fieldInfo]: [string, any]) => {
        const collected: string[] = [];
        const attribute = fieldInfo.attribute || 'innerText';

        // The selector is resolved against the whole document, so its result is
        // the same for every list item: evaluate it once per field.
        let candidates: Element[];
        try {
          candidates = evaluateSelector(fieldInfo.selector, document);
        } catch (e) {
          // Best effort: a missing or invalid selector leaves this field without samples.
          results[fieldLabel] = collected;
          return;
        }

        listItems.forEach((listItem: Element) => {
          const matchingElement = candidates.find((el: Element) => listItem.contains(el));
          if (!matchingElement) {
            return;
          }

          const value = attribute === 'innerText'
            ? (matchingElement.textContent || '').trim()
            : matchingElement.getAttribute(attribute) || '';

          // Truncate before the duplicate check so it compares like with like;
          // otherwise a long value is never recognised as already collected.
          const clipped = value.substring(0, MAX_SAMPLE_LENGTH);
          if (clipped.length > 0 && !collected.includes(clipped)) {
            collected.push(clipped);
          }
        });

        results[fieldLabel] = collected;
      });

      return results;
    }, { fieldsData: fields, listSel: listSelector });

    return samples;
  } catch (error: any) {
    logger.error(`Error extracting field samples: ${error.message}`);
    logger.error(`Error stack: ${error.stack}`);

    const fieldSamples: Record<string, string[]> = {};
    Object.keys(fields).forEach(label => {
      fieldSamples[label] = [];
    });
    return fieldSamples;
  }
}
Focus on WHAT is being extracted, not HOW Examples: - User wants "product listings" → "Product Listings" or "Products" - User wants "job postings" → "Job Postings" or "Jobs" - User wants "article titles" → "Articles" - User wants "company information" → "Companies" - User wants "quotes from page" → "Quotes" You must return ONLY the list name, nothing else. No JSON, no explanation, just the name.`; const userPrompt = `URL: ${url} User's extraction request: "${prompt}" ${fieldContext} TASK: Generate a concise, descriptive name for this list (1-3 words in Title Case). Return ONLY the list name, nothing else:`; let llmResponse: string; if (provider === 'ollama') { const ollamaBaseUrl = llmConfig?.baseUrl || process.env.OLLAMA_BASE_URL || 'http://localhost:11434'; const ollamaModel = llmConfig?.model || 'llama3.2-vision'; try { const response = await axios.post(`${ollamaBaseUrl}/api/chat`, { model: ollamaModel, messages: [ { role: 'system', content: systemPrompt }, { role: 'user', content: userPrompt } ], stream: false, options: { temperature: 0.1, top_p: 0.9, num_predict: 20 } }); llmResponse = response.data.message.content; } catch (ollamaError: any) { logger.error(`Ollama request failed for list naming: ${ollamaError.message}`); logger.info('Using fallback list name: "List 1"'); return 'List 1'; } } else if (provider === 'anthropic') { const anthropic = new Anthropic({ apiKey: llmConfig?.apiKey || process.env.ANTHROPIC_API_KEY }); const anthropicModel = llmConfig?.model || 'claude-3-5-sonnet-20241022'; const response = await anthropic.messages.create({ model: anthropicModel, max_tokens: 20, temperature: 0.1, messages: [{ role: 'user', content: userPrompt }], system: systemPrompt }); const textContent = response.content.find((c: any) => c.type === 'text'); llmResponse = textContent?.type === 'text' ? 
/**
 * Build workflow from LLM decision.
 *
 * The workflow is a single step that navigates to `url`, waits for network
 * idle, runs one `scrapeList` action and waits for network idle again. The
 * scrape action is assembled by auto-detecting the fields of the selected
 * list, sampling them, labelling them with the LLM, filtering them by the
 * user's intent, detecting pagination and generating a list name.
 *
 * @param llmDecision - Decision object; `actionType`, `itemSelector` and `limit` are read.
 * @param url - Page the workflow navigates to.
 * @param validator - Used for field and pagination auto-detection and for sampling.
 * @param prompt - The user's request; defaults to 'Extract list data' when absent.
 * @param llmConfig - Provider selection and credentials passed through to the LLM helpers.
 * @returns The workflow array.
 * @throws Error when the action type is not `captureList` or no fields can be detected.
 */
private static async buildWorkflowFromLLMDecision(
  llmDecision: any,
  url: string,
  validator: SelectorValidator,
  prompt?: string,
  llmConfig?: {
    provider?: 'anthropic' | 'openai' | 'ollama';
    model?: string;
    apiKey?: string;
    baseUrl?: string;
  }
): Promise<any[]> {
  const workflow: any[] = [];
  const effectivePrompt = prompt || 'Extract list data';

  workflow.push({
    where: { url, selectors: [] },
    what: [
      { action: 'goto', args: [url] },
      { action: 'waitForLoadState', args: ['networkidle'] }
    ]
  });

  if (llmDecision.actionType !== 'captureList') {
    throw new Error(`Unsupported action type: ${llmDecision.actionType}. Only captureList is supported.`);
  }

  const autoDetectResult = await validator.autoDetectListFields(llmDecision.itemSelector);
  if (!autoDetectResult.success || !autoDetectResult.fields || Object.keys(autoDetectResult.fields).length === 0) {
    throw new Error('Failed to auto-detect fields from selected group');
  }

  logger.info('Extracting field samples and detecting pagination in parallel...');
  const [fieldSamples, paginationResult] = await Promise.all([
    this.extractFieldSamples(
      autoDetectResult.fields,
      autoDetectResult.listSelector || '',
      validator
    ),
    // Pagination is optional: a detection failure degrades to "no pagination".
    validator.autoDetectPagination(llmDecision.itemSelector).catch((error: any) => {
      logger.warn('Pagination auto-detection failed:', error.message);
      return { success: false, type: 'none', selector: '' };
    })
  ]);

  logger.info('Generating semantic field labels with LLM...');
  const fieldLabels = await this.generateFieldLabels(
    autoDetectResult.fields,
    fieldSamples,
    effectivePrompt,
    url,
    llmConfig
  );

  // Two fields may receive the same semantic label. Using it directly as an
  // object key would silently drop all but the last such field, so repeated
  // labels get a numeric suffix ("Price", "Price 2", ...).
  const usedLabels = new Set<string>();
  const finalLabelFor: Record<string, string> = {};
  for (const genericLabel of Object.keys(autoDetectResult.fields)) {
    const baseLabel = fieldLabels[genericLabel] || genericLabel;
    let uniqueLabel = baseLabel;
    let suffix = 2;
    while (usedLabels.has(uniqueLabel)) {
      uniqueLabel = `${baseLabel} ${suffix}`;
      suffix += 1;
    }
    usedLabels.add(uniqueLabel);
    finalLabelFor[genericLabel] = uniqueLabel;
  }

  const renamedFields: Record<string, any> = {};
  Object.entries(autoDetectResult.fields).forEach(([genericLabel, fieldInfo]) => {
    renamedFields[finalLabelFor[genericLabel]] = fieldInfo;
  });

  // Samples are renamed through the same map so they stay aligned with their fields.
  const renamedSamples: Record<string, string[]> = {};
  Object.entries(fieldSamples).forEach(([genericLabel, samples]) => {
    const semanticLabel = finalLabelFor[genericLabel] || fieldLabels[genericLabel] || genericLabel;
    renamedSamples[semanticLabel] = samples;
  });

  const filterResult = await this.filterFieldsByIntent(
    renamedFields,
    renamedSamples,
    effectivePrompt,
    llmConfig
  );

  // The filtered set is used when confidence is at least 0.6 and something was selected.
  let finalFields = renamedFields;
  if (filterResult.confidence >= 0.6 && Object.keys(filterResult.selectedFields).length > 0) {
    finalFields = filterResult.selectedFields;
  } else {
    logger.warn(`Low confidence (${filterResult.confidence}) or no fields selected. Using all detected fields as fallback.`);
  }

  let paginationType = 'none';
  let paginationSelector = '';
  if (paginationResult.success && paginationResult.type) {
    paginationType = paginationResult.type;
    paginationSelector = paginationResult.selector || '';
  }

  // The limit may come straight from model output: accept only a positive
  // whole number (numeric strings included); anything else uses the default of 100.
  const requestedLimit = Math.floor(Number(llmDecision.limit));
  const limit = Number.isFinite(requestedLimit) && requestedLimit >= 1 ? requestedLimit : 100;
  logger.info(`Using limit: ${limit}`);

  logger.info('Generating semantic list name with LLM...');
  const listName = await this.generateListName(
    effectivePrompt,
    url,
    Object.keys(finalFields),
    llmConfig
  );
  logger.info(`Using list name: "${listName}"`);

  workflow[0].what.push({
    action: 'scrapeList',
    actionId: `list-${uuid()}`,
    name: listName,
    args: [{
      fields: finalFields,
      listSelector: autoDetectResult.listSelector,
      pagination: {
        type: paginationType,
        selector: paginationSelector
      },
      limit: limit
    }]
  });

  workflow[0].what.push({
    action: 'waitForLoadState',
    args: ['networkidle']
  });

  return workflow;
}
"${intent.searchQuery}". Please provide a URL manually or refine your prompt.`] }; } const selection = await this.selectBestUrlFromResults(searchResults, userPrompt, llmConfig); await page.goto(selection.url, { waitUntil: 'networkidle', timeout: 30000 }); await page.waitForTimeout(2000); const validator = new SelectorValidator(); await validator.initialize(page, selection.url); const validatorPage = (validator as any).page; const screenshotBuffer = await validatorPage.screenshot({ fullPage: true, type: 'jpeg', quality: 85 }); const screenshotBase64 = screenshotBuffer.toString('base64'); const elementGroups = await this.analyzePageGroups(validator); const pageHTML = await validatorPage.content(); const llmDecision = await this.getLLMDecisionWithVision( userPrompt, screenshotBase64, elementGroups, pageHTML, llmConfig ); if (intent.limit !== undefined && intent.limit !== null) { llmDecision.limit = intent.limit; } const workflow = await this.buildWorkflowFromLLMDecision(llmDecision, selection.url, validator, userPrompt, llmConfig); await validator.close(); if (browserId) { await destroyRemoteBrowser(browserId, userId); } return { success: true, workflow, url: selection.url }; } catch (error: any) { if (browserId) { try { await destroyRemoteBrowser(browserId, userId); } catch (cleanupError) { logger.warn('Failed to cleanup RemoteBrowser:', cleanupError); } } logger.error('Error in generateWorkflowFromPromptWithSearch:', error); return { success: false, errors: [error.message] }; } } /** * Parse user prompt to extract search intent */ private static async parseSearchIntent( userPrompt: string, llmConfig?: { provider?: 'anthropic' | 'openai' | 'ollama'; model?: string; apiKey?: string; baseUrl?: string; } ): Promise<{ searchQuery: string; extractionGoal: string; limit?: number | null; }> { const systemPrompt = `You are a search query extractor. Analyze the user's extraction request and identify: 1. The website or page they want to extract from (for searching) 2. 
What data they want to extract 3. Any limit/quantity specified Examples: - "Extract top 10 company data from YCombinator Companies site" → searchQuery: "YCombinator Companies", goal: "company data", limit: 10 - "Get first 20 laptop names and prices from Amazon" → searchQuery: "Amazon laptops", goal: "laptop names and prices", limit: 20 - "Scrape articles from TechCrunch AI section" → searchQuery: "TechCrunch AI section", goal: "articles", limit: null Return ONLY valid JSON: {"searchQuery": "...", "extractionGoal": "...", "limit": NUMBER_OR_NULL}`; const userMessage = `User request: "${userPrompt}" Extract the search query, extraction goal, and limit. Return JSON only.`; try { const provider = llmConfig?.provider || 'ollama'; const axios = require('axios'); let llmResponse: string; if (provider === 'ollama') { const ollamaBaseUrl = llmConfig?.baseUrl || process.env.OLLAMA_BASE_URL || 'http://localhost:11434'; const ollamaModel = llmConfig?.model || 'llama3.2-vision'; const jsonSchema = { type: 'object', required: ['searchQuery', 'extractionGoal'], properties: { searchQuery: { type: 'string' }, extractionGoal: { type: 'string' }, limit: { type: ['integer', 'null'] } } }; const response = await axios.post(`${ollamaBaseUrl}/api/chat`, { model: ollamaModel, messages: [ { role: 'system', content: systemPrompt }, { role: 'user', content: userMessage } ], stream: false, format: jsonSchema, options: { temperature: 0.1 } }); llmResponse = response.data.message.content; } else if (provider === 'anthropic') { const anthropic = new Anthropic({ apiKey: llmConfig?.apiKey || process.env.ANTHROPIC_API_KEY }); const anthropicModel = llmConfig?.model || 'claude-3-5-sonnet-20241022'; const response = await anthropic.messages.create({ model: anthropicModel, max_tokens: 256, temperature: 0.1, messages: [{ role: 'user', content: userMessage }], system: systemPrompt }); const textContent = response.content.find((c: any) => c.type === 'text'); llmResponse = textContent?.type === 'text' ? 
textContent.text : ''; } else if (provider === 'openai') { const openaiBaseUrl = llmConfig?.baseUrl || 'https://api.openai.com/v1'; const openaiModel = llmConfig?.model || 'gpt-4o-mini'; const response = await axios.post(`${openaiBaseUrl}/chat/completions`, { model: openaiModel, messages: [ { role: 'system', content: systemPrompt }, { role: 'user', content: userMessage } ], max_tokens: 256, temperature: 0.1, response_format: { type: 'json_object' } }, { headers: { 'Authorization': `Bearer ${llmConfig?.apiKey || process.env.OPENAI_API_KEY}`, 'Content-Type': 'application/json' } }); llmResponse = response.data.choices[0].message.content; } else { throw new Error(`Unsupported LLM provider: ${provider}`); } logger.info(`[WorkflowEnricher] Intent parsing response: ${llmResponse}`); let jsonStr = llmResponse.trim(); const jsonMatch = jsonStr.match(/```json\s*([\s\S]*?)\s*```/) || jsonStr.match(/```\s*([\s\S]*?)\s*```/); if (jsonMatch) { jsonStr = jsonMatch[1].trim(); } const objectMatch = jsonStr.match(/\{[\s\S]*"searchQuery"[\s\S]*\}/); if (objectMatch) { jsonStr = objectMatch[0]; } const intent = JSON.parse(jsonStr); if (!intent.searchQuery || !intent.extractionGoal) { throw new Error('Invalid intent parsing response - missing required fields'); } return { searchQuery: intent.searchQuery, extractionGoal: intent.extractionGoal, limit: intent.limit || null }; } catch (error: any) { logger.warn(`Failed to parse intent with LLM: ${error.message}`); logger.info('Using fallback heuristic intent parsing'); const fromMatch = userPrompt.match(/from\s+([^,\.]+)/i); const searchQuery = fromMatch ? fromMatch[1].trim() : userPrompt.slice(0, 50); const numberMatch = userPrompt.match(/(\d+)/); const limit = numberMatch ? 
parseInt(numberMatch[1], 10) : null;

      return {
        searchQuery,
        extractionGoal: userPrompt,
        limit
      };
    }
  }

  /**
   * Search DuckDuckGo for `query` and return the first usable result URL.
   *
   * The result list contains at most one entry (title/description are left
   * empty, position is always 1); an empty array means no usable link was found.
   *
   * @param query - Free-text search query; URL-encoded before navigation.
   * @param page - Browser page used for navigation (`goto`, `waitForLoadState`,
   *   `waitForSelector` and `evaluate` are called on it).
   * @returns Zero or one search result.
   * @throws Error prefixed with "DuckDuckGo search failed:" when navigation or
   *   evaluation throws.
   */
  private static async performDuckDuckGoSearch(
    query: string,
    page: any
  ): Promise<Array<{ url: string; title: string; description: string; position: number }>> {
    logger.info(`[WorkflowEnricher] Searching DuckDuckGo for: "${query}"`);

    try {
      const searchUrl = `https://duckduckgo.com/?q=${encodeURIComponent(query)}`;

      // Randomised pauses before and after navigation.
      const initialDelay = 500 + Math.random() * 1000;
      await new Promise(resolve => setTimeout(resolve, initialDelay));

      await page.goto(searchUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });

      await page.waitForLoadState('load', { timeout: 10000 }).catch(() => {
        logger.warn('[WorkflowEnricher] Load state timeout, continuing anyway');
      });

      const pageLoadDelay = 2000 + Math.random() * 1500;
      await new Promise(resolve => setTimeout(resolve, pageLoadDelay));

      await page.waitForSelector('[data-testid="result"], .result', { timeout: 5000 }).catch(() => {
        logger.warn('[WorkflowEnricher] DuckDuckGo results not found on initial wait');
      });

      // Runs inside the page: must not reference anything from the outer scope.
      const firstUrl = await page.evaluate(() => {
        const selectors = [
          '[data-testid="result"]',
          'article[data-testid="result"]',
          'li[data-layout="organic"]',
          '.result',
          'article[data-testid]'
        ];

        // Use the first selector that matches anything.
        let allElements: Element[] = [];
        for (const selector of selectors) {
          const elements = Array.from(document.querySelectorAll(selector));
          if (elements.length > 0) {
            console.log(`Found ${elements.length} DDG elements with: ${selector}`);
            allElements = elements;
            break;
          }
        }

        if (allElements.length === 0) {
          console.error('No search result elements found');
          return null;
        }

        // Walk the results in order and return the first one that yields an
        // external URL, instead of giving up when the very first entry is an
        // internal/unusable link.
        for (const element of allElements) {
          const titleEl = element.querySelector('h2, [data-testid="result-title-a"], h3, [data-testid="result-title"]');
          const linkEl = (titleEl?.querySelector('a[href]') || element.querySelector('a[href]')) as HTMLAnchorElement | null;

          if (!linkEl || !linkEl.href) {
            continue;
          }

          let actualUrl = linkEl.href;

          if (actualUrl.includes('uddg=')) {
            try {
              // searchParams.get() already percent-decodes the value; decoding it
              // a second time would corrupt targets that contain escaped characters.
              const uddgUrl = new URL(actualUrl).searchParams.get('uddg');
              if (uddgUrl) {
                actualUrl = uddgUrl;
              }
            } catch (e) {
              console.log('Failed to parse uddg parameter:', e);
            }
          }

          if (actualUrl.includes('duckduckgo.com')) {
            console.log(`Skipping DDG internal URL: ${actualUrl}`);
            continue;
          }

          return actualUrl;
        }

        return null;
      });

      if (!firstUrl) {
        logger.error('[WorkflowEnricher] No valid URL found in search results');
        return [];
      }

      logger.info(`[WorkflowEnricher] Successfully extracted first URL: ${firstUrl}`);

      return [{
        url: firstUrl,
        title: '',
        description: '',
        position: 1
      }];
    } catch (error: any) {
      logger.error(`[WorkflowEnricher] Search failed: ${error.message}`);
      throw new Error(`DuckDuckGo search failed: ${error.message}`);
    }
  }

  /**
   * Pick the search result most likely to contain the data the user asked for.
   *
   * A single result is returned directly (confidence 0.8). With several results
   * the configured LLM (ollama by default) is asked for an index; if the call or
   * the parsing of its answer fails, the first result is returned with
   * confidence 0.6.
   *
   * @param searchResults - Candidates; `url`, `title` and `description` are read.
   * @param userPrompt - The user's original extraction request.
   * @param llmConfig - Optional provider/model/credentials override.
   * @returns The chosen URL with a confidence score and short reasoning.
   * @throws Error when `searchResults` is empty.
   */
  private static async selectBestUrlFromResults(
    searchResults: any[],
    userPrompt: string,
    llmConfig?: {
      provider?: 'anthropic' | 'openai' | 'ollama';
      model?: string;
      apiKey?: string;
      baseUrl?: string;
    }
  ): Promise<{
    url: string;
    confidence: number;
    reasoning: string;
  }> {
    // Fail fast: without this the empty case only surfaced as a TypeError thrown
    // from the fallback path, after a pointless LLM request.
    if (searchResults.length === 0) {
      throw new Error('No search results available for URL selection');
    }

    if (searchResults.length === 1) {
      return {
        url: searchResults[0].url,
        confidence: 0.8,
        reasoning: 'Selected first search result from DuckDuckGo'
      };
    }

    const systemPrompt = `You are a URL selector. Given a list of search results and a user's extraction request, select the BEST URL that is most likely to contain the data the user wants. Consider: 1. Title and description relevance to the user's request 2. Official/authoritative sources are usually better than aggregators 3. List/directory pages are better than individual item pages 4.
The URL path often gives hints about the page content Return ONLY valid JSON: {"selectedIndex": NUMBER, "confidence": NUMBER_0_TO_1, "reasoning": "brief explanation"}`;

    const resultsDescription = searchResults.map((r, i) => `Result ${i}: - Title: ${r.title} - URL: ${r.url} - Description: ${r.description}` ).join('\n\n');

    const userMessage = `User wants to: "${userPrompt}" Available search results: ${resultsDescription} Select the BEST result index (0-${searchResults.length - 1}). Return JSON only.`;

    try {
      const provider = llmConfig?.provider || 'ollama';
      const axios = require('axios');

      let llmResponse: string;

      if (provider === 'ollama') {
        const ollamaBaseUrl = llmConfig?.baseUrl || process.env.OLLAMA_BASE_URL || 'http://localhost:11434';
        const ollamaModel = llmConfig?.model || 'llama3.2-vision';

        // Schema passed as `format` so the reply is constrained to these fields.
        const jsonSchema = {
          type: 'object',
          required: ['selectedIndex', 'confidence', 'reasoning'],
          properties: {
            selectedIndex: { type: 'integer' },
            confidence: { type: 'number' },
            reasoning: { type: 'string' }
          }
        };

        const response = await axios.post(`${ollamaBaseUrl}/api/chat`, {
          model: ollamaModel,
          messages: [
            { role: 'system', content: systemPrompt },
            { role: 'user', content: userMessage }
          ],
          stream: false,
          format: jsonSchema,
          options: { temperature: 0.1 }
        });

        llmResponse = response.data.message.content;
      } else if (provider === 'anthropic') {
        const anthropic = new Anthropic({
          apiKey: llmConfig?.apiKey || process.env.ANTHROPIC_API_KEY
        });
        const anthropicModel = llmConfig?.model || 'claude-3-5-sonnet-20241022';

        const response = await anthropic.messages.create({
          model: anthropicModel,
          max_tokens: 256,
          temperature: 0.1,
          messages: [{ role: 'user', content: userMessage }],
          system: systemPrompt
        });

        const textContent = response.content.find((c: any) => c.type === 'text');
        llmResponse = textContent?.type === 'text' ? textContent.text : '';
      } else if (provider === 'openai') {
        const openaiBaseUrl = llmConfig?.baseUrl || 'https://api.openai.com/v1';
        const openaiModel = llmConfig?.model || 'gpt-4o-mini';

        const response = await axios.post(`${openaiBaseUrl}/chat/completions`, {
          model: openaiModel,
          messages: [
            { role: 'system', content: systemPrompt },
            { role: 'user', content: userMessage }
          ],
          max_tokens: 256,
          temperature: 0.1,
          response_format: { type: 'json_object' }
        }, {
          headers: {
            'Authorization': `Bearer ${llmConfig?.apiKey || process.env.OPENAI_API_KEY}`,
            'Content-Type': 'application/json'
          }
        });

        llmResponse = response.data.choices[0].message.content;
      } else {
        throw new Error(`Unsupported LLM provider: ${provider}`);
      }

      logger.info(`[WorkflowEnricher] URL selection response: ${llmResponse}`);

      // Strip an optional markdown code fence, then isolate the JSON object.
      let jsonStr = llmResponse.trim();
      const jsonMatch = jsonStr.match(/```json\s*([\s\S]*?)\s*```/) || jsonStr.match(/```\s*([\s\S]*?)\s*```/);
      if (jsonMatch) {
        jsonStr = jsonMatch[1].trim();
      }

      const objectMatch = jsonStr.match(/\{[\s\S]*"selectedIndex"[\s\S]*\}/);
      if (objectMatch) {
        jsonStr = objectMatch[0];
      }

      const decision = JSON.parse(jsonStr);

      if (decision.selectedIndex === undefined || decision.selectedIndex < 0 || decision.selectedIndex >= searchResults.length) {
        throw new Error(`Invalid selectedIndex: ${decision.selectedIndex}`);
      }

      return {
        url: searchResults[decision.selectedIndex].url,
        confidence: decision.confidence || 0.5,
        reasoning: decision.reasoning || 'No reasoning provided'
      };
    } catch (error: any) {
      logger.warn(`[WorkflowEnricher] Failed to select URL with LLM: ${error.message}`);
      logger.info('[WorkflowEnricher] Using fallback: selecting first search result');

      return {
        url: searchResults[0].url,
        confidence: 0.6,
        reasoning: 'Selected first search result (LLM selection failed)'
      };
    }
  }
}

================================================
FILE: server/src/server.ts
================================================
import express from 'express';
import path from 'path';
import http from
'http'; import { Server } from "socket.io"; import cors from 'cors'; import dotenv from 'dotenv'; dotenv.config(); import { record, workflow, storage, auth, proxy, webhook } from './routes'; import { BrowserPool } from "./browser-management/classes/BrowserPool"; import logger from './logger'; import sequelize, { connectDB, syncDB } from './storage/db' import cookieParser from 'cookie-parser'; import { SERVER_PORT } from "./constants/config"; import { readdirSync } from "fs" import { fork } from 'child_process'; import { capture } from "./utils/analytics"; import swaggerUi from 'swagger-ui-express'; import swaggerSpec from './swagger/config'; import connectPgSimple from 'connect-pg-simple'; import pg from 'pg'; import session from 'express-session'; import { processQueuedRuns, recoverOrphanedRuns } from './routes/storage'; import { startWorkers } from './pgboss-worker'; import { stopPgBossClient, startPgBossClient } from './storage/pgboss' import Run from './models/Run'; const app = express(); app.use(cors({ origin: process.env.PUBLIC_URL ? process.env.PUBLIC_URL : 'http://localhost:5173', credentials: true, })); app.use(express.json()); const { Pool } = pg; const pool = new Pool({ user: process.env.DB_USER, host: process.env.DB_HOST, database: process.env.DB_NAME, password: process.env.DB_PASSWORD, port: process.env.DB_PORT ? 
parseInt(process.env.DB_PORT, 10) : undefined, max: 10, min: 0, idleTimeoutMillis: 30000, connectionTimeoutMillis: 10000, maxUses: 7500, allowExitOnIdle: true }); const PgSession = connectPgSimple(session); interface PgStoreOptions { pool: pg.Pool; tableName: string; createTableIfMissing?: boolean; pruneSessionInterval?: number; errorLog?: (err: Error) => void; } const sessionStore = new PgSession({ pool: pool, tableName: 'session', createTableIfMissing: true, pruneSessionInterval: 15 * 60, errorLog: (err: Error) => { logger.log('error', `Session store error: ${err.message}`); }, } as PgStoreOptions); app.use( session({ store: sessionStore, secret: process.env.SESSION_SECRET || 'mx-session', resave: false, saveUninitialized: false, cookie: { secure: process.env.NODE_ENV === 'production', maxAge: 24 * 60 * 60 * 1000, } }) ); const server = http.createServer(app); /** * Globally exported singleton instance of socket.io for socket communication with the client. */ export let io = new Server(server, { cleanupEmptyChildNamespaces: true, pingTimeout: 60000, pingInterval: 25000, maxHttpBufferSize: 1e8, transports: ['websocket', 'polling'], allowEIO3: true }); /** * {@link BrowserPool} globally exported singleton instance for managing browsers. 
*/ export const browserPool = new BrowserPool(); export const recentRecoveries = new Map(); app.use(cookieParser()) app.use('/webhook', webhook); app.use('/record', record); app.use('/workflow', workflow); app.use('/storage', storage); app.use('/auth', auth); app.use('/proxy', proxy); app.use('/api-docs', swaggerUi.serve, swaggerUi.setup(swaggerSpec)); readdirSync(path.join(__dirname, 'api')).forEach((r) => { const route = require(path.join(__dirname, 'api', r)); const router = route.default || route; if (typeof router === 'function') { app.use('/api', router); } else { console.error(`Error: ${r} does not export a valid router`); } }); const isProduction = process.env.NODE_ENV === 'production'; const workerPath = path.resolve(__dirname, isProduction ? './schedule-worker.js' : './schedule-worker.ts'); const recordingWorkerPath = path.resolve(__dirname, isProduction ? './pgboss-worker.js' : './pgboss-worker.ts'); let workerProcess: any; let recordingWorkerProcess: any; app.get('/', function (req, res) { capture( 'maxun-oss-server-run', { event: 'server_started', } ); return res.send('Maxun server started 🚀'); }); app.use((req, res, next) => { res.header('Access-Control-Allow-Origin', process.env.PUBLIC_URL || 'http://localhost:5173'); res.header('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS'); res.header('Access-Control-Allow-Headers', 'Content-Type, Authorization'); res.header('Access-Control-Allow-Credentials', 'true'); if (req.method === 'OPTIONS') { return res.sendStatus(200); } next(); }); if (require.main === module) { const serverIntervals: NodeJS.Timeout[] = []; const processQueuedRunsInterval = setInterval(async () => { try { await processQueuedRuns(); } catch (error: any) { logger.log('error', `Error in processQueuedRuns interval: ${error.message}`); } }, 5000); serverIntervals.push(processQueuedRunsInterval); const browserPoolCleanupInterval = setInterval(() => { browserPool.cleanupStaleBrowserSlots(); }, 60000); 
serverIntervals.push(browserPoolCleanupInterval); server.listen(SERVER_PORT, '0.0.0.0', async () => { try { await connectDB(); await syncDB(); logger.log('info', 'Cleaning up stale browser slots...'); browserPool.cleanupStaleBrowserSlots(); await recoverOrphanedRuns(); await startPgBossClient(); await startWorkers(); io.of('/queued-run').on('connection', (socket) => { const userId = socket.handshake.query.userId as string; if (userId) { socket.join(`user-${userId}`); logger.log('info', `Client joined queued-run namespace for user: ${userId}, socket: ${socket.id}`); if (recentRecoveries.has(userId)) { const recoveries = recentRecoveries.get(userId)!; recoveries.forEach(recoveryData => { socket.emit('run-recovered', recoveryData); logger.log('info', `Sent stored recovery notification for run: ${recoveryData.runId} to user: ${userId}`); }); recentRecoveries.delete(userId); } socket.on('disconnect', () => { logger.log('info', `Client disconnected from queued-run namespace: ${socket.id}`); }); } else { logger.log('warn', `Client connected to queued-run namespace without userId: ${socket.id}`); socket.disconnect(); } }); if (!isProduction) { // Development mode if (process.platform === 'win32') { workerProcess = fork(workerPath, [], { execArgv: ['--inspect=5859'], }); workerProcess.on('message', (message: any) => { console.log(`Message from worker: ${message}`); }); workerProcess.on('error', (error: any) => { console.error(`Error in worker: ${error}`); }); workerProcess.on('exit', (code: any) => { console.log(`Worker exited with code: ${code}`); }); recordingWorkerProcess = fork(recordingWorkerPath, [], { execArgv: ['--inspect=5860'], }); recordingWorkerProcess.on('message', (message: any) => { console.log(`Message from recording worker: ${message}`); }); recordingWorkerProcess.on('error', (error: any) => { console.error(`Error in recording worker: ${error}`); }); recordingWorkerProcess.on('exit', (code: any) => { console.log(`Recording worker exited with code: 
${code}`); }); } else { // Run in same process for non-Windows development try { await import('./schedule-worker'); await import('./pgboss-worker'); console.log('Workers started in main process for memory sharing'); } catch (error) { console.error('Failed to start workers in main process:', error); } } } else { // Production mode - run workers in same process for memory sharing try { await import('./schedule-worker'); await import('./pgboss-worker'); logger.log('info', 'Workers started in main process'); } catch (error: any) { logger.log('error', `Failed to start workers: ${error.message}`); process.exit(1); } } logger.log('info', `Server listening on port ${SERVER_PORT}`); } catch (error: any) { logger.log('error', `Failed to connect to the database: ${error.message}`); process.exit(1); } }); process.on('SIGINT', async () => { console.log('Main app shutting down...'); let shutdownSuccessful = true; await new Promise(resolve => setTimeout(resolve, 2000)); try { const runningBrowsers = browserPool.getAllBrowsers(); for (const [browserId, browser] of runningBrowsers) { try { if (browser && browser.interpreter) { const hasData = (browser.interpreter.serializableDataByType?.scrapeSchema?.length > 0) || (browser.interpreter.serializableDataByType?.scrapeList?.length > 0) || (browser.interpreter.binaryData?.length > 0); if (hasData) { const run = await Run.findOne({ where: { browserId, status: 'running' } }); if (run) { const limitedData = { scrapeSchemaOutput: browser.interpreter.serializableDataByType?.scrapeSchema || {}, scrapeListOutput: browser.interpreter.serializableDataByType?.scrapeList || {}, binaryOutput: browser.interpreter.binaryData || [] }; const binaryOutputRecord = limitedData.binaryOutput.reduce((acc: Record, item: any, index: number) => { const key = item.name || `Screenshot ${index + 1}`; acc[key] = { data: item.data, mimeType: item.mimeType }; return acc; }, {}); let uploadedBinaryOutput = {}; if (Object.keys(binaryOutputRecord).length > 0) { try { 
const { BinaryOutputService } = require('./storage/mino'); const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, binaryOutputRecord); logger.log('info', `Successfully uploaded ${Object.keys(uploadedBinaryOutput).length} screenshots to MinIO for interrupted run`); } catch (minioError: any) { logger.log('error', `Failed to upload binary data to MinIO during shutdown: ${minioError.message}`); uploadedBinaryOutput = binaryOutputRecord; } } await run.update({ status: 'failed', finishedAt: new Date().toLocaleString(), log: 'Process interrupted during execution - partial data preserved', serializableOutput: { scrapeSchema: limitedData.scrapeSchemaOutput, scrapeList: limitedData.scrapeListOutput, }, binaryOutput: uploadedBinaryOutput }); } } } } catch (browserError: any) { shutdownSuccessful = false; } } } catch (error: any) { shutdownSuccessful = false; } serverIntervals.forEach(clearInterval); try { const allBrowsers = browserPool.getAllBrowsers(); for (const [browserId, browser] of allBrowsers) { try { if (browser) { await browser.switchOff(); } } catch (browserCleanupError: any) { console.error(`Error shutting down browser ${browserId}:`, browserCleanupError.message); } } } catch (error: any) { console.error('Error during browser cleanup:', error.message); } if (!isProduction) { try { if (workerProcess) { workerProcess.kill('SIGTERM'); } if (recordingWorkerProcess) { recordingWorkerProcess.kill('SIGTERM'); } } catch (workerError: any) { console.error('Error terminating worker processes:', workerError.message); } await new Promise(resolve => setTimeout(resolve, 1000)); } try { await new Promise((resolve) => { io.close(() => { resolve(); }); }); } catch (ioError: any) { shutdownSuccessful = false; } try { await new Promise((resolve, reject) => { server.close((err) => { if (err) { reject(err); } else { resolve(); } }); }); } catch (serverError: any) { 
console.error('Error closing HTTP server:', serverError.message); shutdownSuccessful = false; } try { await pool.end(); } catch (poolError: any) { console.error('Error closing PostgreSQL connection pool:', poolError.message); shutdownSuccessful = false; } try { await stopPgBossClient(); } catch (pgBossError: any) { console.error('Error closing PgBoss client connection:', pgBossError.message); shutdownSuccessful = false; } try { await sequelize.close(); } catch (sequelizeError: any) { console.error('Error closing Sequelize connection:', sequelizeError.message); shutdownSuccessful = false; } console.log(`Shutdown ${shutdownSuccessful ? 'completed successfully' : 'completed with errors'}`); process.exit(shutdownSuccessful ? 0 : 1); }); process.on('unhandledRejection', (reason, promise) => { console.error('Unhandled promise rejection:', reason); if (process.env.NODE_ENV === 'production') { setTimeout(() => { process.exit(1); }, 1000); } }); process.on('uncaughtException', (error) => { console.error('Uncaught exception:', error); if (process.env.NODE_ENV === 'production') { setTimeout(() => { process.exit(1); }, 5000); } }); } ================================================ FILE: server/src/socket-connection/connection.ts ================================================ import { Namespace, Socket } from 'socket.io'; import logger from "../logger"; import { registerInputHandlers, removeInputHandlers } from '../browser-management/inputHandlers'; /** * Opens a websocket canal for duplex data transfer and registers all handlers for this data for the recording session. * Uses socket.io dynamic namespaces for multiplexing the traffic from different running remote browser instances. 
* @param io dynamic namespace on the socket.io server
 * @param userId identifier passed to registerInputHandlers for every socket that connects
 * @param callback function called after the connection is created providing the socket resource
 * @category BrowserManagement
 */
export const createSocketConnection = (
  io: Namespace,
  userId: string,
  callback: (socket: Socket) => void,
) => {
  io.on('connection', async (socket: Socket) => {
    logger.log('info', "Client connected " + socket.id);
    registerInputHandlers(socket, userId);

    // Undo the handler registration once the client goes away.
    const onDisconnect = () => {
      logger.log('info', "Client disconnected " + socket.id);
      removeInputHandlers(socket);
      logger.log('debug', "Input handlers cleaned up for socket " + socket.id);
    };
    socket.on('disconnect', onDisconnect);

    callback(socket);
  });
};

/**
 * Listens for client connections on the namespace used by a recording run.
 * No input handlers are registered here; each connection and disconnection is
 * logged and the connected socket is handed to `callback`.
 * @param io dynamic namespace on the socket.io server
 * @param callback function called with the socket of every new connection
 * @category BrowserManagement
 */
export const createSocketConnectionForRun = (
  io: Namespace,
  callback: (socket: Socket) => void,
) => {
  io.on('connection', async (socket: Socket) => {
    logger.log('info', "Client connected " + socket.id);
    socket.on('disconnect', () => {
      logger.log('info', "Client disconnected " + socket.id);
    });
    callback(socket);
  });
};

================================================
FILE: server/src/storage/db.ts
================================================
import { Sequelize } from 'sequelize';
import dotenv from 'dotenv';

dotenv.config();

if (!process.env.DB_USER || !process.env.DB_PASSWORD || !process.env.DB_HOST || !process.env.DB_PORT || !process.env.DB_NAME) {
    throw new Error('One or more required environment variables are missing.');
}

const databaseUrl =
`postgresql://${encodeURIComponent(process.env.DB_USER)}:${encodeURIComponent(process.env.DB_PASSWORD)}@${process.env.DB_HOST}:${process.env.DB_PORT}/${process.env.DB_NAME}`;
// The user name is percent-encoded just like the password: a raw '@', ':' or '/'
// in it would otherwise corrupt the URL and the hostname parsed from it below.

// Extract the hostname using the URL constructor
const host = new URL(databaseUrl).hostname;

/** Shared Sequelize instance for the PostgreSQL database described by the DB_* variables. */
const sequelize = new Sequelize(databaseUrl,
    {
        host,
        dialect: 'postgres',
        logging: false,
        pool: {
            max: 10,          // Maximum number of connections in pool (reduced from 20)
            min: 0,           // Minimum number of connections in pool (let pool shrink to 0)
            acquire: 30000,   // Maximum time (ms) to try to get connection before throwing error
            idle: 10000,      // Maximum time (ms) a connection can be idle before being released
            evict: 1000,      // Time interval (ms) for eviction runs
        },
        dialectOptions: {
            statement_timeout: 60000, // 60 seconds
        },
    }
);

/**
 * Verifies that the database is reachable via `sequelize.authenticate()`.
 *
 * NOTE: a failure is only logged, never rethrown, so the returned promise
 * always resolves and callers cannot detect a failed connection from it.
 */
export const connectDB = async () => {
    try {
        await sequelize.authenticate();
        console.log('Database connected successfully');
    } catch (error) {
        console.error('Unable to connect to the database:', error);
    }
};

/**
 * Synchronises the registered models with the database schema.
 *
 * Tables are never dropped (`force: false`); `alter` is enabled only when
 * NODE_ENV is exactly 'development'. As with connectDB, errors are logged
 * and swallowed.
 */
export const syncDB = async () => {
    try {
        //setupAssociations();
        const isDevelopment = process.env.NODE_ENV === 'development';
        // force: true will drop and recreate tables on every run
        // Use `alter: true` only in development mode
        await sequelize.sync({
            force: false,
            alter: isDevelopment
        });
        console.log('Database synced successfully!');
    } catch (error) {
        console.error('Failed to sync database:', error);
    }
};

export default sequelize;

================================================
FILE: server/src/storage/mino.ts
================================================
import { Client } from 'minio';
import Run from '../models/Run';

const minioClient = new Client({
  endPoint: process.env.MINIO_ENDPOINT ?
process.env.MINIO_ENDPOINT : 'localhost',
  // Explicit radix so the port is always parsed as decimal.
  port: parseInt(process.env.MINIO_PORT || '9000', 10),
  useSSL: false,
  accessKey: process.env.MINIO_ACCESS_KEY || 'minio-access-key',
  secretKey: process.env.MINIO_SECRET_KEY || 'minio-secret-key',
});

/**
 * Makes sure `bucketName` exists and carries an anonymous `s3:GetObject`
 * (public-read) policy.
 * @param bucketName - Bucket to create and/or configure.
 * @throws Rethrows any MinIO error after logging it.
 */
async function fixMinioBucketConfiguration(bucketName: string) {
  try {
    const exists = await minioClient.bucketExists(bucketName);
    if (!exists) {
      await minioClient.makeBucket(bucketName);
      console.log(`Bucket ${bucketName} created.`);
    } else {
      console.log(`Bucket ${bucketName} already exists.`);
    }

    const policyJSON = {
      Version: "2012-10-17",
      Statement: [
        {
          Effect: "Allow",
          Principal: "*",
          Action: ["s3:GetObject"],
          Resource: [`arn:aws:s3:::${bucketName}/*`],
        },
      ],
    };
    await minioClient.setBucketPolicy(bucketName, JSON.stringify(policyJSON));
    console.log(`Public-read policy applied to bucket ${bucketName}.`);
  } catch (error) {
    console.error(`Error configuring bucket ${bucketName}:`, error);
    throw error;
  }
}

// Connectivity probe at module load: any successful answer (bucket present or
// not) means the server is reachable, so both outcomes log the same message.
minioClient.bucketExists('maxun-test')
  .then(() => {
    console.log('MinIO connected successfully.');
  })
  .catch((err) => {
    console.error('Error connecting to MinIO:', err);
  })

/**
 * Creates `bucketName` when missing and, for the 'public-read' policy, applies
 * an anonymous `s3:GetObject` policy. Unlike fixMinioBucketConfiguration this
 * function only logs failures and never throws.
 * @param bucketName - Bucket to create and/or configure.
 * @param policy - Only 'public-read' triggers a policy update.
 */
async function createBucketWithPolicy(bucketName: string, policy = 'public-read') {
  try {
    const bucketExists = await minioClient.bucketExists(bucketName);
    if (!bucketExists) {
      await minioClient.makeBucket(bucketName);
      console.log(`Bucket ${bucketName} created successfully.`);
    } else {
      console.log(`Bucket ${bucketName} already exists.`);
    }

    if (policy === 'public-read') {
      // Apply public-read policy after confirming the bucket exists
      const policyJSON = {
        Version: "2012-10-17",
        Statement: [
          {
            Effect: "Allow",
            Principal: "*",
            Action: ["s3:GetObject"],
            Resource: [`arn:aws:s3:::${bucketName}/*`]
          }
        ]
      };
      await minioClient.setBucketPolicy(bucketName, JSON.stringify(policyJSON));
      console.log(`Public-read policy applied to bucket ${bucketName}.`);
    }
  } catch (error) {
    console.error('Error in bucket creation or policy application:', error);
  }
}

/** Uploads run artefacts (e.g. screenshots) to MinIO and reads them back. */
class BinaryOutputService {
  private bucketName: string;

  constructor(bucketName: string) {
    this.bucketName = bucketName;
  }

  /**
   * Uploads binary data to Minio and stores references in PostgreSQL.
   *
   * Each entry is expected to look like `{ data, mimeType? }`, where `data` is a
   * data URI, a raw base64 string, a JSON-serialised Buffer or a Buffer.
   * Entries that cannot be converted or uploaded are logged and skipped; the run
   * is then updated with whatever was uploaded.
   *
   * @param run - The run object representing the current process.
   * @param binaryOutput - The binary output object containing data to upload.
   * @returns A map of Minio URLs pointing to the uploaded binary data.
   */
  async uploadAndStoreBinaryOutput(run: Run, binaryOutput: Record<string, any>): Promise<Record<string, any>> {
    const uploadedBinaryOutput: Record<string, any> = {};
    const plainRun = run.toJSON();

    for (const key of Object.keys(binaryOutput)) {
      const binaryData = binaryOutput[key];

      if (!plainRun.runId) {
        console.error('Run ID is undefined. Cannot upload binary data.');
        continue;
      }

      console.log(`Processing binary output key: ${key}`);

      // Convert binary data to Buffer (handles base64, data URI, and old Buffer format)
      let bufferData: Buffer | null = null;

      if (binaryData && typeof binaryData === 'object' && binaryData.data) {
        const dataString = binaryData.data;

        if (Buffer.isBuffer(dataString)) {
          // Already a Buffer: use it as-is instead of rejecting it below.
          bufferData = dataString;
        } else if (typeof dataString === 'string') {
          try {
            if (dataString.startsWith('data:')) {
              const base64Match = dataString.match(/^data:([^;]+);base64,(.+)$/);
              if (base64Match) {
                bufferData = Buffer.from(base64Match[2], 'base64');
                console.log(`Converted data URI to Buffer for key: ${key}`);
              }
            } else {
              try {
                const parsed = JSON.parse(dataString);
                if (parsed?.type === 'Buffer' && Array.isArray(parsed.data)) {
                  bufferData = Buffer.from(parsed.data);
                  console.log(`Converted JSON Buffer format for key: ${key}`);
                } else {
                  bufferData = Buffer.from(dataString, 'base64');
                  console.log(`Converted raw base64 to Buffer for key: ${key}`);
                }
              } catch {
                // Not JSON at all: treat the string as raw base64.
                bufferData = Buffer.from(dataString, 'base64');
                console.log(`Converted raw base64 to Buffer for key: ${key}`);
              }
            }
          } catch (error) {
            console.error(`Failed to parse binary data for key ${key}:`, error);
            continue;
          }
        }
      }

      if (!bufferData || !Buffer.isBuffer(bufferData)) {
        console.error(`Invalid or empty buffer for key ${key}`);
        continue;
      }

      try {
        await fixMinioBucketConfiguration(this.bucketName);

        // Object key: "<runId>/<key>" with whitespace runs replaced by '_' and
        // the result URI-encoded.
        const minioKey = `${plainRun.runId}/${encodeURIComponent(key.trim().replace(/\s+/g, '_'))}`;

        console.log(`Uploading to bucket ${this.bucketName} with key ${minioKey}`);

        await minioClient.putObject(
          this.bucketName,
          minioKey,
          bufferData,
          bufferData.length,
          { 'Content-Type': binaryData.mimeType || 'image/png' }
        );

        const publicHost = process.env.MINIO_PUBLIC_HOST || 'http://localhost';
        const publicPort = process.env.MINIO_PORT || '9000';
        const publicUrl = `${publicHost}:${publicPort}/${this.bucketName}/${minioKey}`;

        uploadedBinaryOutput[key] = publicUrl;

        console.log(`✅ Uploaded and stored: ${publicUrl}`);
      } catch (error) {
        console.error(`❌ Error uploading key ${key} to MinIO:`, error);
      }
    }

    console.log('Uploaded Binary Output:', uploadedBinaryOutput);

    try {
      await run.update({ binaryOutput: uploadedBinaryOutput });
      console.log('Run successfully updated with binary output');
    } catch (updateError) {
      console.error('Error updating run with binary output:', updateError);
    }

    return uploadedBinaryOutput;
  }

  /**
   * Uploads `data` as an image/png object named `key` into the
   * 'maxun-run-screenshots' bucket (created on demand).
   * @param run - Run the upload belongs to.
   * @param key - Object name inside the bucket.
   * @param data - Raw bytes to store.
   * @throws Rethrows the MinIO error after logging it.
   */
  async uploadBinaryOutputToMinioBucket(run: Run, key: string, data: Buffer): Promise<void> {
    const bucketName = 'maxun-run-screenshots';
    await createBucketWithPolicy(bucketName, 'public-read');

    try {
      console.log(`Uploading to bucket ${bucketName} with key ${key}`);
      await minioClient.putObject(bucketName, key, data, data.length, { 'Content-Type': 'image/png' });

      // NOTE(review): this writes to the object returned by run.toJSON() and
      // nothing here saves the run, so the reference is presumably not persisted
      // - confirm whether callers expect that. The guard keeps a run without
      // binaryOutput from turning a successful upload into a thrown TypeError.
      const plainRun = run.toJSON();
      if (plainRun.binaryOutput) {
        plainRun.binaryOutput[key] = `minio://${bucketName}/${key}`;
      }
      console.log(`Successfully uploaded to MinIO: minio://${bucketName}/${key}`);
    } catch (error) {
      console.error(`Error uploading to MinIO bucket: ${bucketName} with key: ${key}`, error);
      throw error;
    }
  }

  /**
   * Reads the object `key` from the 'maxun-run-screenshots' bucket into memory.
   * @param key - Object name inside the bucket.
   * @returns The complete object contents.
   * @throws Rethrows request errors; stream errors reject the returned promise.
   */
  public async getBinaryOutputFromMinioBucket(key: string): Promise<Buffer> {
    const bucketName = 'maxun-run-screenshots';

    try {
      console.log(`Fetching from bucket ${bucketName} with key ${key}`);
      const stream = await minioClient.getObject(bucketName, key);
      return new Promise<Buffer>((resolve, reject) => {
        const chunks: Buffer[] = [];
        stream.on('data', (chunk) => chunks.push(chunk));
        stream.on('end', () => resolve(Buffer.concat(chunks)));
        stream.on('error', (error) => {
          console.error('Error while reading the stream from MinIO:', error);
          reject(error);
        });
      });
    } catch (error) {
      console.error(`Error fetching from MinIO bucket: ${bucketName} with key: ${key}`, error);
      throw error;
    }
  }
}

export { minioClient, BinaryOutputService };

================================================
FILE: server/src/storage/pgboss.ts
================================================
/**
 * Shared PgBoss singleton for job queue operations
 *
 * This module provides a single PgBoss instance that can be safely
 * imported by both the main server process and routes without creating
 * duplicate connection pools.
 *
 * IMPORTANT: This is separate from pgboss-worker.ts which runs in a
 * forked child process and handles job processing.
 */
import PgBoss from 'pg-boss';
import logger from '../logger';
import dotenv from 'dotenv';

dotenv.config();

if (!process.env.DB_USER || !process.env.DB_PASSWORD || !process.env.DB_HOST || !process.env.DB_PORT || !process.env.DB_NAME) {
  throw new Error('One or more required environment variables are missing.');
}

// User name and password are both percent-encoded so reserved characters in
// either one cannot break the connection string.
const pgBossConnectionString = `postgres://${encodeURIComponent(process.env.DB_USER)}:${encodeURIComponent(process.env.DB_PASSWORD)}@${process.env.DB_HOST}:${process.env.DB_PORT}/${process.env.DB_NAME}`;

/**
 * Shared PgBoss instance for submitting jobs (NOT processing)
 * This instance is only used to send jobs to queues, not to work on them
 */
export const pgBossClient = new PgBoss({
  connectionString: pgBossConnectionString,
  max: 3,
});

let isStarted = false;

/**
 * Initialize the PgBoss client for job submission
 * Should be called once during server startup; repeated calls are ignored
 * with a warning.
 * @throws Rethrows the error from `pgBossClient.start()` after logging it.
 */
export async function startPgBossClient(): Promise<void> {
  if (isStarted) {
    logger.log('warn', 'PgBoss client already started, skipping...');
    return;
} try { await pgBossClient.start(); isStarted = true; logger.log('info', 'PgBoss client started successfully (job submission only)'); } catch (error: any) { logger.log('error', `Failed to start PgBoss client: ${error.message}`); throw error; } } /** * Stop the PgBoss client gracefully */ export async function stopPgBossClient(): Promise { if (!isStarted) { return; } try { await pgBossClient.stop(); isStarted = false; logger.log('info', 'PgBoss client stopped successfully'); } catch (error: any) { logger.log('error', `Failed to stop PgBoss client: ${error.message}`); } } // Handle graceful shutdown process.on('SIGTERM', async () => { await stopPgBossClient(); }); process.on('SIGINT', async () => { await stopPgBossClient(); }); export default pgBossClient; ================================================ FILE: server/src/storage/schedule.ts ================================================ /** * Shared scheduling utilities * These functions use the shared PgBoss client to avoid connection leaks */ import { v4 as uuid } from 'uuid'; import logger from '../logger'; import { pgBossClient } from './pgboss'; import { registerWorkerForQueue } from '../schedule-worker'; /** * Utility function to schedule a cron job using PgBoss * @param id The robot ID * @param userId The user ID * @param cronExpression The cron expression for scheduling * @param timezone The timezone for the cron expression */ export async function scheduleWorkflow(id: string, userId: string, cronExpression: string, timezone: string): Promise { try { const runId = uuid(); const queueName = `scheduled-workflow-${id}`; logger.log('info', `Scheduling workflow ${id} with cron expression ${cronExpression} in timezone ${timezone}`); await pgBossClient.createQueue(queueName); await pgBossClient.schedule(queueName, cronExpression, { id, runId, userId }, { tz: timezone } ); await registerWorkerForQueue(queueName); logger.log('info', `Scheduled workflow job for robot ${id}`); } catch (error: unknown) { const 
/**
 * Cancels every PgBoss schedule whose payload references the given robot.
 *
 * All schedules are fetched, those whose `data.id` equals `robotId` are
 * selected, and each selected schedule is unscheduled by name, one after
 * another.
 *
 * @param robotId The robot ID
 * @returns true if successful
 * @throws Re-throws any error raised while listing or unscheduling, after logging it.
 */
export async function cancelScheduledWorkflow(robotId: string) {
  // A schedule belongs to the robot when its payload carries the robot's id.
  const targetsRobot = (schedule: any): boolean => {
    try {
      const payload = schedule.data;
      return Boolean(payload && payload.id === robotId);
    } catch {
      return false;
    }
  };

  try {
    const schedules = await pgBossClient.getSchedules();
    const robotSchedules = schedules.filter(targetsRobot);

    for (const schedule of robotSchedules) {
      logger.log('info', `Cancelling scheduled job ${schedule.name} for robot ${robotId}`);
      await pgBossClient.unschedule(schedule.name);
    }

    return true;
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    logger.log('error', `Failed to cancel scheduled workflow: ${errorMessage}`);
    throw error;
  }
}
All you need to do is input the Maxun API key by clicking Authorize below.', }, components: { securitySchemes: { api_key: { type: 'apiKey', in: 'header', name: 'x-api-key', description: 'API key for authorization. You can find your API key in the "API Key" section on Maxun Dashboard.', }, }, }, security: [ { api_key: [], // Apply this security scheme globally }, ], }, apis, }; const swaggerSpec = swaggerJSDoc(options); export default swaggerSpec; ================================================ FILE: server/src/types/index.ts ================================================ import {BrowserType, LaunchOptions} from "playwright-core"; /** * Interpreter settings properties including recording parameters. * @category Types */ export interface InterpreterSettings { maxConcurrency: number; maxRepeats: number; debug: boolean; params?: any; } /** * Useful coordinates interface holding the x and y coordinates of a point. * @category Types */ export interface Coordinates { x: number; y: number; } /** * interface to handle date picker events. * @category Types */ export interface DatePickerEventData { coordinates: Coordinates; selector: string; value: string; } /** * Holds the deltas of a wheel/scroll event. * @category Types */ export interface ScrollDeltas { deltaX: number; deltaY: number; } /** * Options for the {@link BrowserManagement.launch} method. * Wraps the Playwright's launchOptions and adds an extra browser option. * The browser option determines which browser to launch as Playwright * supports multiple browsers. (chromium, firefox, webkit) * -- Possible expansion for the future of the browser recorder -- * @category Types */ export interface RemoteBrowserOptions { browser: BrowserType launchOptions: LaunchOptions }; /** * Pairs a pressed key value with the coordinates of the key press. 
* @category Types */ export interface KeyboardInput { key: string; coordinates: Coordinates; } /** * Contains index in the current workflow and result for over-shadowing check of a pair. * @category Types */ export type PossibleOverShadow = { index: number; isOverShadowing: boolean; } /** * An object representing he coordinates, width, height and corner points of the element. * @category Types */ export interface Rectangle extends Coordinates { width: number; height: number; top: number; right: number; bottom: number; left: number; } /** * Helpful enum used for determining the type of action currently executed by the user. * @enum {string} * @category Types */ export enum ActionType { AwaitText = 'awaitText', Click = 'click', DragAndDrop = 'dragAndDrop', Screenshot = 'screenshot', Hover = 'hover', Input = 'input', Keydown = 'keydown', Load = 'load', Navigate = 'navigate', Scroll = 'scroll', } /** * Useful enum for determining the element's tag name. * @enum {string} * @category Types */ export enum TagName { A = 'A', B = 'B', Cite = 'CITE', EM = 'EM', Input = 'INPUT', Select = 'SELECT', Span = 'SPAN', Strong = 'STRONG', TextArea = 'TEXTAREA', } /** * @category Types */ export interface BaseActionInfo { tagName: string; /** * If the element only has text content inside (hint to use text selector) */ hasOnlyText: boolean; } interface IframeSelector { full: string; isIframe: boolean; } interface ShadowSelector { full: string; mode: string; } /** * Holds all the possible css selectors that has been found for an element. * @category Types */ export interface Selectors { id: string|null; generalSelector: string|null; attrSelector: string|null; testIdSelector: string|null; text: string|null; href: string|null; hrefSelector: string|null; accessibilitySelector: string|null; formSelector: string|null; iframeSelector: IframeSelector|null; shadowSelector: ShadowSelector|null; } /** * Base type for all actions. 
* Action types are used to determine the best selector for the user action. * They store valuable information, specific to the action. * @category Types */ export interface BaseAction extends BaseActionInfo{ type: ActionType; associatedActions: ActionType[]; inputType: string | undefined; value: string | undefined; selectors: Selectors; timestamp: number; isPassword: boolean; /** * Overrides the {@link BaseActionInfo} type of tagName for the action. */ tagName: TagName; } /** * Action type for pressing on a keyboard. * @category Types */ interface KeydownAction extends BaseAction { type: ActionType.Keydown; key: string; } /** * Action type for typing into an input field. * @category Types */ interface InputAction extends BaseAction { type: ActionType.Input; } /** * Action type for clicking on an element. * @category Types */ interface ClickAction extends BaseAction { type: ActionType.Click; } /** * Action type for drag and dropping an element. * @category Types */ interface DragAndDropAction extends BaseAction { type: ActionType.DragAndDrop; sourceX: number; sourceY: number; targetX: number; targetY: number; } /** * Action type for hovering over an element. * @category Types */ interface HoverAction extends BaseAction { type: ActionType.Hover; } /** * Action type for waiting on load. * @category Types */ interface LoadAction extends BaseAction { type: ActionType.Load; url: string; } /** * Action type for page navigation. * @category Types */ interface NavigateAction extends BaseAction { type: ActionType.Navigate; url: string; source: string; } /** * Action type for scrolling. * @category Types */ interface WheelAction extends BaseAction { type: ActionType.Scroll; deltaX: number; deltaY: number; pageXOffset: number; pageYOffset: number; } /** * Action type for taking a screenshot. * @category Types */ interface FullScreenshotAction extends BaseAction { type: ActionType.Screenshot; } /** * Action type for waiting on the filling of text input. 
/**
 * Reads the `version` field of the `package.json` located in the current
 * working directory.
 *
 * @returns The version value, or `'unknown'` when the file cannot be read or
 *          parsed, or when it has no truthy `version` field.
 */
function getOssVersion() {
  const fallback = 'unknown';

  try {
    const manifestPath = path.resolve(process.cwd(), 'package.json');
    const manifestText = fs.readFileSync(manifestPath, 'utf8');
    const { version } = JSON.parse(manifestText);
    return version ? version : fallback;
  } catch {
    // Missing file, invalid JSON, or a `null` document all end up here.
    return fallback;
  }
}
/**
 * Hashes a plaintext password with bcrypt using a freshly generated salt
 * (12 rounds).
 *
 * Uses bcrypt's promise API directly. The previous callback version did not
 * return after `reject(err)`, so a salt-generation failure still went on to
 * call `bcrypt.hash` with an undefined salt.
 *
 * @param password The plaintext password to hash.
 * @returns A promise resolving to the bcrypt hash string.
 * @throws Rejects with the bcrypt error if salt generation or hashing fails.
 */
export const hashPassword = async (password: string): Promise<string> => {
  const salt = await bcrypt.genSalt(12);
  return bcrypt.hash(password, salt);
};
/**
 * Looks up an environment variable, falling back to an optional default.
 *
 * An empty string counts as "not set" for both the environment value and the
 * default value.
 *
 * @param key Name of the environment variable.
 * @param defaultValue Value used when the variable is unset or empty.
 * @returns The environment value if non-empty, otherwise the default.
 * @throws Error when neither a non-empty environment value nor a non-empty default exists.
 */
export const getEnvVariable = (key: string, defaultValue?: string): string => {
  const fromEnvironment = process.env[key];
  if (fromEnvironment) {
    return fromEnvironment;
  }

  if (defaultValue) {
    return defaultValue;
  }

  throw new Error(`Environment variable ${key} is not defined`);
};
lastUsedSelectorTagName: string; lastUsedSelectorInnerText: string; } interface MetaData { name: string; id: string; createdAt: string; pairs: number; updatedAt: string; params: string[], type?: 'extract' | 'scrape' | 'crawl' | 'search'; isLogin?: boolean; } interface InputState { selector: string; value: string; type: string; cursorPosition: number; } /** * Workflow generator is used to transform the user's interactions into an automatically * generated correct workflows, using the ability of internal state persistence and * heuristic generative algorithms. * This class also takes care of the selector generation. * @category WorkflowManagement */ export class WorkflowGenerator { /** * The socket used to communicate with the client. * @private */ private socket: Socket; /** * getList is one of the custom actions from maxun-core. * Used to provide appropriate selectors for the getList action. */ private getList: boolean = false; private listSelector: string = ''; private paginationMode: boolean = false; private poolId: string | null = null; private pageCloseListeners: Map void> = new Map(); /** * The public constructor of the WorkflowGenerator. * Takes socket for communication as a parameter and registers some important events on it. * @param socket The socket used to communicate with the client. * @constructor */ public constructor(socket: Socket, poolId: string) { this.socket = socket; this.poolId = poolId; this.registerEventHandlers(socket); this.initializeSocketListeners(); this.initializeDOMListeners(); } /** * The current workflow being recorded. * @private */ private workflowRecord: WorkflowFile = { workflow: [], }; private isDOMMode: boolean = false; /** * Metadata of the currently recorded workflow. * @private */ private recordingMeta: MetaData = { name: '', id: '', createdAt: '', pairs: 0, updatedAt: '', params: [], isLogin: false, } /** * The persistent data from the whole workflow generation process. * Used for correct generation of other user inputs. 
/**
 * Subscribes to the client events that toggle generator state:
 * `setGetList` updates `getList`, `listSelector` updates `listSelector`,
 * and `setPaginationMode` updates `paginationMode`.
 */
private initializeSocketListeners() {
  const { socket } = this;

  socket.on('setGetList', ({ getList }: { getList: boolean }) => {
    this.getList = getList;
  });

  socket.on('listSelector', ({ selector }: { selector: string }) => {
    this.listSelector = selector;
  });

  socket.on('setPaginationMode', ({ pagination }: { pagination: boolean }) => {
    this.paginationMode = pagination;
  });
}
/**
 * Registers the generator's workflow-related handlers on the socket:
 * `save`, `new-recording`, `activeIndex`, `decision` and `updatePair`.
 * @param socket The socket used to communicate with the client.
 * @private
 */
private registerEventHandlers = (socket: Socket) => {
  socket.on('save', (data) => {
    const { fileName, userId, isLogin, robotId } = data;
    logger.log('debug', `Saving workflow ${fileName} for user ID ${userId}`);
    // Fire-and-forget, exactly as before: the save is not awaited here.
    this.saveNewWorkflow(fileName, userId, isLogin, robotId);
  });

  // A new recording starts from an empty workflow.
  socket.on('new-recording', () => {
    this.workflowRecord = { workflow: [] };
  });

  socket.on('activeIndex', (data) => {
    this.generatedData.lastIndex = parseInt(data);
  });

  socket.on('decision', async ({ pair, actionType, decision, userId }) => {
    // Only a rejected decision on a custom action, within an active pool, changes anything.
    if (!this.poolId || decision || actionType !== 'customAction') {
      return;
    }

    const currentSelectors = pair.where.selectors;
    if (currentSelectors) {
      // Drop the last used selector from the pair's where-condition.
      pair.where.selectors = currentSelectors.filter(
        (selector: string) => selector !== this.generatedData.lastUsedSelector
      );
    }
  });

  socket.on('updatePair', (data) => {
    this.updatePairInWorkflow(data.index, data.pair);
  });
};
The pair's where selector is located on the page at the same time as another pair's where selector, * having the same url. This state is called over-shadowing an already existing pair. * If so, the pair is merged with the previous over-shadowed pair - what part is attached and * new selector added to the where selectors. In case the over-shadowed pair is further down the * workflow array, the new pair is added to the beginning of the workflow array. * * This function also makes sure to add a waitForLoadState and a generated flag * action after every new action or pair added. The [waitForLoadState](https://playwright.dev/docs/api/class-frame#frame-wait-for-load-state) * action waits for the networkidle event to be fired, * and the generated flag action is used for making pausing the interpretation possible. * * @param pair The pair to add to the workflow. * @param page The page to use for the state checking. * @private * @returns {Promise} */ private addPairToWorkflowAndNotifyClient = async (pair: WhereWhatPair, page: Page) => { let matched = false; if (pair.what[0].action === 'scrapeSchema') { const schema = pair.what[0]?.args?.[0]; if (schema) { const additionalSelectors = await this.getSelectorsForSchema(page, schema); pair.where.selectors = [...(pair.where.selectors || []), ...additionalSelectors]; } } if (pair.where.selectors && pair.where.selectors[0]) { const match = selectorAlreadyInWorkflow(pair.where.selectors[0], this.workflowRecord.workflow); if (match) { const matchedIndex = this.workflowRecord.workflow.indexOf(match); if (pair.what[0].action !== 'waitForLoadState' && pair.what[0].action !== 'press') { pair.what.push({ action: 'waitForLoadState', args: ['networkidle'], }); } this.workflowRecord.workflow[matchedIndex].what = this.workflowRecord.workflow[matchedIndex].what.concat(pair.what); matched = true; } } if (!matched) { const handled = await this.handleOverShadowing(pair, page, this.generatedData.lastIndex || 0); if (!handled) { if 
/**
 * Applies a dropdown selection on the page and records it as a
 * `selectOption` pair in the workflow.
 *
 * A failure of `page.selectOption` is logged and otherwise ignored, so the
 * pair is recorded either way.
 *
 * @param page The page on which the option is selected.
 * @param data The target `selector` and the option `value` to select.
 */
public onDropdownSelection = async (page: Page, data: { selector: string, value: string }) => {
  const { selector, value } = data;

  try {
    await page.selectOption(selector, value);
  } catch (error) {
    // Message previously read "Failed to fill date value:", copied from the date handler.
    console.error("Failed to select dropdown option:", error);
  }

  const pair: WhereWhatPair = {
    where: { url: this.getBestUrl(page.url()) },
    what: [{
      action: 'selectOption',
      args: [selector, value],
    }],
  };

  await this.addPairToWorkflowAndNotifyClient(pair, page);
};
/**
 * Records a click captured in DOM mode as a `click` pair.
 *
 * When element info and coordinates are both supplied and the element is an
 * INPUT or TEXTAREA, the click also carries the click position and a cursor
 * index of 0. Otherwise only the selector is recorded. The selector is
 * remembered as the last used selector with `click` as the last action.
 *
 * @param page The page passed on to the workflow update.
 * @param data Click details: selector, page url, user id and optional element info / coordinates.
 */
public onDOMClickAction = async (page: Page, data: { selector: string, url: string, userId: string, elementInfo?: any, coordinates?: { x: number, y: number } }) => {
  const { selector, url, elementInfo, coordinates } = data;

  const isTextField =
    Boolean(elementInfo && coordinates) &&
    ['INPUT', 'TEXTAREA'].includes(elementInfo.tagName);

  const pair: WhereWhatPair = {
    where: {
      url: this.getBestUrl(url),
      selectors: [selector],
    },
    what: [{
      action: 'click',
      args: isTextField
        ? [selector, { position: coordinates }, { cursorIndex: 0 }]
        : [selector],
    }],
  };

  this.generatedData.lastUsedSelector = selector;
  this.generatedData.lastAction = 'click';

  await this.addPairToWorkflowAndNotifyClient(pair, page);
};
* @param coordinates The coordinates of the click event. * @param page The page to use for obtaining the needed data. * @returns {Promise} */ public onClick = async (coordinates: Coordinates, page: Page) => { let where: WhereWhatPair["where"] = { url: this.getBestUrl(page.url()) }; const selector = await this.generateSelector(page, coordinates, ActionType.Click); logger.log('debug', `Element's selector: ${selector}`); const elementInfo = await getElementInformation(page, coordinates, '', false); console.log("Element info: ", elementInfo); const isDropdown = elementInfo?.tagName === 'SELECT'; if (isDropdown && elementInfo.innerHTML) { const options = elementInfo.innerHTML .split(' { const valueMatch = optionHtml.match(/value="([^"]*)"/); const disabledMatch = optionHtml.includes('disabled="disabled"'); const selectedMatch = optionHtml.includes('selected="selected"'); const textMatch = optionHtml.match(/>([^<]*) { const getCursorPosition = (element: any, clickX: any) => { const text = element.value; const mirror = document.createElement('div'); const style = window.getComputedStyle(element); mirror.style.cssText = ` font: ${style.font}; line-height: ${style.lineHeight}; padding: ${style.padding}; border: ${style.border}; box-sizing: ${style.boxSizing}; white-space: ${style.whiteSpace}; overflow-wrap: ${style.overflowWrap}; position: absolute; top: -9999px; left: -9999px; width: ${element.offsetWidth}px; `; document.body.appendChild(mirror); const paddingLeft = parseFloat(style.paddingLeft); const borderLeft = parseFloat(style.borderLeftWidth); const adjustedClickX = clickX - (paddingLeft + borderLeft); let bestIndex = 0; let bestDiff = Infinity; for (let i = 0; i <= text.length; i++) { const textBeforeCursor = text.substring(0, i); const span = document.createElement('span'); span.textContent = textBeforeCursor; mirror.innerHTML = ''; mirror.appendChild(span); const textWidth = span.getBoundingClientRect().width; const diff = Math.abs(adjustedClickX - textWidth); if 
(diff < bestDiff) { bestIndex = i; bestDiff = diff; } } document.body.removeChild(mirror); return bestIndex; }; const element = document.querySelector(selector) as HTMLInputElement | HTMLTextAreaElement; if (!element) return null; const rect = element.getBoundingClientRect(); const relativeX = coords.x - rect.left; return { rect: { x: rect.left, y: rect.top }, cursorIndex: getCursorPosition(element, relativeX) }; }, { selector, coords: coordinates } ); if (positionAndCursor) { const relativeX = coordinates.x - positionAndCursor.rect.x; const relativeY = coordinates.y - positionAndCursor.rect.y; const pair: WhereWhatPair = { where, what: [{ action: 'click', args: [selector, { position: { x: relativeX, y: relativeY } }, { cursorIndex: positionAndCursor.cursorIndex }], }] }; if (selector) { this.generatedData.lastUsedSelector = selector; this.generatedData.lastAction = 'click'; } await this.addPairToWorkflowAndNotifyClient(pair, page); return; } } if (selector) { where.selectors = [selector]; } const pair: WhereWhatPair = { where, what: [{ action: 'click', args: [selector], }], } if (selector) { this.generatedData.lastUsedSelector = selector; this.generatedData.lastAction = 'click'; } await this.addPairToWorkflowAndNotifyClient(pair, page); }; /** * Generates a pair for the change url event. * @param newUrl The new url to be changed to. * @param page The page to use for obtaining the needed data. 
/**
 * Returns tag name and text content for the specified selector
 * used in customAction for decision modal.
 *
 * Both values are read in a single `evaluate` call, and the element handle
 * is disposed afterwards so it does not stay referenced.
 *
 * @param page The page to query.
 * @param selector Selector of the element to inspect.
 * @returns `{ tagName, innerText }` of the first matching element, or empty
 *          strings for both when nothing matches.
 */
private async getLastUsedSelectorInfo(page: Page, selector: string) {
  const elementHandle = await page.$(selector);
  if (!elementHandle) {
    return { tagName: '', innerText: '' };
  }

  try {
    // TODO: based on tagName, send data. Always innerText won't hold true. For now, can roll.
    return await elementHandle.evaluate((el) => {
      const element = el as HTMLElement;
      return { tagName: element.tagName, innerText: element.innerText };
    });
  } finally {
    // Best-effort cleanup: a failed dispose must not mask the result or the original error.
    await elementHandle.dispose().catch(() => undefined);
  }
}
{ actionId } : {}), }], }; if (actionId) { const existingIndex = this.workflowRecord.workflow.findIndex( (workflowPair) => Array.isArray(workflowPair.what) && workflowPair.what.some((whatItem: any) => whatItem.actionId === actionId) ); if (existingIndex !== -1) { const existingPair = this.workflowRecord.workflow[existingIndex]; const existingAction = existingPair.what.find((whatItem: any) => whatItem.actionId === actionId); const updatedAction = { ...existingAction, action, args: Array.isArray(actionSettings) ? actionSettings : [actionSettings], name: actionName || existingAction?.name || '', actionId, }; this.workflowRecord.workflow[existingIndex] = { where: JSON.parse(JSON.stringify(existingPair.where)), what: existingPair.what.map((whatItem: any) => whatItem.actionId === actionId ? updatedAction : whatItem ), }; if (action === 'scrapeSchema' && actionName) { this.workflowRecord.workflow.forEach((pair, index) => { pair.what.forEach((whatItem: any, whatIndex: number) => { if (whatItem.action === 'scrapeSchema' && whatItem.actionId !== actionId) { this.workflowRecord.workflow[index].what[whatIndex] = { ...whatItem, name: actionName }; } }); }); } } else { await this.addPairToWorkflowAndNotifyClient(pair, page); logger.log("debug", `Added new workflow action: ${action} with actionId: ${actionId}`); } } else { await this.addPairToWorkflowAndNotifyClient(pair, page); logger.log("debug", `Added new workflow action: ${action} without actionId`); } if (this.generatedData.lastUsedSelector) { const elementInfo = await this.getLastUsedSelectorInfo( page, this.generatedData.lastUsedSelector ); this.socket.emit('decision', { pair, actionType: 'customAction', lastData: { selector: this.generatedData.lastUsedSelector, action: this.generatedData.lastAction, tagName: elementInfo.tagName, innerText: elementInfo.innerText, }, }); } } catch (e) { const { message } = e as Error; logger.log("warn", `Error handling customAction: ${message}`); } }; /** * Returns the currently generated 
workflow.
   * @returns {WorkflowFile}
   */
  public getWorkflowFile = () => {
    return this.workflowRecord;
  };

  /**
   * Removes a pair from the currently generated workflow.
   *
   * Indices count from the END of the stored array: index 0 addresses the
   * last stored pair and `length - 1` addresses the first one.
   * @param index The index of the pair to be removed (valid: `0 .. length - 1`).
   * @returns void
   */
  public removePairFromWorkflow = (index: number) => {
    const pairs = this.workflowRecord.workflow;
    // `index === pairs.length` must be rejected: it would translate to
    // `splice(-1, 1)` and silently delete the last stored pair instead.
    if (index >= 0 && index < pairs.length) {
      pairs.splice(pairs.length - (index + 1), 1);
      logger.log('debug', `pair ${index}: Removed from workflow file.`);
    } else {
      logger.log('error', `Delete pair ${index}: Index out of range.`);
    }
  };

  /**
   * Adds a new pair to the currently generated workflow.
   *
   * Uses the same reversed addressing as the removal: the pair is inserted at
   * array position `length - index`, so `index === length` puts it at the
   * very front of the stored array.
   * @param index The index on which the pair should be added (valid: `0 .. length`).
   * @param pair The pair to be added.
   * @returns void
   */
  public addPairToWorkflow = (index: number, pair: WhereWhatPair) => {
    const pairs = this.workflowRecord.workflow;
    if (index >= 0 && index <= pairs.length) {
      pairs.splice(pairs.length - index, 0, pair);
      // Logged for every successful insertion, not only for the front one.
      logger.log('debug', `pair ${index}: Added to workflow file.`);
    } else {
      logger.log('error', `Add pair ${index}: Index out of range.`);
    }
  };

  /**
   * Updates a pair in the currently generated workflow.
   * @param index The index of the pair to be updated.
   * @param pair The pair to be used as a replacement.
* @returns void
   */
  public updatePairInWorkflow = (index: number, pair: WhereWhatPair) => {
    const pairs = this.workflowRecord.workflow;
    // Indices count from the end of the stored array (index 0 is the last
    // element). `index === pairs.length` has to be rejected: it would write
    // to `pairs[-1]`, which only creates a stray "-1" property on the array
    // instead of replacing a pair.
    if (index >= 0 && index < pairs.length) {
      pairs[pairs.length - (index + 1)] = pair;
    } else {
      logger.log('error', `Update pair ${index}: Index out of range.`);
    }
  };

  /**
   * Removes all socket listeners to prevent memory leaks
   * Must be called before re-registering listeners or during cleanup
   * @private
   */
  private removeSocketListeners(): void {
    // Every event name this method detaches from the current socket.
    const generatorEvents = [
      'setGetList',
      'listSelector',
      'setPaginationMode',
      'dom-mode-enabled',
      'screenshot-mode-enabled',
      'save',
      'new-recording',
      'activeIndex',
      'decision',
      'updatePair',
    ];

    try {
      for (const eventName of generatorEvents) {
        this.socket.removeAllListeners(eventName);
      }
      logger.log('debug', 'Removed all Generator socket listeners');
    } catch (error: any) {
      // Best effort: a failure here is reported but never propagated.
      logger.warn(`Error removing Generator socket listeners: ${error.message}`);
    }
  }

  /**
   * Removes an action with the given actionId from the workflow.
   * Only removes the specific action from the what array, not the entire pair.
   * If the what array becomes empty after removal, then the entire pair is removed.
* @param actionId The actionId of the action to remove
   * @returns boolean indicating whether an action was removed
   */
  public removeAction = (actionId: string): boolean => {
    const survivingPairs: WhereWhatPair[] = [];
    let actionWasRemoved = false;

    for (const pair of this.workflowRecord.workflow) {
      const remainingActions = pair.what.filter(
        (whatItem: any) => whatItem.actionId !== actionId
      );

      // Nothing matched inside this pair: keep the very same object.
      if (remainingActions.length >= pair.what.length) {
        survivingPairs.push(pair);
        continue;
      }

      actionWasRemoved = true;
      // A pair that is left without any action is dropped altogether.
      if (remainingActions.length > 0) {
        survivingPairs.push({ ...pair, what: remainingActions });
      }
    }

    this.workflowRecord.workflow = survivingPairs;

    if (actionWasRemoved) {
      logger.log("info", `Action with actionId ${actionId} removed from workflow`);
    } else {
      logger.log("debug", `No action found with actionId ${actionId}`);
    }

    return actionWasRemoved;
  };

  /**
   * Switches the generator to a different client socket and runs the three
   * registration routines (event handlers, socket listeners, DOM listeners)
   * for it.
   * @param socket The socket to be used for communication.
   * @returns void
   */
  public updateSocket = (socket: Socket): void => {
    // The new socket has to be stored first: the initializers below take no
    // socket argument and therefore work on `this.socket`.
    this.socket = socket;
    this.registerEventHandlers(socket);
    this.initializeSocketListeners();
    this.initializeDOMListeners();
  };

  /**
   * Releases everything the generator holds: socket listeners, tracked page
   * `close` listeners, the recorded workflow and the last-generated data.
   * Errors are logged and never thrown.
   */
  public cleanup(): void {
    try {
      this.removeSocketListeners();

      for (const entry of this.pageCloseListeners.entries()) {
        const trackedPage = entry[0];
        const closeListener = entry[1];
        try {
          // Listeners are only detached from pages that are still open.
          if (trackedPage.isClosed()) {
            continue;
          }
          trackedPage.removeListener('close', closeListener);
        } catch (error: any) {
          logger.warn(`Error removing page close listener: ${error.message}`);
        }
      }
      this.pageCloseListeners.clear();

      // Back to the empty recording state.
      this.workflowRecord = { workflow: [] };
      this.generatedData = {
        lastUsedSelector: '',
        lastIndex: null,
        lastAction: '',
        lastUsedSelectorTagName: '',
        lastUsedSelectorInnerText: '',
      };

      logger.log('debug', 'Generator cleanup completed');
    } catch (error: any) {
      logger.error(`Error during Generator cleanup: ${error.message}`);
    }
  }

  /**
   * Adds generated flag actions to the workflow's pairs' what conditions.
* @param workflow The workflow for adding the generated flag actions from.
   * @returns {WorkflowFile} A deep (JSON) copy of the workflow in which every pair's
   * `what` array starts with a `flag` action; the input itself is left untouched.
   */
  public AddGeneratedFlags = (workflow: WorkflowFile): WorkflowFile => {
    const flagged = JSON.parse(JSON.stringify(workflow));

    flagged.workflow.forEach((clonedPair: any) => {
      // A fresh action object per pair, placed in front of the existing actions.
      clonedPair.what.unshift({ action: 'flag', args: ['generated'] });
    });

    return flagged;
  };

  /**
   * Replaces the currently generated workflow and its metadata.
   * When the new workflow contains parameters they are written into the
   * given metadata object.
   * Used for loading a recorded workflow to already initialized Generator.
   * @param workflowFile The workflow file to be used as a replacement for the current generated workflow.
   * @param meta The metadata stored alongside the workflow.
   * @returns void
   */
  public updateWorkflowFile = (workflowFile: WorkflowFile, meta: MetaData) => {
    this.recordingMeta = meta;

    const detectedParams = this.checkWorkflowForParams(workflowFile);
    if (detectedParams) {
      this.recordingMeta.params = detectedParams;
    }

    this.workflowRecord = workflowFile;
  }

  /**
   * Creates a recording metadata and stores the current workflow
   * with the metadata to the file system.
   * @param fileName The name of the file.
* @param userId The id stored as the owner of a newly created robot.
   * @param isLogin Stored as `isLogin` in the metadata of a newly created robot.
   * @param robotId When given, the robot whose `recording_meta.id` matches is
   * updated ("retrained") instead of creating a new one.
   * @returns {Promise<void>} Resolves after `fileSaved` was emitted with an
   * `actionType` of `'saved'`, `'retrained'` or `'error'`.
   */
  public saveNewWorkflow = async (fileName: string, userId: number, isLogin: boolean, robotId?: string) => {
    const recording = this.optimizeWorkflow(this.workflowRecord);
    let actionType = 'saved';

    try {
      if (robotId) {
        const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId }});

        if (robot) {
          await robot.update({
            recording: recording,
            recording_meta: {
              ...robot.recording_meta,
              pairs: recording.workflow.length,
              params: this.getParams() || [],
              updatedAt: new Date().toLocaleString(),
            },
          })

          actionType = 'retrained';
          logger.log('info', `Robot retrained with id: ${robot.id}`);
        } else {
          // Nothing was written, so the client must not be told 'saved'
          // (the previous code fell through with the initial value).
          actionType = 'error';
          logger.log('warn', `Cannot retrain: no robot found with id ${robotId}`);
        }
      } else {
        this.recordingMeta = {
          name: fileName,
          id: uuid(),
          createdAt: this.recordingMeta.createdAt || new Date().toLocaleString(),
          pairs: recording.workflow.length,
          updatedAt: new Date().toLocaleString(),
          params: this.getParams() || [],
          type: this.recordingMeta.type || 'extract',
          isLogin: isLogin,
        }
        const robot = await Robot.create({
          userId,
          recording_meta: this.recordingMeta,
          recording: recording,
        });
        capture(
          'maxun-oss-robot-created',
          {
            robot_meta: robot.recording_meta,
            recording: robot.recording,
          }
        )

        actionType = 'saved';
        logger.log('info', `Robot saved with id: ${robot.id}`);
      }
    } catch (e) {
      // Any failure of the lookup/update/create is reported to the client as 'error'.
      logger.log('warn', `Cannot save the file to the local file system ${e}`)
      actionType = 'error';
    }

    this.socket.emit('fileSaved', { actionType });
  }

  /**
   * Uses a system of functions to generate a correct and unique css selector
   * according to the action being performed.
   * @param page The page to be used for obtaining the information and selector.
   * @param coordinates The coordinates of the element.
   * @param action The action for which the selector is being generated.
* @private
   * @returns {Promise} In pagination mode the comma-joined chain of all available
   * selectors, otherwise the result of `getBestSelectorForAction`.
   */
  private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType) => {
    const elementInfo = await getElementInformation(page, coordinates, this.listSelector, this.getList);
    // List capture uses non-unique selectors scoped by the list selector.
    const selectorBasedOnCustomAction = (this.getList === true)
      ? await getNonUniqueSelectors(page, coordinates, this.listSelector)
      : await getSelectors(page, coordinates);

    if (this.paginationMode && selectorBasedOnCustomAction) {
      const selectors = selectorBasedOnCustomAction;
      // Every selector kind that is present is joined with ','.
      const selectorChain = [
        selectors?.iframeSelector?.full,
        selectors?.shadowSelector?.full,
        selectors?.testIdSelector,
        selectors?.id,
        selectors?.hrefSelector,
        selectors?.relSelector,
        selectors?.accessibilitySelector,
        selectors?.attrSelector
      ]
        .filter(selector => selector !== null && selector !== undefined)
        .join(',');

      return selectorChain;
    }

    const bestSelector = getBestSelectorForAction(
      {
        type: action,
        tagName: elementInfo?.tagName as TagName || '',
        inputType: undefined,
        value: undefined,
        selectors: selectorBasedOnCustomAction || {},
        timestamp: 0,
        isPassword: false,
        hasOnlyText: elementInfo?.hasOnlyText || false,
      } as Action,
    );
    return bestSelector;
  }

  /**
   * Generates data for highlighting the element on client side and emits the
   * highlighter event to the client. Nothing is emitted when no rectangle
   * could be obtained for the coordinates.
   * @param page The page to be used for obtaining data.
   * @param coordinates The coordinates of the element.
   * @returns {Promise<void>}
   */
  public generateDataForHighlighter = async (page: Page, coordinates: Coordinates) => {
    const rect = await getRect(page, coordinates, this.listSelector, this.getList);
    const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click);
    const elementInfo = await getElementInformation(page, coordinates, this.listSelector, this.getList);

    if (!rect) {
      return;
    }

    const highlighterData = {
      rect,
      selector: displaySelector,
      elementInfo,
      isDOMMode: this.isDOMMode,
      // Include shadow DOM specific information
      shadowInfo: elementInfo?.isShadowRoot ? {
        mode: elementInfo.shadowRootMode,
        content: elementInfo.shadowRootContent
      } : null
    };

    // Child selectors are only attached while a list with a known selector
    // is being captured; every other case sends the plain payload.
    if (this.getList === true && this.listSelector !== '') {
      const childSelectors = await getChildSelectors(page, this.listSelector || '');
      this.socket.emit('highlighter', { ...highlighterData, childSelectors })
    } else {
      this.socket.emit('highlighter', { ...highlighterData });
    }
  }

  /**
   * Notifies the client about the change of the url if navigation
   * happens after some performed action. Does nothing without a socket.
   * @param url The new url.
   * @returns void
   */
  public notifyUrlChange = (url: string) => {
    if (this.socket) {
      this.socket.emit('urlChanged', url);
    }
  }

  /**
   * Notifies the client about the new tab if popped-up
   * @param page The page to be used for obtaining data.
   * @param pageIndex The index of the page.
   * @returns void
   */
  public notifyOnNewTab = (page: Page, pageIndex: number) => {
    if (this.socket) {
      page.on('close', () => {
        this.socket.emit('tabHasBeenClosed', pageIndex);
      })

      // `new URL()` throws on a string it cannot parse; in that case the tab
      // is still announced, just with the generic label.
      let host: string | undefined;
      try {
        const parsedUrl = new URL(page.url());
        host = parsedUrl.hostname?.match(/\b(?!www\.)[a-zA-Z0-9]+/g)?.join('.');
      } catch {
        host = undefined;
      }
      this.socket.emit('newTab', host ? host : 'new tab')
    }
  }

  /**
   * Records a `goBack` action for navigating to the previous page.
   * The action is appended to the first rule of the workflow, then the client
   * is notified about the new url and receives the updated workflow.
   * @param newUrl The previous page's url.
   * @returns void
   */
  public onGoBack = (newUrl: string) => {
    const firstPair = this.workflowRecord.workflow[0];
    if (firstPair) {
      firstPair.what.push({
        action: 'goBack',
        args: [{ waitUntil: 'commit' }],
      });
    } else {
      // An empty workflow has no rule to attach the action to; previously
      // this threw a TypeError before the client could be notified.
      logger.log('warn', 'Go back: workflow is empty, the action was not recorded.');
    }
    this.notifyUrlChange(newUrl);
    this.socket.emit('workflow', this.workflowRecord);
  }

  /**
   * Generates a pair for navigating to the next page.
* This function alone adds the pair to the workflow and notifies the client.
   * It's safe to always add a go forward action to the first rule in the workflow and do not check
   * general conditions for adding a pair to the workflow.
   * @param newUrl The next page's url.
   * @returns void
   */
  public onGoForward = (newUrl: string) => {
    const firstPair = this.workflowRecord.workflow[0];
    if (firstPair) {
      firstPair.what.push({
        action: 'goForward',
        args: [{ waitUntil: 'commit' }],
      });
    } else {
      // An empty workflow has no rule to attach the action to; previously
      // this threw a TypeError before the client could be notified.
      logger.log('warn', 'Go forward: workflow is empty, the action was not recorded.');
    }
    this.notifyUrlChange(newUrl);
    this.socket.emit('workflow', this.workflowRecord);
  }

  /**
   * Checks and returns possible pairs that would get over-shadowed by the pair
   * from the current workflow.
   *
   * Every stored pair whose `where.url` is strictly equal (`===`) to the given
   * pair's url becomes a candidate; a candidate is marked when it has
   * selectors and `isRuleOvershadowing` reports true for them on the page.
   * @param pair The pair that could be over-shadowing.
   * @param page The page to be used for checking the visibility and accessibility of the selectors.
   * @private
   * @returns {Promise} One entry per same-url pair, carrying the pair's index in the
   * workflow and the over-shadowing flag.
   */
  private IsOverShadowingAction = async (pair: WhereWhatPair, page: Page) => {
    type possibleOverShadow = {
      index: number;
      isOverShadowing: boolean;
    }

    const possibleOverShadow: possibleOverShadow[] = [];
    // `possibleOverShadow[i]` and `haveSameUrl[i]` describe the same pair:
    // both arrays are filled in the same pass.
    const haveSameUrl = this.workflowRecord.workflow
      .filter((p, index) => {
        if (p.where.url === pair.where.url) {
          possibleOverShadow.push({ index: index, isOverShadowing: false });
          return true;
        }
        return false;
      });

    for (let i = 0; i < haveSameUrl.length; i++) {
      const selectors = haveSameUrl[i].where.selectors;
      if (selectors && selectors.length > 0) {
        //@ts-ignore
        const isOverShadowing = await isRuleOvershadowing(selectors, page);
        if (isOverShadowing) {
          possibleOverShadow[i].isOverShadowing = true;
        }
      }
    }
    return possibleOverShadow;
  }

  /**
   * General over-shadowing handler.
   * Checks for possible over-shadowed pairs and if found,
   * adds the pair to the workflow in the correct way.
   * @param pair The pair that could be over-shadowing.
   * @param page The page to be used for checking the visibility and accessibility of the selectors.
* @param index The workflow index of the pair the new pair should be merged into.
   * @private
   * @returns {Promise<boolean>} True when the pair at `index` is over-shadowing and the
   * given pair was merged into it, false otherwise.
   */
  private handleOverShadowing = async (pair: WhereWhatPair, page: Page, index: number): Promise<boolean> => {
    const candidates = await this.IsOverShadowingAction(pair, page);

    // Look through ALL over-shadowing candidates for the requested index.
    // The previous loop returned during its first iteration, so a match at
    // any later position was never found.
    const targetIsOverShadowing = candidates.some(
      (candidate) => candidate.isOverShadowing && candidate.index === index
    );
    if (!targetIsOverShadowing) {
      return false;
    }

    const target = this.workflowRecord.workflow[index];
    if (pair.where.selectors) {
      for (const selector of pair.where.selectors) {
        // Skip only the duplicate and keep going; the previous `break`
        // dropped every selector that followed an already known one.
        if (!target.where.selectors?.includes(selector)) {
          target.where.selectors?.push(selector);
        }
      }
    }
    target.what = target.what.concat(pair.what);
    return true;
  }

  /**
   * Returns the best possible url representation for a where condition according to the heuristics.
   *
   * Without a query string the plain url (protocol, host, path, hash) is
   * returned. With a query string a `$regex` object is returned in which the
   * query is regex-escaped and every `&`-separated part that contains an
   * upper-case letter is replaced by `.*`.
   * @param url The url to be checked and possibly replaced.
   * @private
   * @returns {string | {$regex: string}}
   */
  private getBestUrl = (url: string) => {
    const parsedUrl = new URL(url);
    const protocol = parsedUrl.protocol === 'https:' || parsedUrl.protocol === 'http:' ? `${parsedUrl.protocol}//` : parsedUrl.protocol;
    // Deliberately not global: a `g` regex keeps `lastIndex` state between
    // `test()` calls, which is a trap when one instance checks many strings.
    const hasUpperCase = /[A-Z]/;
    const search = parsedUrl.search
      .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
      .split('&')
      .map((param) => (hasUpperCase.test(param) ? '.*' : param))
      .join('&');

    let bestUrl;
    if (search) {
      bestUrl = {
        $regex: `^${protocol}${parsedUrl.host}${parsedUrl.pathname}${search}${parsedUrl.hash}`
      }
    } else {
      bestUrl = `${protocol}${parsedUrl.host}${parsedUrl.pathname}${parsedUrl.hash}`;
    }
    return bestUrl;
  }

  /**
   * Returns parameters if present in the workflow or null.
   * @param workflow The workflow to be checked.
* @returns The `$param` values of the first action whose args contain any,
   * or null when no action carries one.
   */
  private checkWorkflowForParams = (workflow: WorkflowFile): string[] | null => {
    for (const pair of workflow.workflow) {
      for (const condition of pair.what) {
        if (condition.args) {
          // Collected per action: the search stops at the first action that
          // yields at least one parameter.
          const params: any[] = [];
          condition.args.forEach((arg) => {
            if (arg.$param) {
              params.push(arg.$param);
            }
          })
          if (params.length !== 0) {
            return params;
          }
        }
      }
    }
    return null;
  }

  /**
   * A function for workflow optimization once finished.
   *
   * Replays the recorded key presses per selector into a text value: `click`
   * actions carrying a `cursorIndex` and `press` actions are removed from the
   * pairs while an in-memory input state (value, type, cursor position) is
   * updated. Afterwards one `type` action (with the re-encrypted value) plus a
   * `waitForLoadState` action is appended for every selector whose value is
   * non-empty.
   *
   * The given workflow is mutated in place and returned.
   * @param workflow The workflow to be optimized.
   */
  private optimizeWorkflow = (workflow: WorkflowFile) => {
    // selector -> { selector, value, type, cursorPosition }
    const inputStates = new Map();

    for (const pair of workflow.workflow) {
      let currentIndex = 0;

      // Index based walk: removed actions shift the rest left, so the index
      // only advances when the current action is kept.
      while (currentIndex < pair.what.length) {
        const condition = pair.what[currentIndex];

        // A click that recorded a cursor index only moves the caret of the
        // tracked input; the action itself is dropped.
        if (condition.action === 'click' && condition.args?.[2]?.cursorIndex !== undefined) {
          const selector = condition.args[0];
          const cursorIndex = condition.args[2].cursorIndex;

          let state = inputStates.get(selector) || {
            selector,
            value: '',
            type: 'text',
            cursorPosition: -1
          };

          state.cursorPosition = cursorIndex;
          inputStates.set(selector, state);

          pair.what.splice(currentIndex, 1);
          continue;
        }

        if (condition.action === 'press' && condition.args?.[1]) {
          const [selector, encryptedKey, type] = condition.args;
          const key = decrypt(encryptedKey);

          let state = inputStates.get(selector);
          if (!state) {
            state = {
              selector,
              value: '',
              type: type || 'text',
              cursorPosition: -1
            };
          } else {
            state.type = type || state.type;
          }

          // A cursorPosition of -1 is handled as "append at / delete from the end".
          if (key.length === 1) {
            // Single character key: insert it.
            if (state.cursorPosition === -1) {
              state.value += key;
            } else {
              state.value =
                state.value.slice(0, state.cursorPosition) +
                key +
                state.value.slice(state.cursorPosition);
              state.cursorPosition++;
            }
          } else if (key === 'Backspace') {
            // Removes the character before the caret, or the last one.
            if (state.cursorPosition > 0) {
              state.value =
                state.value.slice(0, state.cursorPosition - 1) +
                state.value.slice(state.cursorPosition);
              state.cursorPosition--;
            } else if (state.cursorPosition === -1 && state.value.length > 0) {
              state.value = state.value.slice(0, -1);
            }
          } else if (key === 'Delete') {
            // Removes the character at the caret; without a caret the last one.
            if (state.cursorPosition >= 0 && state.cursorPosition < state.value.length) {
              state.value =
                state.value.slice(0, state.cursorPosition) +
                state.value.slice(state.cursorPosition + 1);
            } else if (state.cursorPosition === -1 && state.value.length > 0) {
              state.value = state.value.slice(0, -1);
            }
          }
          // Any other key leaves the value untouched, but the press action
          // is removed all the same.

          inputStates.set(selector, state);

          pair.what.splice(currentIndex, 1);
          continue;
        }

        currentIndex++;
      }
    }

    for (const [selector, state] of inputStates.entries()) {
      if (state.value) {
        // The loop breaks after its first iteration, so the actions always
        // end up in the LAST pair of the workflow array (none when empty).
        for (let i = workflow.workflow.length - 1; i >= 0; i--) {
          const pair = workflow.workflow[i];

          pair.what.push({
            action: 'type',
            args: [selector, encrypt(state.value), state.type]
          }, {
            action: 'waitForLoadState',
            args: ['networkidle']
          });

          break;
        }
      }
    }

    return workflow;
  };

  /**
   * Returns the workflow params found in the currently generated workflow
   * (see checkWorkflowForParams), or null when there are none.
   */
  public getParams = (): string[] | null => {
    return this.checkWorkflowForParams(this.workflowRecord);
  }

  /**
   * Clears the last generated data index.
   */
  public clearLastIndex = () => {
    this.generatedData.lastIndex = null;
  }
}

================================================ FILE: server/src/workflow-management/classes/Interpreter.ts ================================================
import Interpreter, { WorkflowFile } from "maxun-core";
import logger from "../../logger";
import { Socket } from "socket.io";
import { Page } from "playwright-core";
import { InterpreterSettings } from "../../types";
import { decrypt } from "../../utils/auth";
import Run from "../../models/Run";

/**
 * Decrypts any encrypted inputs in the workflow. If checkLimit is true, it will also handle the limit validation for scrapeList action.
 * @param workflow The workflow to decrypt.
 * @param checkLimit If true, it will handle the limit validation for scrapeList action.
* @returns A processed deep (JSON) copy; the given workflow is not modified.
 */
function processWorkflow(workflow: WorkflowFile, checkLimit: boolean = false): WorkflowFile {
  const processedWorkflow = JSON.parse(JSON.stringify(workflow)) as WorkflowFile;

  for (const pair of processedWorkflow.workflow) {
    for (const step of pair.what) {
      // Limit validation: a numeric scrapeList limit above 5 is lowered to 5.
      if (checkLimit && step.action === 'scrapeList' && Array.isArray(step.args) && step.args.length > 0) {
        const listConfig = step.args[0];
        if (listConfig && typeof listConfig === 'object' && 'limit' in listConfig) {
          if (typeof listConfig.limit === 'number' && listConfig.limit > 5) {
            listConfig.limit = 5;
          }
        }
      }

      // Only `type` / `press` actions with a second argument carry an encrypted value.
      const carriesEncryptedInput = step.action === 'type' || step.action === 'press';
      if (!carriesEncryptedInput || !Array.isArray(step.args) || step.args.length <= 1) {
        continue;
      }

      try {
        const encryptedValue = step.args[1];
        if (typeof encryptedValue === 'string') {
          step.args[1] = decrypt(encryptedValue);
        } else {
          logger.log('error', 'Encrypted value is not a string');
          step.args[1] = '';
        }
      } catch (error: unknown) {
        // A value that cannot be decrypted is replaced by an empty string.
        const errorMessage = error instanceof Error ? error.message : String(error);
        logger.log('error', `Failed to decrypt input value: ${errorMessage}`);
        step.args[1] = '';
      }
    }
  }

  return processedWorkflow;
}

/**
 * Runs workflows through the [maxun-core](https://www.npmjs.com/package/maxun-core)
 * interpreter.
 * It keeps the state of the current interpretation and listens to socket
 * events so that the client (frontend) can interact with the interpreter.
 * @category WorkflowManagement
 */
export class WorkflowInterpreter {
  /**
   * Socket.io socket used to talk to the client (frontend).
   * @private
   */
  private socket: Socket;

  /**
   * Whether the interpretation is currently paused.
   */
  public interpretationIsPaused: boolean = false;

  /**
   * The instance of the {@link Interpreter} class used to interpret the workflow.
   * From maxun-core.
* Null while no interpretation is running.
   * @private
   */
  private interpreter: Interpreter | null = null;

  /**
   * An id of the currently interpreted pair in the workflow.
   * @private
   */
  private activeId: number | null = null;

  /**
   * An array of debug messages emitted by the {@link Interpreter}.
   */
  public debugMessages: string[] = [];

  /**
   * Storage for different types of serializable data
   */
  // NOTE(review): `Record` is written without type arguments here; they look
  // like they were lost from this copy of the file. Confirm against the repository.
  public serializableDataByType: {
    scrapeSchema: Record;
    scrapeList: Record;
    crawl: Record;
    search: Record;
    [key: string]: any;
  } = {
    scrapeSchema: {},
    scrapeList: {},
    crawl: {},
    search: {},
  };

  // Name of the action currently being processed; null when none is set.
  private currentActionName: string | null = null;

  /**
   * Track the current action type being processed
   */
  private currentActionType: string | null = null;

  /**
   * An array of all the binary data extracted from the run.
   */
  public binaryData: { name: string; mimeType: string; data: string }[] = [];

  /**
   * Track current scrapeList index
   */
  private currentScrapeListIndex: number = 0;

  /**
   * Track action counts to generate unique names
   */
  // NOTE(review): `Record` lacks its type arguments here as well. Confirm against the repository.
  private actionCounts: Record = {};

  /**
   * Track used action names to prevent duplicates
   */
  // NOTE(review): `Set` lacks its type argument here as well. Confirm against the repository.
  private usedActionNames: Set = new Set();

  /**
   * Current run ID for real-time persistence
   */
  private currentRunId: string | null = null;

  /**
   * Batched persistence system for performance optimization
   */
  private persistenceBuffer: Array<{
    actionType: string;
    data: any;
    listIndex?: number;
    timestamp: number;
    creditValidated: boolean;
  }> = [];

  // Timer handles; both are cleared in clearState.
  private persistenceTimer: NodeJS.Timeout | null = null;
  private persistenceRetryTimer: NodeJS.Timeout | null = null;
  // Buffer length at which persistDataToDatabase flushes immediately.
  private readonly BATCH_SIZE = 5;
  // NOTE(review): presumably milliseconds before a scheduled flush. Confirm in scheduleBatchFlush.
  private readonly BATCH_TIMEOUT = 3000;
  private readonly MAX_PERSISTENCE_RETRIES = 3;
  private persistenceInProgress = false;
  private persistenceRetryCount = 0;

  /**
   * Breakpoint flags, indexed by pair id: when the entry for the active pair
   * is truthy, the interpretation is paused on the next `flag` event.
   * @private
   */
  private breakpoints: boolean[] = [];

  /**
   * Callback to resume the interpretation after a pause.
* Null whenever there is nothing to resume.
   * @private
   */
  private interpretationResume: (() => void) | null = null;

  /**
   * Creates an interpreter wrapper bound to the given client socket.
   * @param socket Socket.io socket instance enabling communication with the client (frontend) side.
   * @param runId Optional run ID for real-time data persistence
   * @constructor
   */
  constructor(socket: Socket, runId?: string) {
    this.socket = socket;
    // An absent (or empty) run id is stored as null.
    this.currentRunId = runId ? runId : null;
  }

  /**
   * Detaches every listener of the pausing-related socket events
   * (pause, resume, step, breakpoints). Failures are logged, never thrown.
   * @private
   */
  private removePausingListeners(): void {
    const pausingEvents = ['pause', 'resume', 'step', 'breakpoints'];

    try {
      for (const eventName of pausingEvents) {
        this.socket.removeAllListeners(eventName);
      }
      logger.log('debug', 'Removed pausing socket listeners');
    } catch (error: any) {
      logger.warn(`Error removing pausing listeners: ${error.message}`);
    }
  }

  /**
   * Subscribes to the events that are used to control the interpretation.
   * The events are pause, resume, step and breakpoints.
   * Step is used to interpret a single pair and pause on the other matched pair.
* @returns void
   */
  public subscribeToPausing = () => {
    // Drop earlier registrations first so handlers never stack up.
    this.removePausingListeners();

    this.socket.on('pause', () => {
      this.interpretationIsPaused = true;
    });
    this.socket.on('resume', () => {
      this.interpretationIsPaused = false;
      if (this.interpretationResume) {
        this.interpretationResume();
        this.socket.emit('log',
          '----- The interpretation has been resumed -----', false);
      } else {
        logger.log('debug', "Resume called but no resume function is set");
      }
    });
    // Unlike 'resume', 'step' leaves the paused flag set, so the next
    // 'flag' event pauses again.
    this.socket.on('step', () => {
      if (this.interpretationResume) {
        this.interpretationResume();
      } else {
        logger.log('debug', "Step called but no resume function is set");
      }
    });
    this.socket.on('breakpoints', (data: boolean[]) => {
      logger.log('debug', "Setting breakpoints: " + data);
      this.breakpoints = data
    });
  }

  /**
   * Sets up the instance of {@link Interpreter} and interprets
   * the workflow inside the recording editor.
   * Cleans up this interpreter instance after the interpretation is finished.
   *
   * Note: `params` is read from `settings` and then deleted from the passed-in
   * object, i.e. the caller's settings object is modified.
   * @param workflow The workflow to interpret.
   * @param page The page instance used to interact with the browser.
   * @param updatePageOnPause A callback to update the page after a pause.
   * @param settings The interpreter settings; spread into the interpreter options.
   * @returns {Promise}
   */
  public interpretRecordingInEditor = async (
    workflow: WorkflowFile,
    page: Page,
    updatePageOnPause: (page: Page) => void,
    settings: InterpreterSettings,
  ) => {
    const params = settings.params ? settings.params : null;
    delete settings.params;

    // `true` enables the scrapeList limit handling of processWorkflow.
    const processedWorkflow = processWorkflow(workflow, true);

    const options = {
      ...settings,
      mode: 'editor',
      debugChannel: {
        activeId: (id: any) => {
          this.activeId = id;
          this.socket.emit('activePairId', id);
        },
        debugMessage: (msg: any) => {
          this.debugMessages.push(`[${new Date().toLocaleString()}] ` + msg);
          this.socket.emit('log', msg)
        },
        setActionType: (type: string) => {
          this.currentActionType = type;
        }
      },
      // Data is persisted first and then forwarded to the client. Action
      // types other than scrapeSchema / scrapeList are ignored here.
      serializableCallback: async (data: any) => {
        if (this.currentActionType === 'scrapeSchema') {
          // A non-array (or empty array) payload is wrapped into a one-element array.
          const cumulativeScrapeSchemaData = Array.isArray(data) && data.length > 0 ? data : [data];

          if (cumulativeScrapeSchemaData.length > 0) {
            await this.persistDataToDatabase('scrapeSchema', cumulativeScrapeSchemaData);
          }

          if (Array.isArray(data) && data.length > 0) {
            this.socket.emit('serializableCallback', {
              type: 'captureText',
              data
            });
          } else {
            this.socket.emit('serializableCallback', {
              type: 'captureText',
              data : [data]
            });
          }
        } else if (this.currentActionType === 'scrapeList') {
          // Only non-empty arrays are persisted; the emit happens regardless.
          if (data && Array.isArray(data) && data.length > 0) {
            await this.persistDataToDatabase('scrapeList', data, this.currentScrapeListIndex);
          }

          this.socket.emit('serializableCallback', {
            type: 'captureList',
            data
          });
        }
      },
      binaryCallback: async (data: string, mimetype: string) => {
        // The stored item keeps the JSON-stringified payload; the client
        // receives the payload as it came in.
        const binaryItem = {
          name: `Screenshot ${Date.now()}`,
          mimeType: mimetype,
          data: JSON.stringify(data)
        };
        this.binaryData.push(binaryItem);

        await this.persistBinaryDataToDatabase(binaryItem);

        this.socket.emit('binaryCallback', {
          data,
          mimetype,
          type: 'captureScreenshot'
        });
      }
    }

    const interpreter = new Interpreter(processedWorkflow, options);
    this.interpreter = interpreter;

    interpreter.on('flag', async (page, resume) => {
      // A breakpoint on the active pair switches into paused mode.
      if (this.activeId !== null && this.breakpoints[this.activeId]) {
        logger.log('debug', `breakpoint hit id: ${this.activeId}`);
        this.socket.emit('breakpointHit');
        this.interpretationIsPaused = true;
      }

      if (this.interpretationIsPaused) {
        // Keep `resume` so the 'resume' / 'step' socket handlers can continue.
        this.interpretationResume = resume;
        logger.log('debug', `Paused inside of flag: ${page.url()}`);
        updatePageOnPause(page);
        this.socket.emit('log', '----- The interpretation has been paused -----', false);
      } else {
        resume();
      }
    });

    this.socket.emit('log', '----- Starting the interpretation -----', false);

    const status = await interpreter.run(page, params);

    this.socket.emit('log', `----- The interpretation finished with status: ${status} -----`, false);

    logger.log('debug', `Interpretation finished`);

    // Write out whatever is still buffered before the state is reset.
    await this.flushPersistenceBuffer();

    this.interpreter = null;
    this.socket.emit('activePairId', -1);
    this.interpretationIsPaused = false;
    this.interpretationResume = null;
    this.socket.emit('finished');
  };

  /**
   * Stops the current process of the interpretation of the workflow.
   * Aborts and stops the interpreter, informs the client and clears the
   * state; only logs an error when no interpretation is active.
   * @returns {Promise}
   */
  public stopInterpretation = async () => {
    if (this.interpreter) {
      logger.log('info', 'Stopping the interpretation.');

      this.interpreter.abort();
      logger.log('info', 'maxun-core interpreter aborted - data collection stopped immediately');

      await this.interpreter.stop();
      this.socket.emit('log', '----- The interpretation has been stopped -----', false);
      await this.clearState();
    } else {
      logger.log('error', 'Cannot stop: No active interpretation.');
    }
  };

  /**
   * Resets this instance to its initial state: flushes a non-empty
   * persistence buffer, clears both persistence timers, aborts/stops (and, if
   * available, cleans up) a still referenced interpreter, removes the pausing
   * listeners and resets all tracking fields, including the run id.
   * Errors from flushing or stopping are logged and not rethrown.
   */
  public clearState = async (): Promise => {
    // Flush before the buffer is emptied further below.
    if (this.persistenceBuffer.length > 0) {
      try {
        await this.flushPersistenceBuffer();
        logger.log('debug', 'Successfully flushed final persistence buffer during cleanup');
      } catch (error: any) {
        logger.log('error', `Failed to flush final persistence buffer: ${error.message}`);
      }
    }

    if (this.persistenceTimer) {
      clearTimeout(this.persistenceTimer);
      this.persistenceTimer = null;
    }

    if (this.persistenceRetryTimer) {
      clearTimeout(this.persistenceRetryTimer);
      this.persistenceRetryTimer = null;
    }

    if (this.interpreter) {
      try {
        if (!this.interpreter.getIsAborted()) {
          this.interpreter.abort();
        }
        await this.interpreter.stop();
        logger.log('debug', 'mx-cloud interpreter properly stopped during cleanup');

        // `cleanup` is only called when the interpreter provides it.
        if (typeof this.interpreter.cleanup === 'function') {
          await this.interpreter.cleanup();
          logger.log('debug', 'mx-cloud interpreter cleanup completed');
        }
      } catch (error: any) {
        logger.log('warn', `Error stopping mx-cloud interpreter during cleanup: ${error.message}`);
      }
    }

    this.removePausingListeners();

    this.debugMessages = [];
    this.interpretationIsPaused = false;
    this.activeId = null;
    this.interpreter = null;
    this.breakpoints = [];
    this.interpretationResume = null;
    this.currentActionType = null;
    this.currentActionName = null;
    this.serializableDataByType = {
      scrapeSchema: {},
      scrapeList: {},
      crawl: {},
      search: {},
    };
    this.binaryData = [];
    this.currentScrapeListIndex = 0;
    this.actionCounts = {};
    this.usedActionNames = new Set();
    this.currentRunId = null;
    this.persistenceBuffer = [];
    this.persistenceInProgress = false;
    this.persistenceRetryCount = 0;
  }

  /**
   * Sets the current run ID for real-time persistence.
   * @param runId The run ID to set
   */
  public setRunId = (runId: string): void => {
    this.currentRunId = runId;
    logger.log('debug', `Set run ID for real-time persistence: ${runId}`);
  };

  /**
   * Generates a unique action name for data storage
   *
   * A non-blank provided name that has not been used yet is taken as is.
   * Otherwise a numbered name is generated ("List n", "Text n",
   * "Screenshot n" or "<actionType> n"), counting up from the last counter
   * of that action type until the name is unused.
   * @param actionType The type of action (scrapeList, scrapeSchema, etc.)
   * @param providedName Optional name provided by the action
   * @returns A unique action name
   */
  private getUniqueActionName = (actionType: string, providedName?: string | null): string => {
    if (providedName && providedName.trim() !== '' && !this.usedActionNames.has(providedName)) {
      this.usedActionNames.add(providedName);
      return providedName;
    }

    if (!this.actionCounts[actionType]) {
      this.actionCounts[actionType] = 0;
    }

    let uniqueName: string;
    let counter = this.actionCounts[actionType];

    do {
      counter++;
      if (actionType === 'scrapeList') {
        uniqueName = `List ${counter}`;
      } else if (actionType === 'scrapeSchema') {
        uniqueName = `Text ${counter}`;
      } else if (actionType === 'screenshot') {
        uniqueName = `Screenshot ${counter}`;
      } else {
        uniqueName = `${actionType} ${counter}`;
      }
    } while (this.usedActionNames.has(uniqueName));

    // Remember the counter so the next search starts after it.
    this.actionCounts[actionType] = counter;
    this.usedActionNames.add(uniqueName);
    return uniqueName;
  };

  /**
   * Persists extracted data to database with intelligent batching for performance
   * Falls back to immediate persistence for critical operations
   *
   * Does nothing without a run id. The data is added to the batch; the buffer
   * is flushed right away for `scrapeSchema` data or once it holds at least
   * BATCH_SIZE entries, otherwise a batch flush is scheduled.
   * @private
   */
  private persistDataToDatabase = async (actionType: string, data: any, listIndex?: number): Promise => {
    if (!this.currentRunId) {
      logger.log('debug', 'No run ID available for persistence');
      return;
    }

    this.addToPersistenceBatch(actionType, data, listIndex, true);

    if (actionType === 'scrapeSchema' || this.persistenceBuffer.length >= this.BATCH_SIZE) {
      await this.flushPersistenceBuffer();
    } else {
      this.scheduleBatchFlush();
    }
  };

  /**
   * Persists binary data to database in real-time
   *
   * Does nothing without a run id or when the run cannot be found. The item
   * is stored in the run's `binaryOutput` under its trimmed name (or
   * "Screenshot n" when the name is blank), suffixed with " (k)" while that
   * key is already taken. Errors are logged and not rethrown.
   * @private
   */
  private persistBinaryDataToDatabase = async (binaryItem: { name: string; mimeType: string; data: string }): Promise => {
    if (!this.currentRunId) {
      logger.log('debug', 'No run ID available for binary data persistence');
      return;
    }

    try {
      const run = await Run.findOne({ where: { runId: this.currentRunId } });
      if (!run) {
        logger.log('warn', `Run not found for binary data persistence: ${this.currentRunId}`);
        return;
      }

      // Work on a JSON copy of the stored output (or start with an empty object).
      const currentBinaryOutput =
        run.binaryOutput && typeof run.binaryOutput === 'object'
          ? JSON.parse(JSON.stringify(run.binaryOutput))
          : {};

      const baseName = binaryItem.name?.trim() || `Screenshot ${Object.keys(currentBinaryOutput).length + 1}`;

      let uniqueName = baseName;
      let counter = 1;
      while (currentBinaryOutput[uniqueName]) {
        uniqueName = `${baseName} (${counter++})`;
      }

      const updatedBinaryOutput = {
        ...currentBinaryOutput,
        [uniqueName]: binaryItem,
      };

      await run.update({
        binaryOutput: updatedBinaryOutput
      });

      // The log shows the item's own name, which can differ from the key used above.
      logger.log('debug', `Persisted binary data for run ${this.currentRunId}: ${binaryItem.name} (${binaryItem.mimeType})`);
    } catch (error: any) {
      logger.log('error', `Failed to persist binary data in real-time for run ${this.currentRunId}: ${error.message}`);
    }
  };

  /**
   * Interprets the recording as a run.
   * @param workflow The workflow to interpret.
   * @param page The page instance used to interact with the browser.
   * @param settings The settings to use for the interpretation.
   */
  public InterpretRecording = async (
    workflow: WorkflowFile,
    page: Page,
    updatePageOnPause: (page: Page) => void,
    settings: InterpreterSettings
  ) => {
    const params = settings.params ?
settings.params : null; delete settings.params; const processedWorkflow = processWorkflow(workflow); let mergedScrapeSchema = {}; const options = { ...settings, debugChannel: { activeId: (id: any) => { this.activeId = id; this.socket.emit('activePairId', id); }, debugMessage: (msg: any) => { this.debugMessages.push(`[${new Date().toLocaleString()}] ` + msg); this.socket.emit('debugMessage', msg) }, setActionType: (type: string) => { this.currentActionType = type; }, incrementScrapeListIndex: () => { this.currentScrapeListIndex++; }, setActionName: (name: string) => { this.currentActionName = name; }, progressUpdate: (current: number, total: number, percentage: number) => { this.socket.nsp.emit('workflowProgress', { current, total, percentage }); }, }, serializableCallback: async (data: any) => { try { if (!data || typeof data !== "object") return; let typeKey = this.currentActionType || ""; if (this.currentActionType === "scrapeList") { typeKey = "scrapeList"; } else if (this.currentActionType === "scrapeSchema") { typeKey = "scrapeSchema"; } else if (this.currentActionType === "crawl") { typeKey = "crawl"; } else if (this.currentActionType === "search") { typeKey = "search"; } if (typeKey === "scrapeList" && data.scrapeList) { data = data.scrapeList; } else if (typeKey === "scrapeSchema" && data.scrapeSchema) { data = data.scrapeSchema; } else if (typeKey === "crawl" && data.crawl) { data = data.crawl; } else if (typeKey === "search" && data.search) { data = data.search; } let actionName = ""; if (typeKey === "scrapeList" && data && typeof data === "object" && !Array.isArray(data)) { const keys = Object.keys(data); if (keys.length === 1) { actionName = keys[0]; data = data[actionName]; } else if (keys.length > 1) { actionName = keys[keys.length - 1]; data = data[actionName]; } } else if (typeKey === "crawl" && data && typeof data === "object" && !Array.isArray(data)) { const keys = Object.keys(data); if (keys.length === 1) { actionName = keys[0]; data = 
data[actionName]; } else if (keys.length > 1) { actionName = keys[keys.length - 1]; data = data[actionName]; } } else if (typeKey === "search" && data && typeof data === "object" && !Array.isArray(data)) { const keys = Object.keys(data); if (keys.length === 1) { actionName = keys[0]; data = data[actionName]; } else if (keys.length > 1) { actionName = keys[keys.length - 1]; data = data[actionName]; } } if (!actionName) { actionName = this.currentActionName || ""; if (typeKey === "scrapeList" && !actionName) { actionName = this.getUniqueActionName(typeKey, ""); } else if (typeKey === "crawl" && !actionName) { actionName = this.getUniqueActionName(typeKey, "Crawl Results"); } else if (typeKey === "search" && !actionName) { actionName = this.getUniqueActionName(typeKey, "Search Results"); } } let processedData; if (typeKey === "search") { processedData = data; } else { processedData = Array.isArray(data) ? data : ( data?.List ?? (data && typeof data === "object" ? Object.values(data).flat?.() ?? 
data : []) ); } if (!this.serializableDataByType[typeKey]) { this.serializableDataByType[typeKey] = {}; } this.serializableDataByType[typeKey][actionName] = processedData; await this.persistDataToDatabase(typeKey, { [actionName]: processedData, }); this.socket.emit("serializableCallback", { type: typeKey, name: actionName, data: processedData, }); } catch (err: any) { logger.log('error', `serializableCallback handler failed: ${err.message}`); } }, binaryCallback: async (payload: { name: string; data: Buffer; mimeType: string }) => { try { const { name, data, mimeType } = payload; const base64Data = data.toString("base64"); const uniqueName = this.getUniqueActionName('screenshot', name); const binaryItem = { name: uniqueName, mimeType, data: base64Data }; this.binaryData.push(binaryItem); await this.persistBinaryDataToDatabase(binaryItem); this.socket.emit("binaryCallback", { name: uniqueName, data: base64Data, mimeType }); } catch (err: any) { logger.log("error", `binaryCallback handler failed: ${err.message}`); } } } const interpreter = new Interpreter(processedWorkflow, options); this.interpreter = interpreter; interpreter.on('flag', async (page, resume) => { if (this.activeId !== null && this.breakpoints[this.activeId]) { logger.log('debug', `breakpoint hit id: ${this.activeId}`); this.socket.emit('breakpointHit'); this.interpretationIsPaused = true; } if (this.interpretationIsPaused) { this.interpretationResume = resume; logger.log('debug', `Paused inside of flag: ${page.url()}`); updatePageOnPause(page); this.socket.emit('log', '----- The interpretation has been paused -----', false); } else { resume(); } }); const status = await interpreter.run(page, params); await this.flushPersistenceBuffer(); const result = { log: this.debugMessages, result: status, scrapeSchemaOutput: this.serializableDataByType.scrapeSchema, scrapeListOutput: this.serializableDataByType.scrapeList, binaryOutput: this.binaryData.reduce>((acc, item) => { const key = item.name || 
/**
 * Schedules a batched flush if not already scheduled.
 *
 * Arms a one-shot timer for `BATCH_TIMEOUT` ms unless a timer is already
 * pending or a flush is currently running.
 * @private
 */
private scheduleBatchFlush(): void {
  if (this.persistenceTimer || this.persistenceInProgress) {
    return;
  }

  this.persistenceTimer = setTimeout(async () => {
    // The handle is spent once the callback runs. Clear it first so that a
    // flush which returns early (empty buffer, flush already running) does
    // not leave a stale handle that blocks every later scheduleBatchFlush().
    this.persistenceTimer = null;
    await this.flushPersistenceBuffer();
  }, this.BATCH_TIMEOUT);
}

/**
 * Flushes the persistence buffer to the database in a single transaction.
 *
 * Returns immediately when the buffer is empty, a flush is already running,
 * or no run ID is set. Otherwise the whole buffer is taken as one batch and
 * merged into the run's `serializableOutput`:
 *  - `scrapeSchema` / `scrapeList` entries are flattened to arrays and
 *    replace the value stored under the same name;
 *  - `crawl` / `search` entries are shallow-merged by name.
 *
 * If the run cannot be found the batch is dropped with a warning. If the
 * transaction throws, the batch is put back at the front of the buffer and a
 * retry is scheduled with exponential backoff (capped at 30s), up to
 * `MAX_PERSISTENCE_RETRIES`; after that the batch is dropped.
 *
 * @public - Made public to allow external flush before socket emission
 */
public async flushPersistenceBuffer(): Promise<void> {
  if (this.persistenceBuffer.length === 0 || this.persistenceInProgress || !this.currentRunId) {
    return;
  }

  if (this.persistenceTimer) {
    clearTimeout(this.persistenceTimer);
    this.persistenceTimer = null;
  }

  // This flush carries everything in the buffer, including items that were
  // waiting for a retry, so a pending retry timer is now redundant.
  if (this.persistenceRetryTimer) {
    clearTimeout(this.persistenceRetryTimer);
    this.persistenceRetryTimer = null;
  }

  this.persistenceInProgress = true;
  const batchToProcess = [...this.persistenceBuffer];
  this.persistenceBuffer = [];

  try {
    const sequelize = require('../../storage/db').default;
    await sequelize.transaction(async (transaction: any) => {
      const run = await Run.findOne({
        where: { runId: this.currentRunId! },
        transaction
      });

      if (!run) {
        logger.log('warn', `Run not found for batched persistence: ${this.currentRunId}`);
        return;
      }

      const currentSerializableOutput = run.serializableOutput
        ? JSON.parse(JSON.stringify(run.serializableOutput))
        : { scrapeSchema: {}, scrapeList: {}, crawl: {}, search: {} };

      // Array-shaped scrapeList/scrapeSchema values are replaced by name-keyed objects.
      if (Array.isArray(currentSerializableOutput.scrapeList)) {
        currentSerializableOutput.scrapeList = {};
      }
      if (Array.isArray(currentSerializableOutput.scrapeSchema)) {
        currentSerializableOutput.scrapeSchema = {};
      }
      if (!currentSerializableOutput.search) {
        currentSerializableOutput.search = {};
      }

      let hasUpdates = false;

      // Stores each named update as a flat array, replacing any previous value for that name.
      const mergeLists = (target: Record<string, any>, updates: Record<string, any>) => {
        for (const [key, val] of Object.entries(updates)) {
          const flattened = Array.isArray(val)
            ? val
            : (val?.List ?? (val && typeof val === 'object' ? Object.values(val).flat?.() ?? val : []));
          target[key] = flattened;
        }
      };

      for (const item of batchToProcess) {
        if (item.actionType === 'scrapeSchema') {
          if (!currentSerializableOutput.scrapeSchema || typeof currentSerializableOutput.scrapeSchema !== 'object') {
            currentSerializableOutput.scrapeSchema = {};
          }
          mergeLists(currentSerializableOutput.scrapeSchema, item.data);
          hasUpdates = true;
        } else if (item.actionType === 'scrapeList') {
          if (!currentSerializableOutput.scrapeList || typeof currentSerializableOutput.scrapeList !== 'object') {
            currentSerializableOutput.scrapeList = {};
          }
          mergeLists(currentSerializableOutput.scrapeList, item.data);
          hasUpdates = true;
        } else if (item.actionType === 'crawl') {
          currentSerializableOutput.crawl = {
            ...(currentSerializableOutput.crawl || {}),
            ...item.data
          };
          hasUpdates = true;
        } else if (item.actionType === 'search') {
          currentSerializableOutput.search = {
            ...(currentSerializableOutput.search || {}),
            ...item.data
          };
          hasUpdates = true;
        }
      }

      if (hasUpdates) {
        await run.update({
          serializableOutput: currentSerializableOutput
        }, { transaction });

        logger.log('debug', `Batched persistence: Updated run ${this.currentRunId} with ${batchToProcess.length} items`);
      }
    });

    this.persistenceRetryCount = 0;
  } catch (error: any) {
    logger.log('error', `Failed to flush persistence buffer for run ${this.currentRunId}: ${error.message}`);

    if (this.persistenceRetryCount < this.MAX_PERSISTENCE_RETRIES) {
      // Put the failed batch back in front so ordering is kept on retry.
      this.persistenceBuffer.unshift(...batchToProcess);
      this.persistenceRetryCount++;

      const backoffDelay = Math.min(5000 * Math.pow(2, this.persistenceRetryCount), 30000);

      this.persistenceRetryTimer = setTimeout(async () => {
        this.persistenceRetryTimer = null;
        await this.flushPersistenceBuffer();
      }, backoffDelay);

      logger.log('warn', `Scheduling persistence retry ${this.persistenceRetryCount}/${this.MAX_PERSISTENCE_RETRIES} in ${backoffDelay}ms`);
    } else {
      logger.log('error', `Max persistence retries exceeded for run ${this.currentRunId}, dropping ${batchToProcess.length} items`);
      this.persistenceRetryCount = 0;
    }
  } finally {
    this.persistenceInProgress = false;

    // Items that arrived while this flush was running still need a flush.
    // When a backoff retry is pending it will pick them up; arming the short
    // batch timer here would re-run the failing flush before the backoff
    // delay and exhaust the retries almost immediately.
    if (this.persistenceBuffer.length > 0 && !this.persistenceTimer && !this.persistenceRetryTimer) {
      this.scheduleBatchFlush();
    }
  }
}
/**
 * Flattens a run's serializable output into a list of Airtable row objects.
 *
 * Each output category is first reduced to its own list of rows:
 *  - scrapeSchema -> one `{ Group, Label, Value }` triple per non-empty field
 *    (array-shaped input uses the group name "Default");
 *  - scrapeList   -> every item with at least one non-empty value; for
 *    name-keyed input the list name is added under `List`;
 *  - markdown / html -> `{ Index, Type, Content }` per entry with content;
 *  - crawl        -> items with content, tagged with `Crawl Type`;
 *  - search       -> `search.results`, or `search` itself when it is an
 *    array, or `search` wrapped as a single result.
 *
 * The lists are then zipped by position: row `i` of the result combines the
 * i-th entry of every list that is long enough. Categories are applied in
 * the order schema, list, markdown, html, crawl, search, so on a key clash
 * the later category wins. Empty values (null, undefined, "") are never
 * copied, and rows that end up with no keys are skipped.
 *
 * Screenshots are not exported: the code that collected them from
 * `binaryOutput` is disabled, so the parameter is accepted only to keep the
 * call signature stable.
 *
 * @param serializableOutput Run output to flatten; a null/undefined value yields no rows
 * @param binaryOutput Currently unused (see above)
 * @returns Row objects ready to be written to Airtable
 */
function mergeRelatedData(
  serializableOutput: SerializableOutput,
  binaryOutput: Record<string, any>
): Record<string, any>[] {
  const isPresent = (value: unknown): boolean =>
    value !== null && value !== undefined && value !== "";
  const hasContent = (item: any): boolean =>
    Object.values(item || {}).some(isPresent);

  // A run may have no serializable output at all; treat that as "nothing to export".
  const output: SerializableOutput = serializableOutput || {};

  const schemaData: Array<{ Group: string; Field: string; Value: any }> = [];
  const listData: any[] = [];
  const markdownData: any[] = [];
  const htmlData: any[] = [];
  const crawlData: any[] = [];
  const searchData: any[] = [];

  // --- scrapeSchema: array-of-arrays or { groupName: rows[] } ---
  const schema: any = output.scrapeSchema;
  if (schema) {
    let schemaGroups: Array<[string, any]> = [];
    if (Array.isArray(schema)) {
      schemaGroups = schema.map((rows: any): [string, any] => ["Default", rows]);
    } else if (typeof schema === "object") {
      schemaGroups = Object.entries(schema);
    }

    for (const [groupName, rows] of schemaGroups) {
      if (!Array.isArray(rows)) continue;
      for (const row of rows) {
        for (const [fieldName, value] of Object.entries(row || {})) {
          if (fieldName && fieldName.trim() !== "" && isPresent(value)) {
            schemaData.push({ Group: groupName, Field: fieldName, Value: value });
          }
        }
      }
    }
  }

  // --- scrapeList: array-of-arrays (items kept as-is) or { listName: items[] } ---
  const lists: any = output.scrapeList;
  if (lists) {
    if (Array.isArray(lists)) {
      for (const items of lists) {
        if (!Array.isArray(items)) continue;
        for (const item of items) {
          if (hasContent(item)) listData.push(item);
        }
      }
    } else if (typeof lists === "object") {
      for (const [listName, items] of Object.entries(lists)) {
        if (!Array.isArray(items)) continue;
        for (const item of items) {
          if (hasContent(item)) listData.push({ List: listName, ...item });
        }
      }
    }
  }

  // --- markdown / html: one indexed row per entry that has content ---
  const collectContent = (entries: any, typeLabel: string, target: any[]): void => {
    if (!entries || !Array.isArray(entries)) return;
    entries.forEach((entry: any, index: number) => {
      if (entry?.content) {
        target.push({ "Index": index + 1, "Type": typeLabel, "Content": entry.content });
      }
    });
  };
  collectContent(output.markdown, "Markdown", markdownData);
  collectContent(output.html, "HTML", htmlData);

  // --- crawl: { crawlName: items[] } ---
  if (output.crawl && typeof output.crawl === "object") {
    for (const [crawlName, items] of Object.entries(output.crawl)) {
      if (!Array.isArray(items)) continue;
      for (const item of items) {
        if (hasContent(item)) crawlData.push({ "Crawl Type": crawlName, ...item });
      }
    }
  }

  // --- search: { results: [] }, a bare array, or a single result object ---
  if (output.search) {
    let results: any[];
    if (output.search.results && Array.isArray(output.search.results)) {
      results = output.search.results;
    } else if (Array.isArray(output.search)) {
      results = output.search;
    } else {
      results = [output.search];
    }
    for (const result of results) {
      if (hasContent(result)) searchData.push(result);
    }
  }

  // --- Merge all types into Airtable rows, zipped by position ---
  // Order matters: on a key clash a later source overwrites an earlier one.
  const rowSources: any[][] = [listData, markdownData, htmlData, crawlData, searchData];
  const maxLength = Math.max(schemaData.length, ...rowSources.map((rows) => rows.length));

  const allRecords: Record<string, any>[] = [];
  for (let i = 0; i < maxLength; i++) {
    const record: Record<string, any> = {};

    if (i < schemaData.length) {
      record.Group = schemaData[i].Group;
      record.Label = schemaData[i].Field;
      record.Value = schemaData[i].Value;
    }

    for (const rows of rowSources) {
      if (i >= rows.length) continue;
      for (const [key, value] of Object.entries(rows[i] || {})) {
        if (isPresent(value)) record[key] = value;
      }
    }

    if (Object.keys(record).length > 0) {
      allRecords.push(record);
    }
  }

  return allRecords;
}
/**
 * Runs an Airtable API call with the robot's stored access token and, if the
 * call fails with what looks like an authorization error, refreshes the
 * token once and runs the call again.
 *
 * A failure counts as an authorization error when it carries HTTP status 401
 * (`error.response.status` or `error.statusCode`) or its message contains
 * "unauthorized" or "expired". After a successful refresh the new tokens are
 * saved on the robot; the previous refresh token is kept when the response
 * does not include a new one. Note that `apiCall` is invoked a second time
 * from the start after a refresh.
 *
 * @param robotId Value matched against the robot's `recording_meta.id`
 * @param apiCall Operation to run; receives the access token to use
 * @returns Whatever `apiCall` resolves to
 * @throws Error when the robot is missing, credentials are not configured,
 *         or the token refresh fails; any non-authorization error from
 *         `apiCall` is rethrown unchanged
 */
async function withTokenRefresh<T>(
  robotId: string,
  apiCall: (accessToken: string) => Promise<T>
): Promise<T> {
  const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } });
  if (!robot) throw new Error(`Robot not found for robotId: ${robotId}`);

  const accessToken = robot.get('airtable_access_token') as string;
  const refreshToken = robot.get('airtable_refresh_token') as string;

  if (!accessToken || !refreshToken) {
    throw new Error('Airtable credentials not configured');
  }

  try {
    return await apiCall(accessToken);
  } catch (error: any) {
    // Not every rejection carries a string message; calling .includes on a
    // missing one would throw a TypeError and hide the real failure.
    const message: string = typeof error?.message === 'string' ? error.message : '';
    const looksUnauthorized =
      error?.response?.status === 401 ||
      error?.statusCode === 401 ||
      message.includes('unauthorized') ||
      message.includes('expired');

    if (!looksUnauthorized) {
      throw error;
    }

    logger.log("info", `Refreshing expired Airtable token for robot: ${robotId}`);

    try {
      const tokens = await refreshAirtableToken(refreshToken);

      await robot.update({
        airtable_access_token: tokens.access_token,
        airtable_refresh_token: tokens.refresh_token || refreshToken
      });

      return await apiCall(tokens.access_token);
    } catch (refreshError: any) {
      logger.log("error", `Failed to refresh token: ${refreshError.message}`);
      throw new Error(`Token refresh failed: ${refreshError.message}`);
    }
  }
}
Skipping.'); return; } try { return await withTokenRefresh(robotId, async (accessToken: string) => { const airtable = new Airtable({ apiKey: accessToken }); const base = airtable.base(baseId); await deleteEmptyRecords(base, tableName); const processedData = data.map(item => { const cleanedItem: Record = {}; for (const [key, value] of Object.entries(item)) { if (value === null || value === undefined || value === '') { cleanedItem[key] = ''; } else if (typeof value === 'object' && !Array.isArray(value)) { cleanedItem[key] = JSON.stringify(value); } else { cleanedItem[key] = value; } } return cleanedItem; }).filter(record => { return Object.values(record).some(value => value !== null && value !== undefined && value !== ''); }); if (processedData.length === 0) { console.log('No valid data to write after filtering. Skipping.'); return; } const dataFields = [...new Set(processedData.flatMap(row => Object.keys(row)))]; console.log(`Found ${dataFields.length} fields in data: ${dataFields.join(', ')}`); const existingFields = await getExistingFields(base, tableName); const missingFields = dataFields.filter(field => !existingFields.includes(field)); if (missingFields.length > 0) { console.log(`Creating ${missingFields.length} new fields: ${missingFields.join(', ')}`); for (const field of missingFields) { const sampleRow = processedData.find(row => field in row && row[field] !== ''); if (sampleRow) { const sampleValue = sampleRow[field]; try { await createAirtableField(baseId, tableName, field, sampleValue, accessToken, tableId); console.log(`Successfully created field: ${field}`); await new Promise(resolve => setTimeout(resolve, 200)); } catch (fieldError: any) { console.warn(`Warning: Could not create field "${field}": ${fieldError.message}`); } } } } console.log(`Appending all ${processedData.length} records to Airtable`); const recordsToCreate = processedData.map(record => ({ fields: record })); const BATCH_SIZE = 10; for (let i = 0; i < recordsToCreate.length; i += 
/**
 * Best-effort cleanup that deletes every record in the table whose fields
 * are all empty (no fields at all, or only null / undefined / "" values).
 *
 * All records of the table are fetched, the empty ones are deleted in
 * batches of 10, and progress is written to the console. Any error is
 * reported as a warning and swallowed so the surrounding write can continue.
 *
 * @param base Airtable base handle used to address the table
 * @param tableName Name of the table to clean
 */
async function deleteEmptyRecords(base: Airtable.Base, tableName: string): Promise<void> {
  console.log('Checking for empty records to clear...');

  try {
    const existingRecords = await base(tableName).select().all();
    console.log(`Found ${existingRecords.length} total records`);

    const isBlank = (value: unknown): boolean =>
      value === null || value === undefined || value === '';

    // `every` is true for an empty value list, so records without any fields count as empty too.
    const emptyRecords = existingRecords.filter(
      ({ fields }) => !fields || Object.values(fields).every(isBlank)
    );

    if (emptyRecords.length === 0) {
      console.log('No empty records found to delete');
      return;
    }

    console.log(`Found ${emptyRecords.length} empty records to delete`);

    const BATCH_SIZE = 10;
    const totalBatches = Math.ceil(emptyRecords.length / BATCH_SIZE);

    for (let start = 0; start < emptyRecords.length; start += BATCH_SIZE) {
      const recordIds = emptyRecords.slice(start, start + BATCH_SIZE).map((record) => record.id);
      await base(tableName).destroy(recordIds);
      console.log(`Deleted batch ${Math.floor(start / BATCH_SIZE) + 1} of ${totalBatches}`);
    }

    console.log(`Successfully deleted ${emptyRecords.length} empty records`);
  } catch (error: any) {
    console.warn(`Warning: Could not clear empty records: ${error?.message ?? error}`);
    console.warn('Will continue without deleting empty records');
  }
}
// Helper functions

/**
 * Returns the field names seen on a small sample of the table's records.
 *
 * Only the first page of at most 5 records is read, and only field names
 * that appear on those records are reported, in first-seen order. An empty
 * table therefore yields an empty list. Errors are logged as a warning and
 * also result in an empty list.
 *
 * @param base Airtable base handle used to address the table
 * @param tableName Name of the table to inspect
 * @returns Distinct field names found on the sampled records
 */
async function getExistingFields(base: Airtable.Base, tableName: string): Promise<string[]> {
  try {
    const records = await base(tableName).select({ pageSize: 5 }).firstPage();

    const fieldNames = new Set<string>();
    for (const record of records) {
      for (const field of Object.keys(record.fields)) {
        fieldNames.add(field);
      }
    }

    const headers = Array.from(fieldNames);
    console.log(`Found ${headers.length} headers from records: ${headers.join(', ')}`);
    return headers;
  } catch (error) {
    console.warn(`Warning: Error fetching existing fields: ${error}`);
    return [];
  }
}
/**
 * Chooses an Airtable field type name for a sample value.
 *
 *  - null / undefined      -> 'singleLineText'
 *  - number                -> 'number'
 *  - boolean               -> 'checkbox'
 *  - Date instance         -> 'dateTime'
 *  - array whose first element is an object -> 'multipleRecordLinks'
 *  - any other array (including an empty one) -> 'multipleSelects'
 *  - string holding an http(s) URL -> 'url'
 *  - everything else       -> 'singleLineText'
 *
 * @param value Sample cell value taken from the data to be written
 * @returns The field type name to request when creating the field
 */
function inferFieldType(value: any): string {
  if (value === null || value === undefined) return 'singleLineText';

  switch (typeof value) {
    case 'number':
      return 'number';
    case 'boolean':
      return 'checkbox';
    case 'string':
      return isValidUrl(value) ? 'url' : 'singleLineText';
  }

  if (value instanceof Date) return 'dateTime';

  if (Array.isArray(value)) {
    const holdsObjects = value.length > 0 && typeof value[0] === 'object';
    return holdsObjects ? 'multipleRecordLinks' : 'multipleSelects';
  }

  return 'singleLineText';
}

/**
 * Tells whether a string is an absolute http or https URL.
 *
 * Parsing alone is not a sufficient test: the URL parser accepts any
 * "scheme:rest" text, so ordinary prose such as "Note: call back later"
 * would parse successfully. The protocol is therefore checked explicitly.
 *
 * @param str Text to test
 * @returns true only when `str` parses as a URL with protocol http: or https:
 */
function isValidUrl(str: string): boolean {
  try {
    const { protocol } = new URL(str);
    return protocol === 'http:' || protocol === 'https:';
  } catch (_) {
    return false;
  }
}
Removing task.`); delete airtableUpdateTasks[runId]; } } } else if (task.status === 'completed' || task.status === 'failed') { delete airtableUpdateTasks[runId]; } } if (!hasPendingTasks) { console.log('No pending Airtable update tasks, exiting processor'); break; } console.log('Waiting for 5 seconds before checking again...'); await new Promise(resolve => setTimeout(resolve, 5000)); } console.log('Airtable processing completed or timed out'); } finally { isProcessingAirtable = false; } }; ================================================ FILE: server/src/workflow-management/integrations/gsheet.ts ================================================ import { google } from "googleapis"; import logger from "../../logger"; import Run from "../../models/Run"; import Robot from "../../models/Robot"; interface GoogleSheetUpdateTask { robotId: string; runId: string; status: 'pending' | 'completed' | 'failed'; retries: number; } interface SerializableOutput { scrapeSchema?: Record; scrapeList?: Record; markdown?: Array<{ content: string }>; html?: Array<{ content: string }>; crawl?: Record; search?: any; } const MAX_RETRIES = 5; const MAX_QUEUE_SIZE = 1000; export let googleSheetUpdateTasks: { [runId: string]: GoogleSheetUpdateTask } = {}; let isProcessingGoogleSheets = false; export function addGoogleSheetUpdateTask(runId: string, task: GoogleSheetUpdateTask): boolean { const currentSize = Object.keys(googleSheetUpdateTasks).length; if (currentSize >= MAX_QUEUE_SIZE) { logger.log('warn', `Google Sheets task queue full (${currentSize}/${MAX_QUEUE_SIZE}), dropping oldest task`); const oldestKey = Object.keys(googleSheetUpdateTasks)[0]; if (oldestKey) { delete googleSheetUpdateTasks[oldestKey]; } } googleSheetUpdateTasks[runId] = task; return true; } export async function updateGoogleSheet(robotId: string, runId: string) { try { const run = await Run.findOne({ where: { runId } }); if (!run) { throw new Error(`Run not found for runId: ${runId}`); } const plainRun = run.toJSON(); 
if (plainRun.status === 'success') {
      const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } });
      if (!robot) {
        throw new Error(`Robot not found for robotId: ${robotId}`);
      }

      const plainRobot = robot.toJSON();
      const spreadsheetId = plainRobot.google_sheet_id;

      if (!plainRobot.google_sheet_email || !spreadsheetId) {
        console.log('Google Sheets integration not configured.');
        return;
      }

      console.log(`Preparing to write data to Google Sheet for robot: ${robotId}, spreadsheetId: ${spreadsheetId}`);

      const serializableOutput = plainRun.serializableOutput as SerializableOutput;

      if (serializableOutput) {
        // Each output category goes to its own tab, named after the category.
        if (serializableOutput.scrapeSchema && typeof serializableOutput.scrapeSchema === "object") {
          for (const [groupName, schemaArray] of Object.entries(serializableOutput.scrapeSchema)) {
            if (!Array.isArray(schemaArray) || schemaArray.length === 0) continue;

            await processOutputType(
              robotId,
              spreadsheetId,
              `Schema - ${groupName}`,
              schemaArray,
              plainRobot
            );
          }
        }

        if (serializableOutput.scrapeList && typeof serializableOutput.scrapeList === "object") {
          for (const [listName, listArray] of Object.entries(serializableOutput.scrapeList)) {
            if (!Array.isArray(listArray) || listArray.length === 0) continue;

            await processOutputType(
              robotId,
              spreadsheetId,
              `List - ${listName}`,
              listArray,
              plainRobot
            );
          }
        }

        if (serializableOutput.markdown && Array.isArray(serializableOutput.markdown) && serializableOutput.markdown.length > 0) {
          const markdownData = serializableOutput.markdown.map((item, index) => ({
            "Index": index + 1,
            "Content": item.content || ""
          }));

          await processOutputType(
            robotId,
            spreadsheetId,
            'Markdown',
            markdownData,
            plainRobot
          );
        }

        if (serializableOutput.html && Array.isArray(serializableOutput.html) && serializableOutput.html.length > 0) {
          const htmlData = serializableOutput.html.map((item, index) => ({
            "Index": index + 1,
            "Content": item.content || ""
          }));

          await processOutputType(
            robotId,
            spreadsheetId,
            'HTML',
            htmlData,
            plainRobot
          );
        }

        if (serializableOutput.crawl && typeof serializableOutput.crawl === "object") {
          for (const [crawlName, crawlArray] of Object.entries(serializableOutput.crawl)) {
            if (!Array.isArray(crawlArray) || crawlArray.length === 0) continue;

            await processOutputType(
              robotId,
              spreadsheetId,
              `Crawl - ${crawlName}`,
              crawlArray,
              plainRobot
            );
          }
        }

        if (serializableOutput.search) {
          // Accept `{ results: [...] }`, a bare array, or a single object.
          let searchData: any[] = [];
          if (serializableOutput.search.results && Array.isArray(serializableOutput.search.results)) {
            searchData = serializableOutput.search.results;
          } else if (Array.isArray(serializableOutput.search)) {
            searchData = serializableOutput.search;
          } else {
            searchData = [serializableOutput.search];
          }

          if (searchData.length > 0) {
            await processOutputType(
              robotId,
              spreadsheetId,
              'Search Results',
              searchData,
              plainRobot
            );
          }
        }
      }

      if (plainRun.binaryOutput && Object.keys(plainRun.binaryOutput).length > 0) {
        const screenshots = Object.entries(plainRun.binaryOutput).map(([key, url]) => ({
          "Screenshot Key": key,
          "Screenshot URL": url
        }));

        // Pass the rows themselves. Wrapping them in another array made
        // processOutputType treat the whole list as a single row whose columns
        // were "0", "1", ... holding JSON blobs.
        await processOutputType(
          robotId,
          spreadsheetId,
          'Screenshot',
          screenshots,
          plainRobot
        );
      }

      console.log(`Data written to Google Sheet successfully for Robot: ${robotId} and Run: ${runId}`);
    } else {
      console.log('Run status is not success or serializableOutput is missing.');
    }
  } catch (error: any) {
    console.error(`Failed to write data to Google Sheet for Robot: ${robotId} and Run: ${runId}: ${error.message}`);
    throw error;
  }
}

/**
 * Writes one category of run output to the tab named `outputType`.
 *
 * Ensures the tab exists, flattens every row so that nested objects/arrays
 * become JSON strings, then appends the rows via `writeDataToSheet`.
 *
 * @param robotId       Robot meta id, forwarded to `writeDataToSheet`.
 * @param spreadsheetId Target spreadsheet.
 * @param outputType    Tab title (also used in log messages).
 * @param outputData    Rows to write; a non-array or empty array is skipped.
 * @param robotConfig   Plain robot record carrying the Google credentials.
 */
async function processOutputType(
  robotId: string,
  spreadsheetId: string,
  outputType: string,
  outputData: any[],
  robotConfig: any
) {
  const data = outputData;
  const sheetName = outputType;

  if (!Array.isArray(data) || data.length === 0) {
    console.log(`No data to write for ${sheetName}. Skipping.`);
    return;
  }

  await ensureSheetExists(spreadsheetId, sheetName, robotConfig);

  const formattedData = data.map(item => {
    const flatRow: Record<string, any> = {};
    // `item || {}` turns null/undefined rows into empty rows instead of throwing.
    for (const [key, value] of Object.entries(item || {})) {
      flatRow[key] =
        typeof value === "object" && value !== null
          ? JSON.stringify(value)
          : value;
    }
    return flatRow;
  });

  await writeDataToSheet(robotId, spreadsheetId, formattedData, sheetName, robotConfig);
  console.log(`Data written to ${sheetName} sheet for ${outputType} data`);
}

/**
 * Creates the tab `sheetName` in the spreadsheet when no tab with that exact
 * title exists yet. Errors are logged and rethrown.
 */
async function ensureSheetExists(spreadsheetId: string, sheetName: string, robotConfig: any) {
  try {
    const oauth2Client = getOAuth2Client(robotConfig);
    const sheets = google.sheets({ version: 'v4', auth: oauth2Client });

    // Only tab titles are requested.
    const response = await sheets.spreadsheets.get({
      spreadsheetId,
      fields: 'sheets.properties.title'
    });

    const existingSheets = response.data.sheets?.map((sheet: any) => sheet.properties?.title) || [];

    if (!existingSheets.includes(sheetName)) {
      await sheets.spreadsheets.batchUpdate({
        spreadsheetId,
        requestBody: {
          requests: [
            {
              addSheet: {
                properties: {
                  title: sheetName
                }
              }
            }
          ]
        }
      });
      console.log(`Created new sheet: ${sheetName}`);
    }
  } catch (error: any) {
    logger.log('error', `Error ensuring sheet exists: ${error.message}`);
    throw error;
  }
}

/**
 * Builds an OAuth2 client from the GOOGLE_* environment variables and the
 * access/refresh tokens stored on the robot record.
 */
function getOAuth2Client(robotConfig: any) {
  const oauth2Client = new google.auth.OAuth2(
    process.env.GOOGLE_CLIENT_ID,
    process.env.GOOGLE_CLIENT_SECRET,
    process.env.GOOGLE_REDIRECT_URI
  );

  oauth2Client.setCredentials({
    access_token: robotConfig.google_access_token,
    refresh_token: robotConfig.google_refresh_token,
  });

  return oauth2Client;
}

export async function writeDataToSheet(
  robotId: string,
  spreadsheetId: string,
  data: any[],
  sheetName: string = 'Sheet1',
  robotConfig?: any
) {
  try {
    let robot = robotConfig;

    if (!robot) {
      robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } });

      if (!robot) {
        throw new Error(`Robot not found for robotId: ${robotId}`);
      }

      robot = robot.toJSON();
    }

    if (!robot.google_access_token ||
!robot.google_refresh_token) {
      throw new Error('Google Sheets access not configured for user');
    }

    const oauth2Client = new google.auth.OAuth2(
      process.env.GOOGLE_CLIENT_ID,
      process.env.GOOGLE_CLIENT_SECRET,
      process.env.GOOGLE_REDIRECT_URI
    );

    oauth2Client.setCredentials({
      access_token: robot.google_access_token,
      refresh_token: robot.google_refresh_token,
    });

    // Persist tokens the client hands back on the 'tokens' event. The listener
    // is async and nobody awaits it, so failures are caught and logged here
    // rather than surfacing as an unhandled promise rejection.
    oauth2Client.once('tokens', async (tokens: any) => {
      try {
        if (tokens.refresh_token || tokens.access_token) {
          const robotModel = await Robot.findOne({ where: { 'recording_meta.id': robotId } });
          if (robotModel) {
            const updateData: any = {};
            if (tokens.refresh_token) updateData.google_refresh_token = tokens.refresh_token;
            if (tokens.access_token) updateData.google_access_token = tokens.access_token;
            await robotModel.update(updateData);
          }
        }
      } catch (tokenError: any) {
        logger.log('error', `Failed to persist refreshed Google tokens for robot ${robotId}: ${tokenError.message}`);
      }
    });

    const sheets = google.sheets({ version: 'v4', auth: oauth2Client });

    // Read the first row to learn which headers, if any, are already present.
    const checkResponse = await sheets.spreadsheets.values.get({
      spreadsheetId,
      range: `${sheetName}!1:1`,
    });

    if (!data || data.length === 0) {
      console.log('No data to write. Exiting early.');
      return;
    }

    // Headers come from the first row only.
    const expectedHeaders = Object.keys(data[0]);
    const rows = data.map(item => Object.values(item));

    const existingHeaders =
      checkResponse.data.values &&
        checkResponse.data.values[0] ?
        checkResponse.data.values[0].map(String) :
        [];

    const isSheetEmpty = existingHeaders.length === 0;

    const headersMatch =
      !isSheetEmpty &&
      existingHeaders.length === expectedHeaders.length &&
      expectedHeaders.every((header, index) => existingHeaders[index] === header);

    let resource;

    if (isSheetEmpty || !headersMatch) {
      // A fresh header row is written whenever the tab is empty or its first
      // row differs from the incoming column names.
      resource = { values: [expectedHeaders, ...rows] };
      console.log(`Including headers in the append operation for sheet ${sheetName}.`);
    } else {
      resource = { values: rows };
      console.log(`Headers already exist and match in sheet ${sheetName}, only appending data rows.`);
    }

    console.log(`Attempting to write to spreadsheet: ${spreadsheetId}, sheet: ${sheetName}`);

    const response = await sheets.spreadsheets.values.append({
      spreadsheetId,
      range: `${sheetName}!A1`,
      valueInputOption: 'USER_ENTERED',
      requestBody: resource,
    });

    if (response.status === 200) {
      console.log(`Data successfully appended to sheet: ${sheetName}`);
    } else {
      console.error('Google Sheets append failed:', response);
    }

    logger.log(`info`, `Data written to Google Sheet: ${spreadsheetId}, sheet: ${sheetName}`);
  } catch (error: any) {
    logger.log(`error`, `Error writing data to Google Sheet: ${error.message}`);
    throw error;
  }
}

/**
 * Drains the in-memory Google Sheets task queue.
 *
 * Only one drain runs at a time (guarded by `isProcessingGoogleSheets`). The
 * loop re-scans the queue every 5 seconds for at most 60 seconds: pending
 * tasks are executed via `updateGoogleSheet`, failed attempts bump the task's
 * `retries` counter until `MAX_RETRIES` is reached, and tasks already marked
 * 'completed' or 'failed' are removed.
 */
export const processGoogleSheetUpdates = async () => {
  if (isProcessingGoogleSheets) {
    logger.log('info', 'Google Sheets processing already in progress, skipping');
    return;
  }

  isProcessingGoogleSheets = true;

  try {
    const maxProcessingTime = 60000;
    const startTime = Date.now();

    while (Date.now() - startTime < maxProcessingTime) {
      let hasPendingTasks = false;

      for (const runId in googleSheetUpdateTasks) {
        const task = googleSheetUpdateTasks[runId];
        console.log(`Processing task for runId: ${runId}, status: ${task.status}`);

        if (task.status === 'pending') {
          hasPendingTasks = true;

          try {
            await updateGoogleSheet(task.robotId, task.runId);
            console.log(`Successfully updated Google Sheet for runId: ${runId}`);
            delete googleSheetUpdateTasks[runId];
          } catch (error: any) {
            console.error(`Failed to update Google Sheets for run ${task.runId}:`, error);

            if (task.retries < MAX_RETRIES) {
              googleSheetUpdateTasks[runId].retries += 1;
              console.log(`Retrying task for runId: ${runId}, attempt: ${task.retries}`);
            } else {
              console.log(`Max retries reached for runId: ${runId}. Removing task.`);
              delete googleSheetUpdateTasks[runId];
            }
          }
        } else if (task.status === 'completed' || task.status === 'failed') {
          delete googleSheetUpdateTasks[runId];
        }
      }

      if (!hasPendingTasks) {
        console.log('No pending tasks. Exiting loop.');
        break;
      }

      console.log('Waiting for 5 seconds before checking again...');
      await new Promise(resolve => setTimeout(resolve, 5000));
    }

    console.log('Google Sheets processing completed or timed out');
  } finally {
    // Always release the guard so a later call can drain the queue again.
    isProcessingGoogleSheets = false;
  }
};

================================================
FILE: server/src/workflow-management/scheduler/index.ts
================================================
import { v4 as uuid } from "uuid";
import { io, Socket } from "socket.io-client";
import { createRemoteBrowserForRun, destroyRemoteBrowser } from '../../browser-management/controller';
import logger from '../../logger';
import { browserPool, io as serverIo } from "../../server";
import { addGoogleSheetUpdateTask, googleSheetUpdateTasks, processGoogleSheetUpdates } from "../integrations/gsheet";
import Robot from "../../models/Robot";
import Run from "../../models/Run";
import { getDecryptedProxyConfig } from "../../routes/proxy";
import { BinaryOutputService } from "../../storage/mino";
import { capture } from "../../utils/analytics";
import { WorkflowFile } from "maxun-core";
import { Page } from "playwright-core";
import { sendWebhook } from "../../routes/webhook";
import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } from "../integrations/airtable";
import { convertPageToMarkdown, convertPageToHTML, convertPageToScreenshot } from "../../markdownify/scrape";

async function createWorkflowAndStoreMetadata(id: string, 
userId: string) {
  // Creates the Run row for a scheduled execution of robot `id` and announces
  // it to the user's socket room.
  // Returns `{ browserId, runId }` on success, or `{ success: false, error }`
  // when the robot is missing or anything in the outer try block throws.
  try {
    const recording = await Robot.findOne({
      where: {
        'recording_meta.id': id
      },
      raw: true
    });

    if (!recording || !recording.recording_meta || !recording.recording_meta.id) {
      return {
        success: false,
        error: 'Recording not found'
      };
    }

    const proxyConfig = await getDecryptedProxyConfig(userId);
    // NOTE(review): proxyOptions is assembled here but not referenced again
    // within this function - confirm whether it should be passed on.
    let proxyOptions: any = {};

    if (proxyConfig.proxy_url) {
      proxyOptions = {
        server: proxyConfig.proxy_url,
        // Credentials are attached only when both username and password exist.
        ...(proxyConfig.proxy_username && proxyConfig.proxy_password && {
          username: proxyConfig.proxy_username,
          password: proxyConfig.proxy_password,
        }),
      };
    }

    const browserId = createRemoteBrowserForRun(userId);
    const runId = uuid();

    const run = await Run.create({
      status: 'scheduled',
      name: recording.recording_meta.name,
      robotId: recording.id,
      robotMetaId: recording.recording_meta.id,
      startedAt: new Date().toLocaleString(),
      finishedAt: '',
      browserId,
      interpreterSettings: { maxConcurrency: 1, maxRepeats: 1, debug: true },
      log: '',
      runId,
      runByScheduleId: uuid(),
      serializableOutput: {},
      binaryOutput: {},
      retryCount: 0
    });

    const plainRun = run.toJSON();

    // The socket notification is best-effort: a failure is logged as a warning
    // and does not prevent the ids from being returned.
    try {
      const runScheduledData = {
        runId: plainRun.runId,
        robotMetaId: plainRun.robotMetaId,
        robotName: plainRun.name,
        status: 'scheduled',
        startedAt: plainRun.startedAt,
        runByUserId: plainRun.runByUserId,
        runByScheduleId: plainRun.runByScheduleId,
        runByAPI: plainRun.runByAPI || false,
        browserId: plainRun.browserId
      };

      serverIo.of('/queued-run').to(`user-${userId}`).emit('run-scheduled', runScheduledData);
      logger.log('info', `Scheduled run notification sent for run: ${plainRun.runId} to user-${userId}`);
    } catch (socketError: any) {
      logger.log('warn', `Failed to send run-scheduled notification for run ${plainRun.runId}: ${socketError.message}`);
    }

    return {
      browserId,
      runId: plainRun.runId,
    }

  } catch (e) {
    // Any failure above is reported through the return value, not rethrown.
    const { message } = e as Error;
    logger.log('info', `Error while scheduling a run with id: ${id}`);
    console.log(`Error while scheduling a run with id: ${id}:`, message);
    return {
      success: false,
      error: message,
    };
  }
}
/**
 * Races `promise` against a timer.
 *
 * @param promise   The operation to wait for.
 * @param timeoutMs Milliseconds to wait before giving up.
 * @param operation Label used in the timeout error message.
 * @returns A promise that settles like `promise`, or rejects with
 *          `Error("<operation> timed out after <timeoutMs>ms")` if the timer
 *          fires first.
 */
function withTimeout<T>(promise: Promise<T>, timeoutMs: number, operation: string): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`${operation} timed out after ${timeoutMs}ms`)),
      timeoutMs
    );
  });

  // Clear the timer once the race is decided so it does not stay pending
  // for the rest of `timeoutMs` after `promise` has already settled.
  return Promise.race([promise, timeout]).finally(() => {
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  });
}

/**
 * Queues Google Sheets and Airtable update tasks for a finished run and kicks
 * off both queue processors without waiting for them.
 *
 * Tasks start with `retries: 0`: the gsheet processor retries only while
 * `retries < MAX_RETRIES` (5), and the Airtable processor applies the same
 * check against its own limit, so the previous starting value of 5 made the
 * first Sheets failure drop the task with no retry.
 *
 * @param runId       Run whose output should be exported.
 * @param robotMetaId Robot meta id stored on the task as `robotId`.
 */
async function triggerIntegrationUpdates(runId: string, robotMetaId: string): Promise<void> {
  try {
    addGoogleSheetUpdateTask(runId, {
      robotId: robotMetaId,
      runId: runId,
      status: 'pending',
      retries: 0,
    });

    addAirtableUpdateTask(runId, {
      robotId: robotMetaId,
      runId: runId,
      status: 'pending',
      retries: 0,
    });

    // Fire-and-forget: errors are logged, never propagated to the caller.
    void withTimeout(processAirtableUpdates(), 65000, 'Airtable update')
      .catch(err => logger.log('error', `Airtable update error: ${err.message}`));

    void withTimeout(processGoogleSheetUpdates(), 65000, 'Google Sheets update')
      .catch(err => logger.log('error', `Google Sheets update error: ${err.message}`));
  } catch (err: any) {
    logger.log('error', `Failed to update integrations for run: ${runId}: ${err.message}`);
  }
}

/**
 * Returns a deep (JSON round-trip) copy of `workflow` in which every pair's
 * `what` list starts with a `{ action: 'flag', args: ['generated'] }` step.
 * The input object is not modified.
 */
function AddGeneratedFlags(workflow: WorkflowFile) {
  const copy = JSON.parse(JSON.stringify(workflow));
  for (const pair of copy.workflow) {
    pair.what.unshift({
      action: 'flag',
      args: ['generated'],
    });
  }
  return copy;
};

async function executeRun(id: string, userId: string) {
  let browser: any = null;
  try {
    const run = await Run.findOne({ where: { runId: id } });
    if (!run) {
      return {
        success: false,
        error: 'Run not found'
      }
    }

    const plainRun = run.toJSON();

    if (run.status === 'aborted' || run.status === 'aborting') {
      logger.log('info', `Scheduled Run ${id} has status ${run.status}, skipping execution`);
      return {
        success: false,
        error: `Run has status ${run.status}`
      }
    }

    if (run.status === 'queued') {
      logger.log('info', `Scheduled Run ${id} has status 'queued', skipping stale execution - will be handled by recovery`);
      return {
        success: false,
        error: 'Run is queued and will be handled by recovery'
      }
    }

    const retryCount = plainRun.retryCount || 0;
    if (retryCount >= 3) {
      logger.log('warn', `Scheduled Run 
${id} has exceeded max retries (${retryCount}/3), marking as failed`); const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId, userId }, raw: true }); await run.update({ status: 'failed', finishedAt: new Date().toLocaleString(), log: plainRun.log ? `${plainRun.log}\nMax retries exceeded (3/3) - Run failed after multiple attempts.` : `Max retries exceeded (3/3) - Run failed after multiple attempts.` }); try { const failureSocketData = { runId: plainRun.runId, robotMetaId: plainRun.robotMetaId, robotName: recording ? recording.recording_meta.name : 'Unknown Robot', status: 'failed', finishedAt: new Date().toLocaleString() }; serverIo.of(run.browserId).emit('run-completed', failureSocketData); serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', failureSocketData); } catch (socketError: any) { logger.log('warn', `Failed to emit failure event in main catch: ${socketError.message}`); } return { success: false, error: 'Max retries exceeded' } } const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true }); if (!recording) { return { success: false, error: 'Recording not found' } } browser = browserPool.getRemoteBrowser(plainRun.browserId); if (!browser) { throw new Error('Could not access browser'); } let currentPage = await browser.getCurrentPage(); if (!currentPage) { throw new Error('Could not create a new page'); } if (recording.recording_meta.type === 'scrape') { logger.log('info', `Executing scrape robot for scheduled run ${id}`); const formats = recording.recording_meta.formats || ['markdown']; await run.update({ status: 'running', log: `Converting page to: ${formats.join(', ')}` }); try { const runStartedData = { runId: plainRun.runId, robotMetaId: plainRun.robotMetaId, robotName: recording.recording_meta.name, status: 'running', startedAt: plainRun.startedAt }; serverIo.of('/queued-run').to(`user-${userId}`).emit('run-started', runStartedData); logger.log( 'info', 
`Markdown robot run started notification sent for run: ${plainRun.runId} to user-${userId}` ); } catch (socketError: any) { logger.log( 'warn', `Failed to send run-started notification for markdown robot run ${plainRun.runId}: ${socketError.message}` ); } try { const url = recording.recording_meta.url; if (!url) { throw new Error('No URL specified for markdown robot'); } let markdown = ''; let html = ''; const serializableOutput: any = {}; const binaryOutput: any = {}; const SCRAPE_TIMEOUT = 120000; // Markdown conversion if (formats.includes("markdown")) { const markdownPromise = convertPageToMarkdown(url, currentPage); const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error(`Markdown conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT); }); markdown = await Promise.race([markdownPromise, timeoutPromise]); serializableOutput.markdown = [{ content: markdown }]; } if (formats.includes("html")) { const htmlPromise = convertPageToHTML(url, currentPage); const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error(`HTML conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT); }); html = await Promise.race([htmlPromise, timeoutPromise]); serializableOutput.html = [{ content: html }]; } if (formats.includes("screenshot-visible")) { const screenshotPromise = convertPageToScreenshot(url, currentPage, false); const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT); }); const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]); if (!binaryOutput['screenshot-visible']) { binaryOutput['screenshot-visible'] = { data: screenshotBuffer.toString('base64'), mimeType: 'image/png' }; } } // Screenshot - full page if (formats.includes("screenshot-fullpage")) { const screenshotPromise = convertPageToScreenshot(url, currentPage, true); const timeoutPromise = new 
Promise((_, reject) => { setTimeout(() => reject(new Error(`Screenshot conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT); }); const screenshotBuffer = await Promise.race([screenshotPromise, timeoutPromise]); if (!binaryOutput['screenshot-fullpage']) { binaryOutput['screenshot-fullpage'] = { data: screenshotBuffer.toString('base64'), mimeType: 'image/png' }; } } await run.update({ status: 'success', finishedAt: new Date().toLocaleString(), log: `${formats.join(', ')} conversion completed successfully`, serializableOutput, binaryOutput, }); let uploadedBinaryOutput: Record = {}; if (Object.keys(binaryOutput).length > 0) { const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, binaryOutput); await run.update({ binaryOutput: uploadedBinaryOutput }); } logger.log('info', `Markdown robot execution completed for scheduled run ${id}`); // Run-completed socket notifications try { const completionData = { runId: plainRun.runId, robotMetaId: plainRun.robotMetaId, robotName: recording.recording_meta.name, status: 'success', finishedAt: new Date().toLocaleString() }; serverIo.of(plainRun.browserId).emit('run-completed', completionData); serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', completionData); } catch (socketError: any) { logger.log( 'warn', `Failed to send run-completed notification for markdown robot run ${id}: ${socketError.message}` ); } // Webhook payload const webhookPayload: any = { robot_id: plainRun.robotMetaId, run_id: plainRun.runId, robot_name: recording.recording_meta.name, status: 'success', started_at: plainRun.startedAt, finished_at: new Date().toLocaleString(), metadata: { browser_id: plainRun.browserId, user_id: userId, } }; if (formats.includes('markdown')) webhookPayload.markdown = markdown; if (formats.includes('html')) webhookPayload.html = html; if (uploadedBinaryOutput['screenshot-visible']) 
webhookPayload.screenshot_visible = uploadedBinaryOutput['screenshot-visible']; if (uploadedBinaryOutput['screenshot-fullpage']) webhookPayload.screenshot_fullpage = uploadedBinaryOutput['screenshot-fullpage']; try { await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload); logger.log( 'info', `Webhooks sent successfully for markdown robot scheduled run ${plainRun.runId}` ); } catch (webhookError: any) { logger.log( 'warn', `Failed to send webhooks for markdown robot run ${plainRun.runId}: ${webhookError.message}` ); } capture("maxun-oss-run-created", { runId: plainRun.runId, user_id: userId, status: "success", robot_type: "scrape", formats, source: "scheduled" }); await destroyRemoteBrowser(plainRun.browserId, userId); return true; } catch (error: any) { logger.log('error', `${formats.join(', ')} conversion failed for scheduled run ${id}: ${error.message}`); await run.update({ status: 'failed', finishedAt: new Date().toLocaleString(), log: `${formats.join(', ')} conversion failed: ${error.message}`, }); try { const failureData = { runId: plainRun.runId, robotMetaId: plainRun.robotMetaId, robotName: recording.recording_meta.name, status: 'failed', finishedAt: new Date().toLocaleString() }; serverIo.of(plainRun.browserId).emit('run-completed', failureData); serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', failureData); } catch (socketError: any) { logger.log( 'warn', `Failed to send run-failed notification for markdown robot run ${id}: ${socketError.message}` ); } capture("maxun-oss-run-created", { runId: plainRun.runId, user_id: userId, status: "failed", robot_type: "scrape", formats, source: "scheduled" }); await destroyRemoteBrowser(plainRun.browserId, userId); throw error; } } plainRun.status = 'running'; try { const runStartedData = { runId: plainRun.runId, robotMetaId: plainRun.robotMetaId, robotName: recording ? 
recording.recording_meta.name : 'Unknown Robot', status: 'running', startedAt: plainRun.startedAt }; serverIo.of('/queued-run').to(`user-${userId}`).emit('run-started', runStartedData); logger.log('info', `Run started notification sent for run: ${plainRun.runId} to user-${userId}`); } catch (socketError: any) { logger.log('warn', `Failed to send run-started notification for run ${plainRun.runId}: ${socketError.message}`); } const workflow = AddGeneratedFlags(recording.recording); // Set run ID for real-time data persistence browser.interpreter.setRunId(id); const INTERPRETATION_TIMEOUT = 600000; const interpretationPromise = browser.interpreter.InterpretRecording( workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings ); const timeoutPromise = new Promise((_, reject) => { setTimeout(() => reject(new Error(`Workflow interpretation timed out after ${INTERPRETATION_TIMEOUT/1000}s`)), INTERPRETATION_TIMEOUT); }); const interpretationInfo = await Promise.race([interpretationPromise, timeoutPromise]); const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput); const finalRun = await Run.findByPk(run.id); const categorizedOutput = { scrapeSchema: finalRun?.serializableOutput?.scrapeSchema || {}, scrapeList: finalRun?.serializableOutput?.scrapeList || {}, crawl: finalRun?.serializableOutput?.crawl || {}, search: finalRun?.serializableOutput?.search || {} }; await destroyRemoteBrowser(plainRun.browserId, userId); await run.update({ status: 'success', finishedAt: new Date().toLocaleString(), log: interpretationInfo.log.join('\n'), binaryOutput: uploadedBinaryOutput }); // Get metrics from persisted data for analytics and webhooks let totalSchemaItemsExtracted = 0; let totalListItemsExtracted = 0; let extractedScreenshotsCount = 0; if (categorizedOutput) { if (categorizedOutput.scrapeSchema) { 
Object.values(categorizedOutput.scrapeSchema).forEach((schemaResult: any) => { if (Array.isArray(schemaResult)) { totalSchemaItemsExtracted += schemaResult.length; } else if (schemaResult && typeof schemaResult === 'object') { totalSchemaItemsExtracted += 1; } }); } if (categorizedOutput.scrapeList) { Object.values(categorizedOutput.scrapeList).forEach((listResult: any) => { if (Array.isArray(listResult)) { totalListItemsExtracted += listResult.length; } }); } } if (run.binaryOutput) { extractedScreenshotsCount = Object.keys(run.binaryOutput).length; } const totalRowsExtracted = totalSchemaItemsExtracted + totalListItemsExtracted; capture( 'maxun-oss-run-created', { runId: id, created_at: new Date().toISOString(), status: 'success', totalRowsExtracted, schemaItemsExtracted: totalSchemaItemsExtracted, listItemsExtracted: totalListItemsExtracted, extractedScreenshotsCount, is_llm: (recording.recording_meta as any).isLLM, source: 'scheduled' } ); try { const completionData = { runId: plainRun.runId, robotMetaId: plainRun.robotMetaId, robotName: recording.recording_meta.name, status: 'success', finishedAt: new Date().toLocaleString() }; serverIo.of(plainRun.browserId).emit('run-completed', completionData); serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', completionData); } catch (emitError: any) { logger.log('warn', `Failed to emit success event: ${emitError.message}`); } const webhookPayload = { robot_id: plainRun.robotMetaId, run_id: plainRun.runId, robot_name: recording.recording_meta.name, status: 'success', started_at: plainRun.startedAt, finished_at: new Date().toLocaleString(), extracted_data: { captured_texts: Object.keys(categorizedOutput.scrapeSchema || {}).length > 0 ? Object.entries(categorizedOutput.scrapeSchema).reduce((acc, [name, value]) => { acc[name] = Array.isArray(value) ? 
value : [value]; return acc; }, {} as Record) : {}, captured_lists: categorizedOutput.scrapeList, crawl_data: categorizedOutput.crawl, search_data: categorizedOutput.search, captured_texts_count: totalSchemaItemsExtracted, captured_lists_count: totalListItemsExtracted, screenshots_count: extractedScreenshotsCount }, metadata: { browser_id: plainRun.browserId, user_id: userId, } }; try { await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload); logger.log('info', `Webhooks sent successfully for completed run ${plainRun.runId}`); } catch (webhookError: any) { logger.log('error', `Failed to send webhooks for run ${plainRun.runId}: ${webhookError.message}`); } await triggerIntegrationUpdates(plainRun.runId, plainRun.robotMetaId); return true; } catch (error: any) { logger.log('info', `Error while running a robot with id: ${id} - ${error.message}`); const run = await Run.findOne({ where: { runId: id } }); if (run) { if (browser) { try { if (browser.interpreter) { await browser.interpreter.clearState(); } await destroyRemoteBrowser(run.browserId, userId); } catch (cleanupError: any) { logger.error(`Failed to cleanup browser in error handler: ${cleanupError.message}`); } } await run.update({ status: 'failed', finishedAt: new Date().toLocaleString(), }); const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true }); // Trigger webhooks for run failure const failedWebhookPayload = { robot_id: run.robotMetaId, run_id: run.runId, robot_name: recording ? 
recording.recording_meta.name : 'Unknown Robot',
        status: 'failed',
        started_at: run.startedAt,
        finished_at: new Date().toLocaleString(),
        error: {
          message: error.message,
          stack: error.stack,
          type: error.name || 'ExecutionError'
        },
        metadata: {
          browser_id: run.browserId,
          user_id: userId,
        }
      };

      try {
        await sendWebhook(run.robotMetaId, 'run_failed', failedWebhookPayload);
        logger.log('info', `Failure webhooks sent successfully for run ${run.runId}`);
      } catch (webhookError: any) {
        logger.log('error', `Failed to send failure webhooks for run ${run.runId}: ${webhookError.message}`);
      }

      try {
        const failureSocketData = {
          runId: run.runId,
          robotMetaId: run.robotMetaId,
          robotName: recording ? recording.recording_meta.name : 'Unknown Robot',
          status: 'failed',
          finishedAt: new Date().toLocaleString()
        };

        serverIo.of(run.browserId).emit('run-completed', failureSocketData);
        serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', failureSocketData);
      } catch (socketError: any) {
        logger.log('warn', `Failed to emit failure event in main catch: ${socketError.message}`);
      }
      capture(
        'maxun-oss-run-created',
        {
          runId: id,
          created_at: new Date().toISOString(),
          status: 'failed',
          is_llm: (recording?.recording_meta as any)?.isLLM,
          source: 'scheduled'
        }
      );
    }
    return false;
  }
}

/**
 * Runs the scheduled robot once the remote browser signals it is ready, then
 * tears the socket connection down regardless of the outcome.
 *
 * NOTE(review): `executeRun` returns `{ success: false, ... }` objects for
 * skipped runs; those are truthy and therefore take the "succeeded" branch.
 *
 * @param browserId Remote browser backing this run.
 * @param id        Run id passed to `executeRun`.
 * @param userId    Owner of the run.
 * @param socket    Client socket to clean up afterwards.
 */
async function readyForRunHandler(browserId: string, id: string, userId: string, socket: Socket) {
  try {
    const outcome = await executeRun(id, userId);

    if (!outcome) {
      logger.log('error', `Interpretation of ${id} failed`);
      await destroyRemoteBrowser(browserId, userId);
    } else {
      logger.log('info', `Interpretation of ${id} succeeded`);
    }

    resetRecordingState(browserId, id);
  } catch (error: any) {
    logger.error(`Error during readyForRunHandler: ${error.message}`);
    await destroyRemoteBrowser(browserId, userId);
  } finally {
    cleanupSocketConnection(socket, browserId, id);
  }
}

/**
 * Overwrites its own parameters with empty strings. Because the arguments are
 * strings passed by value, nothing changes for the caller.
 */
function resetRecordingState(browserId: string, id: string) {
  browserId = '';
  id = '';
}

/**
 * Entry point for a scheduled run: creates the run record and remote browser,
 * connects a client socket to the browser's namespace and starts execution
 * when that namespace emits 'ready-for-run'.
 *
 * Errors are logged, not thrown; a socket opened before the failure is
 * cleaned up.
 *
 * @param id     Robot meta id to run.
 * @param userId Owner of the robot.
 */
export async function handleRunRecording(id: string, userId: string) {
  let socket: Socket | null = null;

  try {
    const result = await createWorkflowAndStoreMetadata(id, userId);
    const { browserId, runId: newRunId } = result;

    // A `{ success: false }` result carries neither id and ends up here too.
    if (!browserId || !newRunId || !userId) {
      throw new Error('browserId or runId or userId is undefined');
    }

    const CONNECTION_TIMEOUT = 30000;
    const backendUrl = process.env.BACKEND_URL ? process.env.BACKEND_URL : 'http://localhost:5000';

    const connection = io(`${backendUrl}/${browserId}`, {
      transports: ['websocket'],
      rejectUnauthorized: false,
      timeout: CONNECTION_TIMEOUT,
    });
    socket = connection;

    connection.on('ready-for-run', () => readyForRunHandler(browserId, newRunId, userId, connection));

    connection.on('connect_error', (error: Error) => {
      logger.error(`Socket connection error for scheduled run ${newRunId}: ${error.message}`);
      cleanupSocketConnection(connection, browserId, newRunId);
    });

    connection.on('disconnect', () => {
      cleanupSocketConnection(connection, browserId, newRunId);
    });

    logger.log('info', `Running robot: ${id}`);
  } catch (error: any) {
    logger.error('Error running recording:', error);
    if (socket) {
      cleanupSocketConnection(socket, '', '');
    }
  }
}

/**
 * Detaches and disconnects the client socket and, when `browserId` is given,
 * also tears down the matching server-side namespace (listeners, connected
 * sockets, and its entry in the server's internal `_nsps` map).
 * Failures are logged and swallowed.
 */
function cleanupSocketConnection(socket: Socket, browserId: string, id: string) {
  try {
    socket.removeAllListeners();
    socket.disconnect();

    if (browserId) {
      const browserNamespace = serverIo.of(browserId);
      browserNamespace.removeAllListeners();
      browserNamespace.disconnectSockets(true);

      // `_nsps` is not part of the typed API, hence the cast.
      const nsps = (serverIo as any)._nsps;
      const namespaceKey = `/${browserId}`;
      if (nsps && nsps.has(namespaceKey)) {
        nsps.delete(namespaceKey);
        logger.log('debug', `Deleted namespace /${browserId} from io._nsps Map`);
      }
    }

    logger.log('info', `Cleaned up socket connection for browserId: ${browserId}, runId: ${id}`);
  } catch (error: any) {
    logger.error(`Error cleaning up socket connection: ${error.message}`);
  }
}

export { createWorkflowAndStoreMetadata };

================================================
FILE: server/src/workflow-management/selector.ts
================================================
import { Page } from "playwright-core";
import { Coordinates } 
from "../types";
import { WhereWhatPair, WorkflowFile } from "maxun-core";
import logger from "../logger";

type Workflow = WorkflowFile["workflow"];

/**
 * Collects basic information (tag, text, URLs, attributes, HTML, frame and
 * shadow-root context) about the element found at the given coordinates.
 *
 * Two selection modes exist:
 * - element mode (`!getList || listSelector !== ''`): the hit element with the
 *   greatest ancestor depth is used, and a direct `<a>` parent takes priority.
 * - container mode (`getList && listSelector === ''`): a hit element of at
 *   least 30x30 px that contains another hit element is used, and a `<td>`/`<th>`
 *   hit is replaced by its enclosing `<table>`.
 *
 * @param page The page instance.
 * @param coordinates Coordinates of an element.
 * @param listSelector Currently chosen list selector; an empty string means none.
 * @param getList Whether the caller is currently selecting a list.
 * @category WorkflowManagement-Selectors
 * @returns The element information, `null` when the page is closed or no
 * element is found, or `undefined` when the lookup throws (the error is logged).
 */
export const getElementInformation = async (
  page: Page,
  coordinates: Coordinates,
  listSelector: string,
  getList: boolean
) => {
  try {
    const useContainerMode = getList && listSelector === '';

    if (page.isClosed()) {
      logger.debug(
        useContainerMode
          ? 'Page is closed, cannot get element information (else branch)'
          : 'Page is closed, cannot get element information'
      );
      return null;
    }

    // The callback is serialized and executed in the browser, so it must only
    // use its own argument and browser globals (no outer-scope references).
    const elementInfo = await page.evaluate(
      async ({ x, y, containerMode }) => {
        const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
          const elements = document.elementsFromPoint(x, y) as HTMLElement[];
          if (!elements.length) return null;

          // Element mode: choose the hit element with the most ancestors.
          const findDeepestElement = (candidates: HTMLElement[]): HTMLElement | null => {
            if (!candidates.length) return null;
            if (candidates.length === 1) return candidates[0];

            let deepest = candidates[0];
            let maxDepth = 0;
            for (const candidate of candidates) {
              let depth = 0;
              let current: HTMLElement | null = candidate;
              while (current) {
                depth++;
                current = current.parentElement;
              }
              if (depth > maxDepth) {
                maxDepth = depth;
                deepest = candidate;
              }
            }
            return deepest;
          };

          // Container mode: choose the first reasonably sized hit element that
          // contains another hit element; fall back to the topmost hit.
          const findContainerElement = (candidates: HTMLElement[]): HTMLElement | null => {
            if (!candidates.length) return null;
            if (candidates.length === 1) return candidates[0];

            for (let i = 0; i < candidates.length; i++) {
              const candidate = candidates[i];
              const rect = candidate.getBoundingClientRect();
              if (rect.width >= 30 && rect.height >= 30) {
                const hasChildrenInList = candidates.some(
                  (other, j) => i !== j && candidate.contains(other)
                );
                if (hasChildrenInList) {
                  return candidate;
                }
              }
            }
            return candidates[0];
          };

          let deepestElement: HTMLElement | null = containerMode
            ? findContainerElement(elements)
            : findDeepestElement(elements);
          if (!deepestElement) return null;

          // Container mode only: when an anchor was picked, prefer the first
          // unrelated hit element whose box overlaps the anchor's box.
          if (containerMode && deepestElement.tagName === 'A') {
            for (let i = 1; i < elements.length; i++) {
              const sibling = elements[i];
              if (!deepestElement.contains(sibling) && !sibling.contains(deepestElement)) {
                const anchorRect = deepestElement.getBoundingClientRect();
                const siblingRect = sibling.getBoundingClientRect();
                const isOverlapping = !(
                  siblingRect.right < anchorRect.left ||
                  siblingRect.left > anchorRect.right ||
                  siblingRect.bottom < anchorRect.top ||
                  siblingRect.top > anchorRect.bottom
                );
                if (isOverlapping) {
                  deepestElement = sibling;
                  break;
                }
              }
            }
          }

          // Follows open shadow roots downwards (at most MAX_SHADOW_DEPTH levels).
          // NOTE(review): always queries with the top-level x/y, also for
          // elements found inside frames — confirm this is intended.
          const traverseShadowDOM = (element: HTMLElement): HTMLElement => {
            let current = element;
            let shadowRoot = current.shadowRoot;
            let deepest = current;
            let depth = 0;
            const MAX_SHADOW_DEPTH = 4;

            while (shadowRoot && depth < MAX_SHADOW_DEPTH) {
              const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement;
              if (!shadowElement || shadowElement === current) break;

              deepest = shadowElement;
              current = shadowElement;
              shadowRoot = current.shadowRoot;
              depth++;
            }
            return deepest;
          };

          const isInFrameset = () => {
            let node = deepestElement;
            while (node && node.parentElement) {
              if (node.tagName === 'FRAMESET' || node.tagName === 'FRAME') {
                return true;
              }
              node = node.parentElement;
            }
            return false;
          };

          if (deepestElement.tagName === 'IFRAME') {
            let currentIframe = deepestElement as HTMLIFrameElement;
            let depth = 0;
            const MAX_IFRAME_DEPTH = 4;

            while (currentIframe && depth < MAX_IFRAME_DEPTH) {
              try {
                const iframeRect = currentIframe.getBoundingClientRect();
                const iframeX = x - iframeRect.left;
                const iframeY = y - iframeRect.top;

                const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document;
                if (!iframeDocument) break;

                const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement;
                if (!iframeElement) break;

                deepestElement = traverseShadowDOM(iframeElement);

                if (iframeElement.tagName === 'IFRAME') {
                  currentIframe = iframeElement as HTMLIFrameElement;
                  depth++;
                } else {
                  break;
                }
              } catch (error) {
                // Cross-origin iframes throw on document access.
                console.warn('Cannot access iframe content:', error);
                break;
              }
            }
          } else if (deepestElement.tagName === 'FRAME' || isInFrameset()) {
            const framesToCheck: HTMLFrameElement[] = [];

            if (deepestElement.tagName === 'FRAME') {
              framesToCheck.push(deepestElement as HTMLFrameElement);
            }

            if (isInFrameset()) {
              document.querySelectorAll('frame').forEach(frame => {
                framesToCheck.push(frame as HTMLFrameElement);
              });
            }

            const MAX_FRAME_DEPTH = 4;

            const processFrames = (frames: HTMLFrameElement[], currentDepth: number) => {
              if (currentDepth >= MAX_FRAME_DEPTH) return;

              for (const frameElement of frames) {
                try {
                  const frameRect = frameElement.getBoundingClientRect();
                  const frameX = x - frameRect.left;
                  const frameY = y - frameRect.top;

                  // Skip frames that do not contain the point.
                  if (frameX < 0 || frameY < 0 || frameX > frameRect.width || frameY > frameRect.height) {
                    continue;
                  }

                  const frameDocument = frameElement.contentDocument || frameElement.contentWindow?.document;
                  if (!frameDocument) continue;

                  const frameElementAtPoint = frameDocument.elementFromPoint(frameX, frameY) as HTMLElement;
                  if (!frameElementAtPoint) continue;

                  deepestElement = traverseShadowDOM(frameElementAtPoint);

                  if (frameElementAtPoint.tagName === 'FRAME') {
                    processFrames([frameElementAtPoint as HTMLFrameElement], currentDepth + 1);
                  }

                  break;
                } catch (error) {
                  console.warn('Cannot access frame content:', error);
                  continue;
                }
              }
            };

            processFrames(framesToCheck, 0);
          } else {
            deepestElement = traverseShadowDOM(deepestElement);
          }

          return deepestElement;
        };

        const hitElement = getDeepestElementFromPoint(x, y);
        if (!hitElement) return null;

        let targetElement: HTMLElement = hitElement;
        if (containerMode) {
          // A table cell stands for its whole table when picking a container.
          if (hitElement.tagName === 'TD' || hitElement.tagName === 'TH') {
            const tableParent = hitElement.closest('table');
            if (tableParent) {
              targetElement = tableParent;
            }
          }
        } else {
          // Prioritize Link (DO NOT REMOVE)
          const { parentElement } = hitElement;
          if (parentElement?.tagName === 'A') {
            targetElement = parentElement;
          }
        }

        const ownerDocument = targetElement.ownerDocument;
        const frameElement = ownerDocument?.defaultView?.frameElement;
        // True for any embedding element (<iframe> or <frame>).
        const isIframeContent = Boolean(frameElement);
        const isFrameContent = frameElement?.tagName === 'FRAME';

        const containingShadowRoot = targetElement.getRootNode() as ShadowRoot;
        const isShadowRoot = containingShadowRoot instanceof ShadowRoot;

        let info: {
          tagName: string;
          hasOnlyText?: boolean;
          innerText?: string;
          url?: string;
          imageUrl?: string;
          attributes?: Record<string, string>;
          innerHTML?: string;
          outerHTML?: string;
          isIframeContent?: boolean;
          isFrameContent?: boolean;
          iframeURL?: string;
          frameURL?: string;
          iframeIndex?: number;
          frameIndex?: number;
          frameHierarchy?: string[];
          isShadowRoot?: boolean;
          shadowRootMode?: string;
          shadowRootContent?: string;
        } = {
          tagName: targetElement.tagName ?? '',
          isIframeContent,
          isFrameContent,
          isShadowRoot
        };

        if (isIframeContent || isFrameContent) {
          // isIframeContent is also true for <frame>, so <frame> must be
          // excluded explicitly; otherwise the frame fields are never set.
          if (isIframeContent && !isFrameContent) {
            info.iframeURL = (frameElement as HTMLIFrameElement).src;
          } else if (isFrameContent) {
            info.frameURL = (frameElement as HTMLFrameElement).src;
          }

          // Walk outwards through the embedding frames, outermost first.
          let currentFrame = frameElement;
          const frameHierarchy: string[] = [];
          let frameIndex = 0;

          while (currentFrame) {
            frameHierarchy.unshift(
              currentFrame.id ||
              currentFrame.getAttribute('name') ||
              (currentFrame as HTMLFrameElement).src ||
              `${currentFrame.tagName.toLowerCase()}[${frameIndex}]`
            );

            const parentDoc = currentFrame.ownerDocument;
            currentFrame = parentDoc?.defaultView?.frameElement;
            frameIndex++;
          }

          info.frameHierarchy = frameHierarchy;
          if (isIframeContent && !isFrameContent) {
            info.iframeIndex = frameIndex - 1;
          } else if (isFrameContent) {
            info.frameIndex = frameIndex - 1;
          }
        }

        if (isShadowRoot) {
          info.shadowRootMode = containingShadowRoot.mode;
          info.shadowRootContent = containingShadowRoot.innerHTML;
        }

        info.attributes = Array.from(targetElement.attributes).reduce(
          (acc, attr) => {
            acc[attr.name] = attr.value;
            return acc;
          },
          {} as Record<string, string>
        );

        if (targetElement.tagName === 'A') {
          info.url = (targetElement as HTMLAnchorElement).href;
          info.innerText = targetElement.textContent ?? '';
        } else if (targetElement.tagName === 'IMG') {
          info.imageUrl = (targetElement as HTMLImageElement).src;
        } else if (targetElement.tagName === 'SELECT') {
          const selectElement = targetElement as HTMLSelectElement;
          info.innerText = selectElement.options[selectElement.selectedIndex]?.text ?? '';
          info.attributes = {
            ...info.attributes,
            selectedValue: selectElement.value,
          };
        } else if (
          targetElement.tagName === 'INPUT' &&
          ((targetElement as HTMLInputElement).type === 'time' ||
            (targetElement as HTMLInputElement).type === 'date')
        ) {
          // Parenthesised so the type check only applies to <input> elements.
          info.innerText = (targetElement as HTMLInputElement).value;
        } else {
          info.hasOnlyText = targetElement.children.length === 0 &&
            (targetElement.textContent !== null &&
              targetElement.textContent.trim().length > 0);
          info.innerText = targetElement.textContent ?? '';
        }

        info.innerHTML = targetElement.innerHTML;
        info.outerHTML = targetElement.outerHTML;

        return info;
      },
      { x: coordinates.x, y: coordinates.y, containerMode: useContainerMode }
    );
    return elementInfo;
  } catch (error) {
    const { message, stack } = error as Error;
    console.error('Error while retrieving selector:', message);
    console.error('Stack:', stack);
  }
};

/**
 * Returns the bounding rectangle of the element found at the given
 * coordinates, translated through any embedding frames so that it is relative
 * to the top window.
 *
 * The same two selection modes as in {@link getElementInformation} apply
 * (element mode vs. container mode), except that container mode performs no
 * anchor-overlap substitution here.
 *
 * @param page The page instance.
 * @param coordinates Coordinates of an element.
 * @param listSelector Currently chosen list selector; an empty string means none.
 * @param getList Whether the caller is currently selecting a list.
 * @returns A plain rectangle object, `null` when the page is closed or no
 * element is found, or `undefined` when the lookup throws (the error is logged).
 */
export const getRect = async (page: Page, coordinates: Coordinates, listSelector: string, getList: boolean) => {
  try {
    const useContainerMode = getList && listSelector === '';

    if (page.isClosed()) {
      logger.debug(
        useContainerMode
          ? 'Page is closed, cannot get element rect (else branch)'
          : 'Page is closed, cannot get element rect'
      );
      return null;
    }

    // The callback is serialized and executed in the browser, so it must only
    // use its own argument and browser globals (no outer-scope references).
    const rect = await page.evaluate(
      async ({ x, y, containerMode }) => {
        const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
          const elements = document.elementsFromPoint(x, y) as HTMLElement[];
          if (!elements.length) return null;

          // Element mode: choose the hit element with the most ancestors.
          const findDeepestElement = (candidates: HTMLElement[]): HTMLElement | null => {
            if (!candidates.length) return null;
            if (candidates.length === 1) return candidates[0];

            let deepest = candidates[0];
            let maxDepth = 0;
            for (const candidate of candidates) {
              let depth = 0;
              let current: HTMLElement | null = candidate;
              while (current) {
                depth++;
                current = current.parentElement;
              }
              if (depth > maxDepth) {
                maxDepth = depth;
                deepest = candidate;
              }
            }
            return deepest;
          };

          // Container mode: choose the first reasonably sized hit element that
          // contains another hit element; fall back to the topmost hit.
          const findContainerElement = (candidates: HTMLElement[]): HTMLElement | null => {
            if (!candidates.length) return null;
            if (candidates.length === 1) return candidates[0];

            for (let i = 0; i < candidates.length; i++) {
              const candidate = candidates[i];
              const rect = candidate.getBoundingClientRect();
              if (rect.width >= 30 && rect.height >= 30) {
                const hasChildrenInList = candidates.some(
                  (other, j) => i !== j && candidate.contains(other)
                );
                if (hasChildrenInList) {
                  return candidate;
                }
              }
            }
            return candidates[0];
          };

          let deepestElement: HTMLElement | null = containerMode
            ? findContainerElement(elements)
            : findDeepestElement(elements);
          if (!deepestElement) return null;

          // Follows open shadow roots downwards (at most MAX_SHADOW_DEPTH levels).
          const traverseShadowDOM = (element: HTMLElement): HTMLElement => {
            let current = element;
            let shadowRoot = current.shadowRoot;
            let deepest = current;
            let depth = 0;
            const MAX_SHADOW_DEPTH = 4;

            while (shadowRoot && depth < MAX_SHADOW_DEPTH) {
              const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement;
              if (!shadowElement || shadowElement === current) break;

              deepest = shadowElement;
              current = shadowElement;
              shadowRoot = current.shadowRoot;
              depth++;
            }
            return deepest;
          };

          const isInFrameset = () => {
            let node = deepestElement;
            while (node && node.parentElement) {
              if (node.tagName === 'FRAMESET' || node.tagName === 'FRAME') {
                return true;
              }
              node = node.parentElement;
            }
            return false;
          };

          if (deepestElement.tagName === 'IFRAME') {
            let currentIframe = deepestElement as HTMLIFrameElement;
            let depth = 0;
            const MAX_IFRAME_DEPTH = 4;

            while (currentIframe && depth < MAX_IFRAME_DEPTH) {
              try {
                const iframeRect = currentIframe.getBoundingClientRect();
                const iframeX = x - iframeRect.left;
                const iframeY = y - iframeRect.top;

                const iframeDocument = currentIframe.contentDocument || currentIframe.contentWindow?.document;
                if (!iframeDocument) break;

                const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement;
                if (!iframeElement) break;

                deepestElement = traverseShadowDOM(iframeElement);

                if (iframeElement.tagName === 'IFRAME') {
                  currentIframe = iframeElement as HTMLIFrameElement;
                  depth++;
                } else {
                  break;
                }
              } catch (error) {
                // Cross-origin iframes throw on document access.
                console.warn('Cannot access iframe content:', error);
                break;
              }
            }
          } else if (deepestElement.tagName === 'FRAME' || isInFrameset()) {
            const framesToCheck: HTMLFrameElement[] = [];

            if (deepestElement.tagName === 'FRAME') {
              framesToCheck.push(deepestElement as HTMLFrameElement);
            }

            if (isInFrameset()) {
              document.querySelectorAll('frame').forEach(frame => {
                framesToCheck.push(frame as HTMLFrameElement);
              });
            }

            const MAX_FRAME_DEPTH = 4;

            const processFrames = (frames: HTMLFrameElement[], currentDepth: number) => {
              if (currentDepth >= MAX_FRAME_DEPTH) return;

              for (const frameElement of frames) {
                try {
                  const frameRect = frameElement.getBoundingClientRect();
                  const frameX = x - frameRect.left;
                  const frameY = y - frameRect.top;

                  // Skip frames that do not contain the point.
                  if (frameX < 0 || frameY < 0 || frameX > frameRect.width || frameY > frameRect.height) {
                    continue;
                  }

                  const frameDocument = frameElement.contentDocument || frameElement.contentWindow?.document;
                  if (!frameDocument) continue;

                  const frameElementAtPoint = frameDocument.elementFromPoint(frameX, frameY) as HTMLElement;
                  if (!frameElementAtPoint) continue;

                  deepestElement = traverseShadowDOM(frameElementAtPoint);

                  if (frameElementAtPoint.tagName === 'FRAME') {
                    processFrames([frameElementAtPoint as HTMLFrameElement], currentDepth + 1);
                  }

                  break;
                } catch (error) {
                  console.warn('Cannot access frame content:', error);
                  continue;
                }
              }
            };

            processFrames(framesToCheck, 0);
          } else {
            deepestElement = traverseShadowDOM(deepestElement);
          }

          return deepestElement;
        };

        const hitElement = getDeepestElementFromPoint(x, y);
        if (!hitElement) return null;

        let element: HTMLElement = hitElement;
        if (containerMode) {
          // A table cell stands for its whole table when picking a container.
          if (hitElement.tagName === 'TD' || hitElement.tagName === 'TH') {
            const tableParent = hitElement.closest('table');
            if (tableParent) {
              element = tableParent;
            }
          }
        } else {
          // Prioritize Link (DO NOT REMOVE)
          const { parentElement } = hitElement;
          if (parentElement?.tagName === 'A') {
            element = parentElement;
          }
        }

        const rectangle = element?.getBoundingClientRect();
        if (!rectangle) return null;

        // Copies the DOMRect into a plain object; toJSON keeps the result
        // limited to the numeric fields when it is serialized.
        const createRectObject = (rect: DOMRect) => ({
          x: rect.x,
          y: rect.y,
          width: rect.width,
          height: rect.height,
          top: rect.top,
          right: rect.right,
          bottom: rect.bottom,
          left: rect.left,
          toJSON() {
            return {
              x: this.x,
              y: this.y,
              width: this.width,
              height: this.height,
              top: this.top,
              right: this.right,
              bottom: this.bottom,
              left: this.left
            };
          }
        });

        // For elements inside iframes or frames, adjust coordinates relative to the top window
        let adjustedRect = createRectObject(rectangle);
        let currentWindow = element.ownerDocument.defaultView;

        while (currentWindow !== window.top) {
          const frameElement = currentWindow?.frameElement;
          if (!frameElement) break;

          const frameRect = frameElement.getBoundingClientRect();
          adjustedRect = createRectObject({
            x: adjustedRect.x + frameRect.x,
            y: adjustedRect.y + frameRect.y,
            width: adjustedRect.width,
            height: adjustedRect.height,
            top: adjustedRect.top + frameRect.top,
            right: adjustedRect.right + frameRect.left,
            bottom: adjustedRect.bottom + frameRect.top,
            left: adjustedRect.left + frameRect.left,
          } as DOMRect);

          currentWindow = frameElement.ownerDocument.defaultView;
        }

        return adjustedRect;
      },
      { x: coordinates.x, y: coordinates.y, containerMode: useContainerMode }
    );
    return rect;
  } catch (error) {
    const { message, stack } = error as Error;
    console.error('Error while retrieving selector:', message);
    console.error('Stack:', stack);
  }
};

/**
 * Returns the best and unique css {@link Selectors} for the element on the page.
 * Internally uses a finder function from https://github.com/antonmedv/finder/blob/master/finder.ts
 * available as a npm package: @medv/finder
 *
 * The finder needs to be executed and defined inside a browser context. Meaning,
 * the code needs to be available inside a page evaluate function.
 * @param page The page instance.
 * @param coordinates Coordinates of an element.
* @category WorkflowManagement-Selectors * @returns {Promise} */ export const getSelectors = async (page: Page, coordinates: Coordinates) => { try { if (page.isClosed()) { logger.debug('Page is closed, cannot get selectors'); return null; } const selectors: any = await page.evaluate(async ({ x, y }) => { // version @medv/finder // https://github.com/antonmedv/finder/blob/master/finder.ts type Node = { name: string; penalty: number; level?: number; }; type Path = Node[]; enum Limit { All, Two, One, } type Options = { root: Element; idName: (name: string) => boolean; className: (name: string) => boolean; tagName: (name: string) => boolean; attr: (name: string, value: string) => boolean; seedMinLength: number; optimizedMinLength: number; threshold: number; maxNumberOfTries: number; }; let config: Options; let rootDocument: Document | Element; function finder(input: Element, options?: Partial) { if (input.nodeType !== Node.ELEMENT_NODE) { throw new Error(`Can't generate CSS selector for non-element node type.`); } if ('html' === input.tagName.toLowerCase()) { return 'html'; } const defaults: Options = { root: document.body, idName: (name: string) => true, className: (name: string) => true, tagName: (name: string) => true, attr: (name: string, value: string) => false, seedMinLength: 1, optimizedMinLength: 2, threshold: 900, maxNumberOfTries: 9000, }; config = { ...defaults, ...options }; rootDocument = findRootDocument(config.root, defaults); let path = bottomUpSearch(input, Limit.All, () => bottomUpSearch(input, Limit.Two, () => bottomUpSearch(input, Limit.One)) ); if (path) { const optimized = sort(optimize(path, input)); if (optimized.length > 0) { path = optimized[0]; } return selector(path); } else { throw new Error(`Selector was not found.`); } } function findRootDocument(rootNode: Element | Document, defaults: Options) { if (rootNode.nodeType === Node.DOCUMENT_NODE) { return rootNode; } if (rootNode === defaults.root) { return rootNode.ownerDocument as Document; 
} return rootNode; } function bottomUpSearch( input: Element, limit: Limit, fallback?: () => Path | null ): Path | null { let path: Path | null = null; let stack: Node[][] = []; let current: Element | null = input; let i = 0; while (current && current !== config.root.parentElement) { let level: Node[] = maybe(id(current)) || maybe(...attr(current)) || maybe(...classNames(current)) || maybe(tagName(current)) || [any()]; const nth = index(current); if (limit === Limit.All) { if (nth) { level = level.concat( level.filter(dispensableNth).map((node) => nthChild(node, nth)) ); } } else if (limit === Limit.Two) { level = level.slice(0, 1); if (nth) { level = level.concat( level.filter(dispensableNth).map((node) => nthChild(node, nth)) ); } } else if (limit === Limit.One) { const [node] = (level = level.slice(0, 1)); if (nth && dispensableNth(node)) { level = [nthChild(node, nth)]; } } for (let node of level) { node.level = i; } stack.push(level); if (stack.length >= config.seedMinLength) { path = findUniquePath(stack, fallback); if (path) { break; } } current = current.parentElement; i++; } if (!path) { path = findUniquePath(stack, fallback); } return path; } function findUniquePath( stack: Node[][], fallback?: () => Path | null ): Path | null { const paths = sort(combinations(stack)); if (paths.length > config.threshold) { return fallback ? 
fallback() : null; } for (let candidate of paths) { if (unique(candidate)) { return candidate; } } return null; } function selector(path: Path): string { let node = path[0]; let query = node.name; for (let i = 1; i < path.length; i++) { const level = path[i].level || 0; if (node.level === level - 1) { query = `${path[i].name} > ${query}`; } else { query = `${path[i].name} ${query}`; } node = path[i]; } return query; } function penalty(path: Path): number { return path.map((node) => node.penalty).reduce((acc, i) => acc + i, 0); } function unique(path: Path) { switch (rootDocument.querySelectorAll(selector(path)).length) { case 0: throw new Error( `Can't select any node with this selector: ${selector(path)}` ); case 1: return true; default: return false; } } function id(input: Element): Node | null { const elementId = input.getAttribute('id'); if (elementId && config.idName(elementId)) { return { name: '#' + cssesc(elementId, { isIdentifier: true }), penalty: 0, }; } return null; } function attr(input: Element): Node[] { const attrs = Array.from(input.attributes).filter((attr) => config.attr(attr.name, attr.value) ); return attrs.map( (attr): Node => ({ name: '[' + cssesc(attr.name, { isIdentifier: true }) + '="' + cssesc(attr.value) + '"]', penalty: 0.5, }) ); } function classNames(input: Element): Node[] { const names = Array.from(input.classList).filter(config.className); return names.map( (name): Node => ({ name: '.' 
+ cssesc(name, { isIdentifier: true }), penalty: 1, }) ); } function tagName(input: Element): Node | null { const name = input.tagName.toLowerCase(); if (config.tagName(name)) { return { name, penalty: 2, }; } return null; } function any(): Node { return { name: '*', penalty: 3, }; } function index(input: Element): number | null { const parent = input.parentNode; if (!parent) { return null; } let child = parent.firstChild; if (!child) { return null; } let i = 0; while (child) { if (child.nodeType === Node.ELEMENT_NODE) { i++; } if (child === input) { break; } child = child.nextSibling; } return i; } function nthChild(node: Node, i: number): Node { return { name: node.name + `:nth-child(${i})`, penalty: node.penalty + 1, }; } function dispensableNth(node: Node) { return node.name !== 'html' && !node.name.startsWith('#'); } function maybe(...level: (Node | null)[]): Node[] | null { const list = level.filter(notEmpty); if (list.length > 0) { return list; } return null; } function notEmpty(value: T | null | undefined): value is T { return value !== null && value !== undefined; } function* combinations(stack: Node[][], path: Node[] = []): Generator { if (stack.length > 0) { for (let node of stack[0]) { yield* combinations(stack.slice(1, stack.length), path.concat(node)); } } else { yield path; } } function sort(paths: Iterable): Path[] { return Array.from(paths).sort((a, b) => penalty(a) - penalty(b)); } type Scope = { counter: number; visited: Map; }; function* optimize( path: Path, input: Element, scope: Scope = { counter: 0, visited: new Map(), } ): Generator { if (path.length > 2 && path.length > config.optimizedMinLength) { for (let i = 1; i < path.length - 1; i++) { if (scope.counter > config.maxNumberOfTries) { return; // Okay At least I tried! 
}
      scope.counter += 1;
      // Candidate: the current path with the i-th node removed.
      const newPath = [...path];
      newPath.splice(i, 1);
      const newPathKey = selector(newPath);
      if (scope.visited.has(newPathKey)) {
        continue;
      }
      try {
        if (unique(newPath) && same(newPath, input)) {
          yield newPath;
          scope.visited.set(newPathKey, true);
          yield* optimize(newPath, input, scope);
        }
      } catch (e: any) {
        // `unique` throws when the selector matches nothing; skip that candidate.
        continue;
      }
    }
  }
}

/**
 * Tells whether the first node matched by the path's selector inside
 * `rootDocument` is exactly `input`.
 */
function same(path: Path, input: Element) {
  return rootDocument.querySelector(selector(path)) === input;
}

// Character classes used by `cssesc` below:
//  - regexAnySingleEscape: characters written as `\<char>` when `escapeEverything` is set
//  - regexSingleEscape:    characters written as `\<char>` inside identifiers
//  - regexExcessiveSpaces: a `\HEX ` escape whose trailing space may be dropped
const regexAnySingleEscape = /[ -,\.\/:-@\[-\^`\{-~]/;
const regexSingleEscape = /[ -,\.\/:-@\[\]\^`\{-~]/;
const regexExcessiveSpaces = /(^|\\+)?(\\[A-F0-9]{1,6})\x20(?![a-fA-F0-9\x20])/g;

const defaultOptions = {
  escapeEverything: false,
  isIdentifier: false,
  quotes: 'single',
  wrap: false,
};

/**
 * Escapes a string for use in CSS, either as an identifier (id, class or
 * attribute name) or as the content of a quoted string.
 *
 * Non-printable and non-ASCII characters become `\HEX ` escapes. A UTF-16
 * surrogate pair is combined into ONE escape of the real code point
 * (e.g. U+1F600 becomes `\1F600`), because escaping the two halves separately
 * yields lone-surrogate escapes that CSS does not resolve to the original
 * character.
 *
 * @param string Text to escape.
 * @param opt    Overrides for `defaultOptions`:
 *               - `escapeEverything`: escape every printable ASCII character too
 *               - `isIdentifier`: apply identifier rules (special characters,
 *                 leading digit, leading `-` followed by `-` or a digit)
 *               - `quotes`: `'single'` or `'double'`; anything else means `'single'`
 *               - `wrap`: surround the result with the quote character
 *                 (ignored for identifiers)
 * @returns The escaped text.
 */
function cssesc(string: string, opt: Partial<typeof defaultOptions> = {}) {
  const options = { ...defaultOptions, ...opt };
  if (options.quotes !== 'single' && options.quotes !== 'double') {
    options.quotes = 'single';
  }
  const quote = options.quotes === 'double' ? '"' : "'";
  const isIdentifier = options.isIdentifier;

  const firstChar = string.charAt(0);
  let output = '';
  let counter = 0;
  const length = string.length;
  while (counter < length) {
    const character = string.charAt(counter++);
    let codePoint = character.charCodeAt(0);
    let value: string | undefined = void 0;
    // If it’s not a printable ASCII character…
    if (codePoint < 0x20 || codePoint > 0x7e) {
      // High surrogates occupy 0xD800–0xDBFF.
      if (codePoint >= 0xd800 && codePoint <= 0xdbff && counter < length) {
        // It’s a high surrogate, and there is a next character.
        const extra = string.charCodeAt(counter++);
        if ((extra & 0xfc00) === 0xdc00) {
          // Next code unit is a low surrogate: combine the pair.
          // code point = 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00)
          codePoint = ((codePoint & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000;
        } else {
          // It’s an unmatched surrogate; only append this code unit, in case
          // the next code unit is the high surrogate of a surrogate pair.
          counter--;
        }
      }
      value = '\\' + codePoint.toString(16).toUpperCase() + ' ';
    } else {
      if (options.escapeEverything) {
        if (regexAnySingleEscape.test(character)) {
          value = '\\' + character;
        } else {
          value = '\\' + codePoint.toString(16).toUpperCase() + ' ';
        }
      } else if (/[\t\n\f\r\x0B]/.test(character)) {
        value = '\\' + codePoint.toString(16).toUpperCase() + ' ';
      } else if (
        character === '\\' ||
        (!isIdentifier &&
          ((character === '"' && quote === character) ||
            (character === "'" && quote === character))) ||
        (isIdentifier && regexSingleEscape.test(character))
      ) {
        value = '\\' + character;
      } else {
        value = character;
      }
    }
    output += value;
  }

  if (isIdentifier) {
    if (/^-[-\d]/.test(output)) {
      // A leading `-` followed by `-` or a digit: escape the first dash.
      output = '\\-' + output.slice(1);
    } else if (/\d/.test(firstChar)) {
      // A leading digit is written as the hex escape `\3N `.
      output = '\\3' + firstChar + ' ' + output.slice(1);
    }
  }

  // Remove spaces after `\HEX` escapes that are not followed by a hex digit,
  // since they’re redundant. Note that this is only possible if the escape
  // sequence isn’t preceded by an odd number of backslashes.
  output = output.replace(regexExcessiveSpaces, function ($0, $1, $2) {
    if ($1 && $1.length % 2) {
      // It’s not safe to remove the space, so don’t.
      return $0;
    }
    // Strip the space.
    return ($1 || '') + $2;
  });

  if (!isIdentifier && options.wrap) {
    return quote + output + quote;
  }
  return output;
}

const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
  let elements = document.elementsFromPoint(x, y) as HTMLElement[];
  if (!elements.length) return null;

  const findDeepestElement = (elements: HTMLElement[]): HTMLElement | null => {
    if (!elements.length) return null;
    if (elements.length === 1) return elements[0];

    let deepestElement = elements[0];
    let maxDepth = 0;

    for (const element of elements) {
      let depth = 0;
      let current = element;

      while (current) {
        depth++;
        if (current.parentElement) {
          current = current.parentElement;
        } else {
          break;
        }
      }

      if (depth > maxDepth) {
        maxDepth = depth;
        deepestElement = element;
      }
    }

    return deepestElement;
  };

  let deepestElement = findDeepestElement(elements);
  if (!deepestElement) return null;

  const traverseShadowDOM = (element: HTMLElement): HTMLElement => {
    let current = element;
    let shadowRoot = current.shadowRoot;
    let deepest = current;
    let depth = 0;
    const MAX_SHADOW_DEPTH = 4;

    while (shadowRoot && depth < MAX_SHADOW_DEPTH) {
      const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement;
      if (!shadowElement || shadowElement === current) break;

      deepest = shadowElement;
      current = shadowElement;
      shadowRoot = current.shadowRoot;
      depth++;
    }

    return deepest;
  };

  const isInFrameset = () => {
    let node = deepestElement;
    while (node && node.parentElement) {
      if (node.tagName === 'FRAMESET' || node.tagName === 'FRAME') {
        return true;
      }
      node = node.parentElement;
    }
    return false;
  };

  if (deepestElement.tagName === 'IFRAME') {
    let currentIframe = deepestElement as HTMLIFrameElement;
    let depth = 0;
    const MAX_IFRAME_DEPTH = 4;

    while (currentIframe && depth < MAX_IFRAME_DEPTH) {
      try {
        const iframeRect = currentIframe.getBoundingClientRect();
        const iframeX = x - iframeRect.left;
        const iframeY = y - iframeRect.top;

        const iframeDocument = currentIframe.contentDocument ||
currentIframe.contentWindow?.document;
        if (!iframeDocument) break;

        // Hit-test again inside the iframe, using coordinates relative to it.
        const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement;
        if (!iframeElement) break;

        deepestElement = traverseShadowDOM(iframeElement);

        // Keep descending while the hit is itself an iframe (bounded by MAX_IFRAME_DEPTH).
        if (iframeElement.tagName === 'IFRAME') {
          currentIframe = iframeElement as HTMLIFrameElement;
          depth++;
        } else {
          break;
        }
      } catch (error) {
        console.warn('Cannot access iframe content:', error);
        break;
      }
    }
  } else if (deepestElement.tagName === 'FRAME' || isInFrameset()) {
    const framesToCheck = [];

    if (deepestElement.tagName === 'FRAME') {
      framesToCheck.push(deepestElement as HTMLFrameElement);
    }

    if (isInFrameset()) {
      document.querySelectorAll('frame').forEach(frame => {
        framesToCheck.push(frame as HTMLFrameElement);
      });
    }

    let frameDepth = 0;
    const MAX_FRAME_DEPTH = 4;

    // Looks into each frame whose rectangle contains the point and stops at
    // the first one that yields an element; recurses when that element is
    // itself a FRAME.
    const processFrames = (frames: HTMLFrameElement[], currentDepth: number) => {
      if (currentDepth >= MAX_FRAME_DEPTH) return;

      for (const frameElement of frames) {
        try {
          const frameRect = frameElement.getBoundingClientRect();
          const frameX = x - frameRect.left;
          const frameY = y - frameRect.top;

          // Point lies outside this frame.
          if (frameX < 0 || frameY < 0 || frameX > frameRect.width || frameY > frameRect.height) {
            continue;
          }

          const frameDocument =
            frameElement.contentDocument ||
            frameElement.contentWindow?.document;

          if (!frameDocument) continue;

          const frameElementAtPoint = frameDocument.elementFromPoint(frameX, frameY) as HTMLElement;
          if (!frameElementAtPoint) continue;

          deepestElement = traverseShadowDOM(frameElementAtPoint);

          if (frameElementAtPoint.tagName === 'FRAME') {
            processFrames([frameElementAtPoint as HTMLFrameElement], currentDepth + 1);
          }

          break;
        } catch (error) {
          console.warn('Cannot access frame content:', error);
          continue;
        }
      }
    };

    processFrames(framesToCheck, frameDepth);
  } else {
    deepestElement = traverseShadowDOM(deepestElement);
  }

  return deepestElement;
};

/**
 * Builds a selector chain for an element that lives inside one or more
 * (i)frames. The parts are joined with ` :>> `: one part per enclosing frame,
 * followed by the element's selector inside the innermost frame document.
 *
 * @param element Element to describe.
 * @returns `{ fullSelector, isFrameContent: true }`, or null when the element
 *          is not inside a frame or selector generation throws.
 */
const genSelectorForFrame = (element: HTMLElement) => {
  // Walks outwards through `defaultView.frameElement`, collecting at most
  // MAX_DEPTH enclosing frames, outermost first.
  const getFramePath = (el: HTMLElement) => {
    const path = [];
    let current = el;
    let depth = 0;
    const MAX_DEPTH = 4;

    while (current && depth < MAX_DEPTH) {
      const ownerDocument = current.ownerDocument;

      const frameElement =
        ownerDocument?.defaultView?.frameElement as HTMLIFrameElement | HTMLFrameElement;

      if (frameElement) {
        path.unshift({
          frame: frameElement,
          document: ownerDocument,
          element: current,
          isFrame: frameElement.tagName === 'FRAME'
        });
        current = frameElement;
        depth++;
      } else {
        break;
      }
    }
    return path;
  };

  const framePath = getFramePath(element);
  if (framePath.length === 0) return null;

  try {
    const selectorParts: string[] = [];

    framePath.forEach((context, index) => {
      // <frame> elements are addressed by their name attribute, iframes via
      // `finder` rooted at the body of the document that contains them.
      // NOTE(review): a <frame> without a name attribute produces
      // `frame[name="null"]`, and the value is not escaped — confirm this is acceptable.
      const frameSelector = context.isFrame ?
        `frame[name="${context.frame.getAttribute('name')}"]` :
        finder(context.frame, {
          root: index === 0 ? document.body :
            (framePath[index - 1].document.body as Element)
        });

      if (index === framePath.length - 1) {
        const elementSelector = finder(element, {
          root: context.document.body as Element
        });
        selectorParts.push(`${frameSelector} :>> ${elementSelector}`);
      } else {
        selectorParts.push(frameSelector);
      }
    });

    return {
      fullSelector: selectorParts.join(' :>> '),
      isFrameContent: true
    };
  } catch (e) {
    console.warn('Error generating frame selector:', e);
    return null;
  }
};

/**
 * Builds a selector chain for an element that lives inside one or more shadow
 * roots. The parts are joined with ` >> `: one part per shadow host, followed
 * by the element's selector inside the innermost shadow root.
 *
 * @param element Element to describe.
 * @returns `{ fullSelector, mode }` where `mode` is the innermost shadow
 *          root's mode, or null when the element is not inside a shadow root
 *          or selector generation throws.
 */
const genSelectorForShadowDOM = (element: HTMLElement) => {
  // Collects at most MAX_DEPTH enclosing shadow roots, outermost first.
  const getShadowPath = (el: HTMLElement) => {
    const path = [];
    let current = el;
    let depth = 0;
    const MAX_DEPTH = 4;

    while (current && depth < MAX_DEPTH) {
      const rootNode = current.getRootNode();
      if (rootNode instanceof ShadowRoot) {
        path.unshift({
          host: rootNode.host as HTMLElement,
          root: rootNode,
          element: current
        });
        current = rootNode.host as HTMLElement;
        depth++;
      } else {
        break;
      }
    }
    return path;
  };

  const shadowPath = getShadowPath(element);
  if (shadowPath.length === 0) return null;

  try {
    const selectorParts: string[] = [];

    // Generate selector for each shadow DOM boundary
    shadowPath.forEach((context, index) => {
      // Get selector for the host element
      const hostSelector = finder(context.host, {
        root: index === 0 ? document.body : (shadowPath[index - 1].root as unknown as Element)
      });

      // For the last context, get selector for target element
      if (index === shadowPath.length - 1) {
        const elementSelector = finder(element, {
          root: context.root as unknown as Element
        });
        selectorParts.push(`${hostSelector} >> ${elementSelector}`);
      } else {
        selectorParts.push(hostSelector);
      }
    });

    return {
      fullSelector: selectorParts.join(' >> '),
      mode: shadowPath[shadowPath.length - 1].root.mode
    };
  } catch (e) {
    console.warn('Error generating shadow DOM selector:', e);
    return null;
  }
};

/**
 * Produces the full set of selector variants for an element.
 *
 * Every variant is generated independently; a variant whose generation throws
 * is left as null instead of failing the whole call.
 *
 * @param element Element to describe, or null.
 * @returns null for a null element, otherwise an object holding the id,
 *          general, attribute, test-id, href, accessibility, form and rel
 *          selectors, the element's `innerText` and `href` attribute, and the
 *          frame / shadow DOM selector chains (null when not applicable).
 */
const genSelectors = (element: HTMLElement | null) => {
  if (element == null) {
    return null;
  }

  const href = element.getAttribute('href');

  let generalSelector = null;
  try {
    generalSelector = finder(element);
  } catch (e) {
  }

  let attrSelector = null;
  try {
    // Allow every attribute to take part in the selector.
    attrSelector = finder(element, { attr: () => true });
  } catch (e) {
  }

  let iframeSelector = null;
  try {
    // Check if element is within frame/iframe
    const isInFrame = element.ownerDocument !== document;
    const isInFrameset = () => {
      let doc = element.ownerDocument;
      return doc.querySelectorAll('frameset').length > 0;
    };

    if (isInFrame || isInFrameset()) {
      iframeSelector = genSelectorForFrame(element);
    }
  } catch (e) {
    console.warn('Error detecting frames:', e);
  }

  const shadowSelector = genSelectorForShadowDOM(element);

  const relSelector = genSelectorForAttributes(element, ['rel']);
  const hrefSelector = genSelectorForAttributes(element, ['href']);
  const formSelector = genSelectorForAttributes(element, [
    'name',
    'placeholder',
    'for',
  ]);
  const accessibilitySelector = genSelectorForAttributes(element, [
    'aria-label',
    'alt',
    'title',
  ]);

  const testIdSelector = genSelectorForAttributes(element, [
    'data-testid',
    'data-test-id',
    'data-testing',
    'data-test',
    'data-qa',
    'data-cy',
  ]);

  // We won't use an id selector if the id is invalid (starts with a number)
  let idSelector = null;
  try {
    idSelector =
      isAttributesDefined(element, ['id']) && !isCharacterNumber(element.id?.[0])
        ? // Certain apps don't have unique ids (ex. youtube)
        finder(element, {
          attr: (name) => name === 'id',
        })
        : null;
  } catch (e) {
  }

  return {
    id: idSelector,
    generalSelector,
    attrSelector,
    testIdSelector,
    text: element.innerText,
    href,
    // Only try to pick an href selector if there is an href on the element
    hrefSelector,
    accessibilitySelector,
    formSelector,
    relSelector,
    iframeSelector: iframeSelector ? {
      full: iframeSelector.fullSelector,
      isIframe: iframeSelector.isFrameContent,
    } : null,
    shadowSelector: shadowSelector ? {
      full: shadowSelector.fullSelector,
      mode: shadowSelector.mode
    } : null
  };
}

/**
 * Returns the subset of `attributes` that the element carries with a
 * non-null, non-empty value.
 */
function genAttributeSet(element: HTMLElement, attributes: string[]) {
  return new Set(
    attributes.filter((attr) => {
      const attrValue = element.getAttribute(attr);
      return attrValue != null && attrValue.length > 0;
    })
  );
}

/**
 * True when at least one of `attributes` has a non-empty value on the element.
 */
function isAttributesDefined(element: HTMLElement, attributes: string[]) {
  return genAttributeSet(element, attributes).size > 0;
}

// Gets all attributes that aren't null and empty
// (returned as a predicate over attribute names, in the shape passed to
// `finder` as its `attr` option below).
function genValidAttributeFilter(element: HTMLElement, attributes: string[]) {
  const attrSet = genAttributeSet(element, attributes);
  return (name: string) => attrSet.has(name);
}

/**
 * Generates a selector that relies on the given attributes only.
 *
 * When `rel` is among the requested attributes and present on the element,
 * the literal `[rel="…"]` selector is returned directly (the value is
 * inserted as-is, without escaping).
 *
 * @param element    Element to describe.
 * @param attributes Attribute names that may be used.
 * @returns The selector, or null when none of the attributes has a value or
 *          generation throws.
 */
function genSelectorForAttributes(element: HTMLElement, attributes: string[]) {
  let selector = null;
  try {
    if (attributes.includes('rel') && element.hasAttribute('rel')) {
      const relValue = element.getAttribute('rel');
      return `[rel="${relValue}"]`;
    }

    selector = isAttributesDefined(element, attributes)
      ? finder(element, {
        idName: () => false, // Don't use the id to generate a selector
        attr: genValidAttributeFilter(element, attributes),
      })
      : null;
  } catch (e) { }

  return selector;
}

// isCharacterNumber
// Truthy only for a one-character string that is a decimal digit; the value
// returned is the `match` result, not a strict boolean.
function isCharacterNumber(char: string) {
  return char.length === 1 && char.match(/[0-9]/);
}

const hoveredElement = getDeepestElementFromPoint(x, y) as HTMLElement;
// NOTE(review): `!hoveredElement.closest(...) != null` compares a boolean with
// null and is therefore always true, so elements inside `#overlay-controls`
// are not actually excluded here — confirm the intended condition.
if (
  hoveredElement != null &&
  !hoveredElement.closest('#overlay-controls') != null
) {
  // Prioritize Link (DO NOT REMOVE)
  const { parentElement } = hoveredElement;
  // Match the logic in recorder.ts for link clicks
  const element = parentElement?.tagName === 'A' ? parentElement : hoveredElement;
  const generatedSelectors = genSelectors(element);
  return generatedSelectors;
}
    }, { x: coordinates.x, y: coordinates.y });
    return selectors;
  } catch (e) {
    const { message, stack } = e as Error;
    logger.log('error', `Error while retrieving element: ${message}`);
    logger.log('error', `Stack: ${stack}`);
  }
  return null;
};

/** Result of the non-unique selector lookup: a single selector string. */
interface SelectorResult {
  generalSelector: string;
}

/**
 * Returns the best non-unique css {@link Selectors} for the element on the page.
 * @param page The page instance.
 * @param coordinates Coordinates of an element.
* @param listSelector Selector of an already chosen list. When empty, the
 *   element under the point is resolved with the container heuristic (first
 *   hit that is at least 30x30 px and contains another hit) and a TD/TH hit is
 *   promoted to its enclosing table; otherwise the deepest hit is used as-is.
 *   The value itself is not used to build the selector.
 * @category WorkflowManagement-Selectors
 * @returns {Promise<SelectorResult>} The selector, or `{ generalSelector: '' }`
 *   when the page is closed, nothing is found at the point, or an error occurs.
 */
export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates, listSelector: string): Promise<SelectorResult> => {
  interface DOMContext {
    type: 'iframe' | 'frame' | 'shadow';
    element: HTMLElement;
    container: HTMLIFrameElement | HTMLFrameElement | ShadowRoot;
    host?: HTMLElement;
    document?: Document;
  }

  // No list selector yet: the user is picking the list container itself.
  const pickContainer = !listSelector;

  try {
    if (page.isClosed()) {
      logger.debug(
        pickContainer
          ? 'Page is closed, cannot get non-unique selectors'
          : 'Page is closed, cannot get list selectors'
      );
      return { generalSelector: '' };
    }

    // The callback runs in the browser, so it must not reference anything
    // from the surrounding Node scope; everything it needs is passed in.
    const selectors = await page.evaluate(
      ({ x, y, pickContainer }: { x: number; y: number; pickContainer: boolean }) => {
        // First hit that is at least 30x30 px and contains another hit;
        // falls back to the top-most hit.
        const findContainerElement = (elements: HTMLElement[]): HTMLElement | null => {
          if (!elements.length) return null;
          if (elements.length === 1) return elements[0];

          for (let i = 0; i < elements.length; i++) {
            const element = elements[i];
            const rect = element.getBoundingClientRect();

            if (rect.width >= 30 && rect.height >= 30) {
              const hasChildrenInList = elements.some(
                (otherElement, j) => i !== j && element.contains(otherElement)
              );
              if (hasChildrenInList) {
                return element;
              }
            }
          }

          return elements[0];
        };

        // Hit with the longest parentElement chain (first one wins on ties).
        const findDeepestElement = (elements: HTMLElement[]): HTMLElement | null => {
          if (!elements.length) return null;
          if (elements.length === 1) return elements[0];

          let deepestElement = elements[0];
          let maxDepth = 0;

          for (const element of elements) {
            let depth = 0;
            let current = element;

            while (current) {
              depth++;
              if (current.parentElement) {
                current = current.parentElement;
              } else {
                break;
              }
            }

            if (depth > maxDepth) {
              maxDepth = depth;
              deepestElement = element;
            }
          }

          return deepestElement;
        };

        // Resolves the element under the point, descending into shadow roots,
        // iframes and frames (each bounded to 4 levels).
        const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => {
          const elements = document.elementsFromPoint(x, y) as HTMLElement[];
          if (!elements.length) return null;

          const initialElement = pickContainer
            ? findContainerElement(elements)
            : findDeepestElement(elements);
          if (!initialElement) return null;

          let deepestElement: HTMLElement = initialElement;

          const traverseShadowDOM = (element: HTMLElement): HTMLElement => {
            let current = element;
            let shadowRoot = current.shadowRoot;
            let deepest = current;
            let depth = 0;
            const MAX_SHADOW_DEPTH = 4;

            while (shadowRoot && depth < MAX_SHADOW_DEPTH) {
              const shadowElement = shadowRoot.elementFromPoint(x, y) as HTMLElement;
              if (!shadowElement || shadowElement === current) break;

              deepest = shadowElement;
              current = shadowElement;
              shadowRoot = current.shadowRoot;
              depth++;
            }

            return deepest;
          };

          const isInFrameset = () => {
            let node: HTMLElement | null = deepestElement;
            while (node && node.parentElement) {
              if (node.tagName === 'FRAMESET' || node.tagName === 'FRAME') {
                return true;
              }
              node = node.parentElement;
            }
            return false;
          };

          if (deepestElement.tagName === 'IFRAME') {
            let currentIframe = deepestElement as HTMLIFrameElement;
            let depth = 0;
            const MAX_IFRAME_DEPTH = 4;

            while (currentIframe && depth < MAX_IFRAME_DEPTH) {
              try {
                const iframeRect = currentIframe.getBoundingClientRect();
                const iframeX = x - iframeRect.left;
                const iframeY = y - iframeRect.top;

                const iframeDocument =
                  currentIframe.contentDocument || currentIframe.contentWindow?.document;
                if (!iframeDocument) break;

                const iframeElement = iframeDocument.elementFromPoint(iframeX, iframeY) as HTMLElement;
                if (!iframeElement) break;

                deepestElement = traverseShadowDOM(iframeElement);

                if (iframeElement.tagName === 'IFRAME') {
                  currentIframe = iframeElement as HTMLIFrameElement;
                  depth++;
                } else {
                  break;
                }
              } catch (error) {
                console.warn('Cannot access iframe content:', error);
                break;
              }
            }
          } else if (deepestElement.tagName === 'FRAME' || isInFrameset()) {
            const framesToCheck: HTMLFrameElement[] = [];

            if (deepestElement.tagName === 'FRAME') {
              framesToCheck.push(deepestElement as HTMLFrameElement);
            }

            if (isInFrameset()) {
              document.querySelectorAll('frame').forEach(frame => {
                framesToCheck.push(frame as HTMLFrameElement);
              });
            }

            const frameDepth = 0;
            const MAX_FRAME_DEPTH = 4;

            const processFrames = (frames: HTMLFrameElement[], currentDepth: number) => {
              if (currentDepth >= MAX_FRAME_DEPTH) return;

              for (const frameElement of frames) {
                try {
                  const frameRect = frameElement.getBoundingClientRect();
                  const frameX = x - frameRect.left;
                  const frameY = y - frameRect.top;

                  if (frameX < 0 || frameY < 0 || frameX > frameRect.width || frameY > frameRect.height) {
                    continue;
                  }

                  const frameDocument =
                    frameElement.contentDocument ||
                    frameElement.contentWindow?.document;

                  if (!frameDocument) continue;

                  const frameElementAtPoint = frameDocument.elementFromPoint(frameX, frameY) as HTMLElement;
                  if (!frameElementAtPoint) continue;

                  deepestElement = traverseShadowDOM(frameElementAtPoint);

                  if (frameElementAtPoint.tagName === 'FRAME') {
                    processFrames([frameElementAtPoint as HTMLFrameElement], currentDepth + 1);
                  }

                  break;
                } catch (error) {
                  console.warn('Cannot access frame content:', error);
                  continue;
                }
              }
            };

            processFrames(framesToCheck, frameDepth);
          } else {
            deepestElement = traverseShadowDOM(deepestElement);
          }

          return deepestElement;
        };

        // `.a.b` suffix built from the element's usable class names.
        // On SVG elements `className` is an SVGAnimatedString rather than a
        // string, so it is treated as "no classes" instead of calling
        // `.split` on it (which would throw).
        function classSuffix(element: HTMLElement): string {
          const rawClassName = typeof element.className === 'string' ? element.className : '';
          const validClasses = rawClassName
            .split(/\s+/)
            .filter((cls: string) => Boolean(cls))
            .filter((cls: string) => !cls.startsWith('!') && !cls.includes(':'));
          return validClasses.length > 0
            ? '.' + validClasses.map(cls => CSS.escape(cls)).join('.')
            : '';
        }

        function getNonUniqueSelector(element: HTMLElement): string {
          const tag = element.tagName.toLowerCase();

          if (tag === 'frame' || tag === 'iframe') {
            if (element.id) {
              return `${tag}#${CSS.escape(element.id)}`;
            }

            const frameName = element.getAttribute('name');
            if (frameName) {
              return `${tag}[name="${CSS.escape(frameName)}"]`;
            }

            if (element.parentElement && element.parentElement.tagName === 'FRAMESET') {
              const frameIndex = Array.from(element.parentElement.children)
                .filter(child => child.tagName.toLowerCase() === tag)
                .indexOf(element) + 1;
              if (frameIndex > 0) {
                return `${tag}:nth-of-type(${frameIndex})`;
              }
            }

            // Frames never get an :nth-child() suffix: tag plus classes only.
            return tag + classSuffix(element);
          }

          if (tag === 'td' && element.parentElement) {
            const siblings = Array.from(element.parentElement.children);
            const position = siblings.indexOf(element) + 1;
            return `${tag}:nth-child(${position})`;
          }

          let selector = tag + classSuffix(element);

          if (element.parentElement) {
            // Disambiguate with :nth-child() only when a sibling shares a class.
            const siblings = Array.from(element.parentElement.children);
            const elementClasses = Array.from(element.classList || []);
            const hasSimilarSibling = siblings.some(sibling => {
              if (sibling === element) return false;
              const siblingClasses = Array.from(sibling.classList || []);
              return siblingClasses.some(cls => elementClasses.includes(cls));
            });

            if (hasSimilarSibling) {
              const position = siblings.indexOf(element) + 1;
              selector += `:nth-child(${position})`;
            }
          }

          return selector;
        }

        // Get complete context path (iframe, frame, and shadow DOM), outermost first.
        function getContextPath(element: HTMLElement): DOMContext[] {
          const path: DOMContext[] = [];
          let current = element;
          let depth = 0;
          const MAX_DEPTH = 4;

          while (current && depth < MAX_DEPTH) {
            // Check for shadow DOM
            const rootNode = current.getRootNode();
            if (rootNode instanceof ShadowRoot) {
              path.unshift({
                type: 'shadow',
                element: current,
                container: rootNode,
                host: rootNode.host as HTMLElement
              });
              current = rootNode.host as HTMLElement;
              depth++;
              continue;
            }

            // Check for iframe or frame
            const ownerDocument = current.ownerDocument;
            const frameElement = ownerDocument?.defaultView?.frameElement;

            if (frameElement) {
              const isFrame = frameElement.tagName === 'FRAME';
              path.unshift({
                type: isFrame ? 'frame' : 'iframe',
                element: current,
                container: frameElement as (HTMLIFrameElement | HTMLFrameElement),
                document: ownerDocument
              });
              current = frameElement as HTMLElement;
              depth++;
              continue;
            }

            break;
          }

          return path;
        }

        function getSelectorPath(element: HTMLElement | null): string {
          if (!element) return '';

          // Get the complete context path
          const contextPath = getContextPath(element);
          if (contextPath.length > 0) {
            const selectorParts: string[] = [];

            contextPath.forEach((context, index) => {
              const containerSelector = getNonUniqueSelector(
                context.type === 'shadow' ? context.host! : context.container as HTMLElement
              );

              if (index === contextPath.length - 1) {
                const elementSelector = getNonUniqueSelector(element);
                const delimiter = context.type === 'shadow' ? ' >> ' : ' :>> ';
                selectorParts.push(`${containerSelector}${delimiter}${elementSelector}`);
              } else {
                selectorParts.push(containerSelector);
              }
            });

            return selectorParts.join(contextPath[0].type === 'shadow' ? ' >> ' : ' :>> ');
          }

          // A selector that already carries a '.' is returned on its own.
          const elementSelector = getNonUniqueSelector(element);
          if (elementSelector.includes('.')) {
            return elementSelector;
          }

          // Otherwise prefix it with its parent, joined by the child combinator.
          const path: string[] = [];
          let currentElement = element;
          const MAX_DEPTH = 2;
          let depth = 0;

          while (currentElement && currentElement !== document.body && depth < MAX_DEPTH) {
            path.unshift(getNonUniqueSelector(currentElement));

            if (!currentElement.parentElement) break;
            currentElement = currentElement.parentElement;
            depth++;
          }

          return path.join(' > ');
        }

        // Main logic to get element and generate selector
        const originalEl = getDeepestElementFromPoint(x, y);
        if (!originalEl) return null;

        let element: HTMLElement = originalEl;

        // While picking the list container, a table cell stands for its table.
        if (pickContainer && (element.tagName === 'TD' || element.tagName === 'TH')) {
          const tableParent = element.closest('table');
          if (tableParent) {
            element = tableParent;
          }
        }

        return { generalSelector: getSelectorPath(element) };
      },
      { x: coordinates.x, y: coordinates.y, pickContainer }
    );

    return selectors || { generalSelector: '' };
  } catch (error) {
    console.error('Error in getNonUniqueSelectors:', error);
    return { generalSelector: '' };
  }
};

export const getChildSelectors = async (page: Page, parentSelector: string): Promise => {
  try {
    if (page.isClosed()) {
      logger.debug('Page is closed, cannot get child selectors');
      return [];
    }

    const childSelectors = await page.evaluate((parentSelector: string) => {
      // Function to get a non-unique selector based on tag and class (if present)
      function getNonUniqueSelector(element: HTMLElement): string {
        let selector = element.tagName.toLowerCase();

        if (selector === 'td' && element.parentElement) {
          const siblings = Array.from(element.parentElement.children);
          const position = siblings.indexOf(element) + 1;
          return `${selector}:nth-child(${position})`;
        }

        const
className = typeof element.className === 'string' ? element.className : ''; if (className) { const classes = className.split(/\s+/).filter((cls: string) => Boolean(cls)); if (classes.length > 0) { const validClasses = classes.filter((cls: string) => !cls.startsWith('!') && !cls.includes(':')); if (validClasses.length > 0) { selector += '.' + validClasses.map(cls => CSS.escape(cls)).join('.'); } } } if (element.parentElement) { const siblings = Array.from(element.parentElement.children); const elementClasses = Array.from(element.classList || []); const similarSiblings = siblings.filter(sibling => { if (sibling === element) return false; const siblingClasses = Array.from(sibling.classList || []); return siblingClasses.some(cls => elementClasses.includes(cls)); }); if (similarSiblings.length > 0) { const position = siblings.indexOf(element) + 1; selector += `:nth-child(${position})`; } } return selector; } // Function to generate selector path from an element to its parent function getSelectorPath(element: HTMLElement): string { if (!element || !element.parentElement) return ''; const elementSelector = getNonUniqueSelector(element); // Check for shadow DOM context const rootNode = element.getRootNode(); if (rootNode instanceof ShadowRoot) { const hostSelector = getNonUniqueSelector(rootNode.host as HTMLElement); return `${hostSelector} >> ${elementSelector}`; } // Check for iframe/frame context const ownerDocument = element.ownerDocument; const frameElement = ownerDocument?.defaultView?.frameElement; if (frameElement) { const frameSelector = getNonUniqueSelector(frameElement as HTMLElement); // Use the appropriate delimiter based on whether it's a frame or iframe return `${frameSelector} :>> ${elementSelector}`; } if (elementSelector.includes('.') && elementSelector.split('.').length > 1) { return elementSelector; } const parentSelector = getNonUniqueSelector(element.parentElement); return `${parentSelector} > ${elementSelector}`; } // Function to get all children 
// Collects every element that lives in a "special context" reachable from
// `element`: its own shadow root, plus the documents of all descendant
// <iframe>, <frame> and <frameset> > <frame> elements, in that order.
// Frames reachable both directly and through a frameset are visited twice,
// so their elements appear twice in the result.
function getSpecialContextChildren(element: HTMLElement): HTMLElement[] {
  const collected: HTMLElement[] = [];

  // Appends all elements of one frame's document. If reading the document
  // throws, `warning` is logged and the frame is skipped.
  const appendFrameContents = (
    frame: HTMLIFrameElement | HTMLFrameElement,
    warning: string
  ): void => {
    try {
      const doc = frame.contentDocument || frame.contentWindow?.document;
      if (doc) {
        collected.push(...(Array.from(doc.querySelectorAll('*')) as HTMLElement[]));
      }
    } catch (error) {
      console.warn(warning, error);
    }
  };

  // Shadow DOM children
  const shadow = element.shadowRoot;
  if (shadow) {
    collected.push(...(Array.from(shadow.querySelectorAll('*')) as HTMLElement[]));
  }

  // Iframe children
  (Array.from(element.querySelectorAll('iframe')) as HTMLIFrameElement[]).forEach((iframe) =>
    appendFrameContents(iframe, 'Cannot access iframe content:')
  );

  // Frame children
  (Array.from(element.querySelectorAll('frame')) as HTMLFrameElement[]).forEach((frame) =>
    appendFrameContents(frame, 'Cannot access frame content:')
  );

  // Frames nested in framesets
  (Array.from(element.querySelectorAll('frameset')) as HTMLElement[]).forEach((frameset) => {
    (Array.from(frameset.querySelectorAll('frame')) as HTMLFrameElement[]).forEach((frame) =>
      appendFrameContents(frame, 'Cannot access frameset frame content:')
    );
  });

  return collected;
}
Handle regular DOM children const children = Array.from(element.children) as HTMLElement[]; for (const child of children) { const childPath = getSelectorPath(child); if (childPath) { selectors.push(childPath); // Process regular descendants selectors = selectors.concat(getAllDescendantSelectors(child)); // Process special context children (shadow DOM, iframes, and frames) const specialChildren = getSpecialContextChildren(child); for (const specialChild of specialChildren) { const specialPath = getSelectorPath(specialChild); if (specialPath) { selectors.push(specialPath); selectors = selectors.concat(getAllDescendantSelectors(specialChild)); } } } } // Handle direct special context children const specialChildren = getSpecialContextChildren(element); for (const specialChild of specialChildren) { const specialPath = getSelectorPath(specialChild); if (specialPath) { selectors.push(specialPath); selectors = selectors.concat(getAllDescendantSelectors(specialChild)); } } return selectors; } // Handle both shadow DOM, iframe, and frame parent selectors let parentElements: HTMLElement[] = []; // Check for special context traversal in parent selector if (parentSelector.includes('>>') || parentSelector.includes(':>>')) { // Split by both types of delimiters const selectorParts = parentSelector.split(/(?:>>|:>>)/).map(part => part.trim()); // Start with initial elements parentElements = Array.from(document.querySelectorAll(selectorParts[0])) as HTMLElement[]; // Traverse through parts for (let i = 1; i < selectorParts.length; i++) { const newParentElements: HTMLElement[] = []; for (const element of parentElements) { // Check for shadow DOM if (element.shadowRoot) { const shadowChildren = Array.from( element.shadowRoot.querySelectorAll(selectorParts[i]) ) as HTMLElement[]; newParentElements.push(...shadowChildren); } // Check for iframe if (element.tagName === 'IFRAME') { try { const iframeDoc = (element as HTMLIFrameElement).contentDocument || (element as 
HTMLIFrameElement).contentWindow?.document; if (iframeDoc) { const iframeChildren = Array.from( iframeDoc.querySelectorAll(selectorParts[i]) ) as HTMLElement[]; newParentElements.push(...iframeChildren); } } catch (error) { console.warn('Cannot access iframe content during traversal:', error); continue; } } // Check for frame if (element.tagName === 'FRAME') { try { const frameDoc = (element as HTMLFrameElement).contentDocument || (element as HTMLFrameElement).contentWindow?.document; if (frameDoc) { const frameChildren = Array.from( frameDoc.querySelectorAll(selectorParts[i]) ) as HTMLElement[]; newParentElements.push(...frameChildren); } } catch (error) { console.warn('Cannot access frame content during traversal:', error); continue; } } // Check for frameset if (element.tagName === 'FRAMESET') { const frames = Array.from(element.querySelectorAll('frame')) as HTMLFrameElement[]; for (const frame of frames) { try { const frameDoc = frame.contentDocument || frame.contentWindow?.document; if (frameDoc) { const frameChildren = Array.from( frameDoc.querySelectorAll(selectorParts[i]) ) as HTMLElement[]; newParentElements.push(...frameChildren); } } catch (error) { console.warn('Cannot access frameset frame during traversal:', error); continue; } } } } parentElements = newParentElements; } } else { // Regular DOM selector parentElements = Array.from(document.querySelectorAll(parentSelector)) as HTMLElement[]; } const allChildSelectors = new Set(); // Use a set to ensure uniqueness // Process each parent element and its descendants parentElements.forEach((parentElement) => { const descendantSelectors = getAllDescendantSelectors(parentElement); descendantSelectors.forEach((selector) => allChildSelectors.add(selector)); }); return Array.from(allChildSelectors); }, parentSelector); return childSelectors || []; } catch (error) { console.error('Error in getChildSelectors:', error); return []; } }; /** * Returns the first pair from the given workflow that contains the given 
/**
 * Checks whether the given selectors are visible on the page at the same time.
 *
 * Each selector is evaluated in the page; the function resolves to `false` as
 * soon as one selector matches no element or matches at least one element
 * that is not visible, and to `true` when every selector only matches visible
 * elements (including the case of an empty `selectors` array).
 *
 * An element counts as visible when it has a layout box AND its computed
 * `visibility` is not "hidden". Previously, for HTML elements, the visibility
 * check was only applied to the `getClientRects()` operand because `&&` binds
 * tighter than `||`, so hidden elements with a non-zero size passed.
 *
 * @param selectors The selectors to check.
 * @param page The page to use for the validation.
 * @category WorkflowManagement
 */
export const isRuleOvershadowing = async (selectors: string[], page: Page): Promise<boolean> => {
  for (const selector of selectors) {
    const areElsVisible = await page.$$eval(selector, (elems) => {
      const isVisible = (elem: HTMLElement | SVGElement): boolean => {
        // SVG elements have no offsetWidth/offsetHeight, so only client rects apply.
        const hasLayoutBox = elem instanceof HTMLElement
          ? Boolean(elem.offsetWidth || elem.offsetHeight || elem.getClientRects().length)
          : elem.getClientRects().length > 0;
        return hasLayoutBox && window.getComputedStyle(elem).visibility !== "hidden";
      };
      return elems.map((el) => isVisible(el));
    });

    if (areElsVisible.length === 0 || areElsVisible.includes(false)) {
      return false;
    }
  }
  return true;
}
/**
 * A helper function to apply a callback to the all resolved
 * promises made out of an array of the items.
 *
 * One promise is created per item; `block` receives the item, its index and
 * that promise's `resolve`/`reject` callbacks and must call one of them.
 * A synchronous throw inside `block` rejects that item's promise.
 *
 * The type parameters default to `any` so that existing callers which type
 * the callbacks loosely keep compiling.
 *
 * @param items An array of items.
 * @param block The function to call for each item; it settles the item's promise.
 * @returns A promise of all results in item order; rejects with the first rejection.
 * @category WorkflowManagement-Storage
 */
function promiseAllP<T = any, R = any>(
  items: readonly T[],
  block: (
    item: T,
    index: number,
    resolve: (value: R | PromiseLike<R>) => void,
    reject: (reason?: unknown) => void
  ) => void
): Promise<R[]> {
  return Promise.all(
    items.map(
      (item, index) =>
        new Promise<R>((resolve, reject) => {
          block(item, index, resolve, reject);
        })
    )
  );
}
/**
 * A helper function to get the best selector for the specific user action.
 *
 * For Click/Hover/DragAndDrop actions a full iframe selector wins, then a full
 * shadow-DOM selector; otherwise the first defined candidate of a
 * tag-specific priority list is used. For Input/Keydown actions a full
 * shadow-DOM selector wins, then the form-oriented priority list.
 * Any other action type yields `null`.
 *
 * Every access to `action.selectors` is null-safe, so an action without
 * selectors yields `null` instead of throwing.
 *
 * @param action The user action.
 * @returns The chosen selector, or `null` when no candidate is defined.
 * @category WorkflowManagement-Selectors
 */
export const getBestSelectorForAction = (action: Action) => {
  switch (action.type) {
    case ActionType.Click:
    case ActionType.Hover:
    case ActionType.DragAndDrop: {
      const selectors = action.selectors;

      if (selectors?.iframeSelector?.full) {
        return selectors.iframeSelector.full;
      }

      if (selectors?.shadowSelector?.full) {
        return selectors.shadowSelector.full;
      }

      // less than 25 characters, and element only has text inside
      const textSelector =
        selectors?.text?.length != null &&
          selectors?.text?.length < 25 &&
          action.hasOnlyText
          ? selectors.generalSelector
          : null;

      if (action.tagName === TagName.Input) {
        return (
          selectors?.testIdSelector ??
          selectors?.id ??
          selectors?.formSelector ??
          selectors?.accessibilitySelector ??
          selectors?.generalSelector ??
          selectors?.attrSelector ??
          null
        );
      }

      if (action.tagName === TagName.A) {
        return (
          selectors?.testIdSelector ??
          selectors?.id ??
          selectors?.hrefSelector ??
          selectors?.accessibilitySelector ??
          selectors?.generalSelector ??
          selectors?.attrSelector ??
          null
        );
      }

      // Prefer text selectors for spans, ems over general selectors
      if (
        action.tagName === TagName.Span ||
        action.tagName === TagName.EM ||
        action.tagName === TagName.Cite ||
        action.tagName === TagName.B ||
        action.tagName === TagName.Strong
      ) {
        return (
          selectors?.testIdSelector ??
          selectors?.id ??
          selectors?.accessibilitySelector ??
          selectors?.hrefSelector ??
          textSelector ??
          selectors?.generalSelector ??
          selectors?.attrSelector ??
          null
        );
      }

      return (
        selectors?.testIdSelector ??
        selectors?.id ??
        selectors?.accessibilitySelector ??
        selectors?.hrefSelector ??
        selectors?.generalSelector ??
        selectors?.attrSelector ??
        null
      );
    }
    case ActionType.Input:
    case ActionType.Keydown: {
      const selectors = action.selectors;

      if (selectors?.shadowSelector?.full) {
        return selectors.shadowSelector.full;
      }

      return (
        selectors?.testIdSelector ??
        selectors?.id ??
        selectors?.formSelector ??
        selectors?.accessibilitySelector ??
        selectors?.generalSelector ??
        selectors?.attrSelector ??
        null
      );
    }
    default:
      break;
  }
  return null;
}
"../src/pages/**/*", // Exclude frontend pages "../src/app/**/*", // Exclude other frontend-specific code ] } ================================================ FILE: server/tsconfig.mcp.json ================================================ { "compilerOptions": { "target": "ES2022", "module": "Node16", "moduleResolution": "Node16", "outDir": "../dist", "rootDir": "./src", "strict": true, "esModuleInterop": true, "skipLibCheck": true, "forceConsistentCasingInFileNames": true, "declaration": true, "declarationMap": true, "sourceMap": true }, "include": [ "src/mcp-worker.ts" ], "exclude": [ "node_modules", "dist" ] } ================================================ FILE: src/App.tsx ================================================ import React from "react"; import { Routes, Route } from "react-router-dom"; import { GlobalInfoProvider } from "./context/globalInfo"; import { PageWrapper } from "./pages/PageWrapper"; import i18n from "./i18n"; import ThemeModeProvider from './context/theme-provider'; function App() { return ( } /> ); } export default App; ================================================ FILE: src/api/auth.ts ================================================ import { default as axios } from "axios"; import { apiUrl } from "../apiConfig" export const getUserById = async (userId: string) => { try { const response = await axios.get(`${apiUrl}/auth/user/${userId}`); if (response.status === 200) { return response.data; } else { throw new Error(`Couldn't get user with id ${userId}`); } } catch (error: any) { console.error(error); return null; } } ================================================ FILE: src/api/integration.ts ================================================ import { default as axios } from "axios"; import { apiUrl } from "../apiConfig"; export const handleUploadCredentials = async (fileName: string, credentials: any, spreadsheetId: string, range: string): Promise => { try { const response = await axios.post(`${apiUrl}/integration/upload-credentials`, 
/**
 * Fetches the stored proxy configuration from GET /proxy/config.
 *
 * @returns The response body on HTTP 200. On any other status or on a
 *          request error the error is logged and the empty configuration
 *          `{ proxy_url: '', auth: false }` is returned.
 */
export const getProxyConfig = async (): Promise<{ proxy_url: string, auth: boolean }> => {
  try {
    const { status, data } = await axios.get(`${apiUrl}/proxy/config`);
    if (status !== 200) {
      throw new Error(`Failed to fetch proxy configuration. Try again.`);
    }
    return data;
  } catch (error: any) {
    console.log(error);
    return { proxy_url: '', auth: false };
  }
}
/**
 * Asks the backend to stop the interpretation that is currently running
 * (GET /record/interpret/stop).
 *
 * A non-200 status or a request error is logged to the console and
 * otherwise swallowed.
 */
export const stopCurrentInterpretation = async (): Promise<void> => {
  try {
    const response = await axios.get(`${apiUrl}/record/interpret/stop`);
    if (response.status !== 200) {
      // The message previously said "interpret", copied from interpretCurrentRecording.
      throw new Error('Couldn\'t stop current interpretation');
    }
  } catch (error: unknown) {
    console.log(error);
  }
};
/**
 * Fetches the tabs of the active remote browser from GET /record/active/tabs.
 *
 * @returns The response body on HTTP 200, or `null` (after logging) on any
 *          other status or request error.
 *
 * NOTE(review): the element type `string` is an assumption; confirm it
 * against the /record/active/tabs handler.
 */
export const getCurrentTabs = async (): Promise<string[] | null> => {
  try {
    const response = await axios.get(`${apiUrl}/record/active/tabs`);
    if (response.status === 200) {
      return response.data;
    } else {
      // The message previously mentioned "stored recordings" (copy-paste from the storage API).
      throw new Error('Couldn\'t retrieve tabs of the active browser');
    }
  } catch (error: unknown) {
    console.log(error);
    return null;
  }
};
/**
 * Updates a stored recording via PUT /storage/recordings/:id.
 *
 * @param id Identifier of the recording to update.
 * @param data Fields to change; only the provided properties are sent.
 * @returns `true` when the backend answers with HTTP 200, `false` (after
 *          logging the error message) otherwise.
 */
export const updateRecording = async (id: string, data: {
  name?: string;
  limits?: Array<{pairIndex: number, actionIndex: number, argIndex: number, limit: number}>;
  credentials?: Credentials;
  targetUrl?: string;
  workflow?: any[];
}): Promise<boolean> => {
  try {
    const response = await axios.put(`${apiUrl}/storage/recordings/${id}`, data);
    if (response.status === 200) {
      return true;
    } else {
      throw new Error(`Couldn't update recording with id ${id}`);
    }
  } catch (error: unknown) {
    // Narrow before reading `.message`, since anything can be thrown.
    const message = error instanceof Error ? error.message : String(error);
    console.error(`Error updating recording: ${message}`);
    return false;
  }
};
/**
 * Tells whether a recording already has runs, based on
 * GET /storage/recordings/:id/runs.
 *
 * @param id Identifier of the recording.
 * @returns `true` when the reported `runs.totalCount` is greater than zero;
 *          `false` otherwise, including when the request fails or the
 *          response has an unexpected shape (the error is logged).
 */
export const checkRunsForRecording = async (id: string): Promise<boolean> => {
  try {
    const response = await axios.get(`${apiUrl}/storage/recordings/${id}/runs`);
    // The leftover debug console.log of the count was removed.
    return response.data.runs.totalCount > 0;
  } catch (error) {
    console.error('Error checking runs for recording:', error);
    return false;
  }
};
/**
 * Asks the backend to abort a run via POST /storage/runs/abort/:id.
 *
 * `withCredentials` is passed as the axios request config (third argument).
 * Previously it was passed as the second argument, which axios treats as the
 * request body, so the credentials flag was never applied.
 *
 * @param id Identifier of the run to abort.
 * @returns `success` and `isQueued` from the response body on HTTP 200, or
 *          `{ success: false }` (after logging) on any other status or
 *          request error.
 */
export const notifyAboutAbort = async (id: string): Promise<{ success: boolean; isQueued?: boolean }> => {
  try {
    const response = await axios.post(
      `${apiUrl}/storage/runs/abort/${id}`,
      {},
      { withCredentials: true }
    );
    if (response.status === 200) {
      return {
        success: response.data.success,
        isQueued: response.data.isQueued
      };
    } else {
      throw new Error(`Couldn't abort a running recording with id ${id}`);
    }
  } catch (error: any) {
    console.log(error);
    return { success: false };
  }
}
/**
 * Fetches the schedule of a recording from GET /storage/schedule/:id.
 *
 * @param id Identifier of the recording.
 * @returns The `schedule` property of the response body on HTTP 200, or
 *          `null` (after logging) on any other status or request error.
 */
export const getSchedule = async (id: string) => {
  try {
    const { status, data } = await axios.get(`${apiUrl}/storage/schedule/${id}`);
    if (status !== 200) {
      throw new Error(`Couldn't retrieve schedule for recording ${id}`);
    }
    return data.schedule;
  } catch (error: any) {
    console.log(error);
    return null;
  }
}
/**
 * Registers a webhook for a robot via POST /webhook/add.
 *
 * @param webhook The webhook configuration to add.
 * @param robotId Identifier of the robot the webhook belongs to.
 * @returns The response body on HTTP 200. Failures are logged and reported
 *          through the returned object as `{ ok: false, error }`, where
 *          `error` prefers the backend's message over the client-side one.
 */
export const addWebhook = async (webhook: WebhookConfig, robotId: string): Promise<WebhookResponse> => {
  try {
    const response = await axios.post(`${apiUrl}/webhook/add`, {
      webhook,
      robotId
    }, { withCredentials: true });

    if (response.status === 200) {
      return response.data;
    } else {
      throw new Error(`Failed to add webhook. Status code: ${response.status}`);
    }
  } catch (error: any) {
    console.error('Error adding webhook:', error.message || error);
    return {
      ok: false,
      error: error.response?.data?.message || error.message || 'Failed to add webhook'
    };
  }
};
/**
 * Lists the webhooks of a robot via GET /webhook/list/:robotId.
 *
 * @param robotId Identifier of the robot.
 * @returns The response body on HTTP 200. Failures are logged and reported
 *          through the returned object as `{ ok: false, error, webhooks: [] }`,
 *          so callers can always iterate over `webhooks`.
 */
export const getWebhooks = async (robotId: string): Promise<WebhookResponse> => {
  try {
    const response = await axios.get(`${apiUrl}/webhook/list/${robotId}`, {
      withCredentials: true
    });

    if (response.status === 200) {
      return response.data;
    } else {
      throw new Error(`Failed to fetch webhooks. Status code: ${response.status}`);
    }
  } catch (error: any) {
    console.error('Error fetching webhooks:', error.message || error);
    return {
      ok: false,
      error: error.response?.data?.message || error.message || 'Failed to fetch webhooks',
      webhooks: []
    };
  }
};
Status code: ${response.status}`); } } catch (error: any) { console.error('Error testing webhook:', error.message || error); return { ok: false, error: error.response?.data?.message || error.message || 'Failed to test webhook' }; } }; export const clearAllWebhooks = async (robotId: string): Promise => { try { const response = await axios.delete(`${apiUrl}/webhook/clear/${robotId}`, { withCredentials: true }); if (response.status === 200) { return response.data; } else { throw new Error(`Failed to clear webhooks. Status code: ${response.status}`); } } catch (error: any) { console.error('Error clearing webhooks:', error.message || error); return { ok: false, error: error.response?.data?.message || error.message || 'Failed to clear webhooks' }; } }; ================================================ FILE: src/api/workflow.ts ================================================ import { WhereWhatPair, WorkflowFile } from "maxun-core"; import { emptyWorkflow } from "../shared/constants"; import { default as axios } from "axios"; import { apiUrl } from "../apiConfig"; export const getActiveWorkflow = async (id: string): Promise => { try { const response = await axios.get(`${apiUrl}/workflow/${id}`) if (response.status === 200) { return response.data; } else { throw new Error('Something went wrong when fetching a recorded workflow'); } } catch (error: any) { console.log(error); return emptyWorkflow; } }; export const getParamsOfActiveWorkflow = async (id: string): Promise => { try { const response = await axios.get(`${apiUrl}/workflow/params/${id}`) if (response.status === 200) { return response.data; } else { throw new Error('Something went wrong when fetching the parameters of the recorded workflow'); } } catch (error: any) { console.log(error); return null; } }; export const deletePair = async (index: number): Promise => { try { const response = await axios.delete(`${apiUrl}/workflow/pair/${index}`); if (response.status === 200) { return response.data; } else { throw new 
Error('Something went wrong when fetching an updated workflow'); } } catch (error: any) { console.log(error); return emptyWorkflow; } };

/**
 * Inserts `pair` at position `index` via `POST /workflow/pair/:index`.
 *
 * @returns the updated workflow on HTTP 200; on any failure the error is logged
 *          and `emptyWorkflow` is returned instead of rejecting.
 */
export const AddPair = async (index: number, pair: WhereWhatPair): Promise<WorkflowFile> => {
  try {
    const response = await axios.post(`${apiUrl}/workflow/pair/${index}`, {
      pair,
    }, { headers: { 'Content-Type': 'application/json' } });
    if (response.status === 200) {
      return response.data;
    } else {
      throw new Error('Something went wrong when fetching an updated workflow');
    }
  } catch (error: any) {
    console.log(error);
    return emptyWorkflow;
  }
};

/**
 * Replaces the pair at position `index` with `pair` via `PUT /workflow/pair/:index`.
 *
 * @returns the updated workflow on HTTP 200; `emptyWorkflow` when the request fails.
 */
export const UpdatePair = async (index: number, pair: WhereWhatPair): Promise<WorkflowFile> => {
  try {
    const response = await axios.put(`${apiUrl}/workflow/pair/${index}`, {
      pair,
    }, { headers: { 'Content-Type': 'application/json' } });
    if (response.status === 200) {
      return response.data;
    } else {
      throw new Error('Something went wrong when fetching an updated workflow');
    }
  } catch (error: any) {
    console.log(error);
    return emptyWorkflow;
  }
};

================================================
FILE: src/apiConfig.js
================================================
// Base URL of the backend; falls back to the local development server when
// VITE_BACKEND_URL is unset or empty.
export const apiUrl = import.meta.env.VITE_BACKEND_URL ? import.meta.env.VITE_BACKEND_URL : 'http://localhost:8080'

================================================
FILE: src/components/action/ActionDescriptionBox.tsx
================================================
import React from 'react';
import styled from 'styled-components';
import { Typography, FormControlLabel, Checkbox, Box } from '@mui/material';
import { useActionContext } from '../../context/browserActions';
import MaxunLogo from "../../assets/maxunlogo.png";
import { useTranslation } from 'react-i18next';

/** Props read by the styled container's theme-dependent interpolations. */
interface CustomBoxContainerProps {
  isDarkMode: boolean;
}

// The type argument makes `isDarkMode` a typed prop inside the interpolations below.
const CustomBoxContainer = styled.div<CustomBoxContainerProps>` position: relative; min-width: 250px; width: auto; min-height: 100px; height: auto; border-radius: 5px; background-color: ${({ isDarkMode }) => (isDarkMode ? 
'#1d1c1cff' : 'white')}; color: ${({ isDarkMode }) => (isDarkMode ? 'white' : 'black')}; margin: 80px 13px 25px 13px; box-shadow: 0px 4px 10px rgba(0, 0, 0, 0.1); `; const Triangle = styled.div` position: absolute; top: -15px; left: 50%; transform: translateX(-50%); width: 0; height: 0; border-left: 20px solid transparent; border-right: 20px solid transparent; border-bottom: 20px solid ${({ isDarkMode }) => (isDarkMode ? '#1d1c1cff' : 'white')}; `; const Logo = styled.img` position: absolute; top: -80px; left: 50%; transform: translateX(-50%); width: 70px; height: auto; border-radius: 5px; `; const Content = styled.div` padding: 20px; text-align: left; `; const ActionDescriptionBox = ({ isDarkMode }: { isDarkMode: boolean }) => { const { t } = useTranslation(); const { getText, getScreenshot, getList, captureStage } = useActionContext() as { getText: boolean; getScreenshot: boolean; getList: boolean; captureStage: 'initial' | 'pagination' | 'limit' | 'complete'; }; const messages = [ { stage: 'initial' as const, text: t('action_description.list_stages.initial') }, { stage: 'pagination' as const, text: t('action_description.list_stages.pagination') }, { stage: 'limit' as const, text: t('action_description.list_stages.limit') }, { stage: 'complete' as const, text: t('action_description.list_stages.complete') }, ]; const stages = messages.map(({ stage }) => stage); const currentStageIndex = stages.indexOf(captureStage); const renderActionDescription = () => { if (getText) { return ( <> {t('action_description.text.title')} {t('action_description.text.description')} ); } else if (getScreenshot) { return ( <> {t('action_description.screenshot.title')} {t('action_description.screenshot.description')} ); } else if (getList) { return ( <> {t('action_description.list.title')} {t('action_description.list.description')} {messages.map(({ stage, text }, index) => ( } label={ {text} } /> ))} ); } else { return ( <> {t('action_description.default.title')} 
{t('action_description.default.description')} ); } }; return ( {renderActionDescription()} ); }; export default ActionDescriptionBox; ================================================ FILE: src/components/action/ActionSettings.tsx ================================================ import React, { useRef } from 'react'; import styled from "styled-components"; import { Button } from "@mui/material"; import * as Settings from "./action-settings"; import { useSocketStore } from "../../context/socket"; interface ActionSettingsProps { action: string; darkMode?: boolean; } export const ActionSettings = ({ action, darkMode = false }: ActionSettingsProps) => { const settingsRef = useRef<{ getSettings: () => object }>(null); const { socket } = useSocketStore(); const DisplaySettings = () => { switch (action) { case "screenshot": return ; case 'scroll': return ; case 'scrape': return ; case 'scrapeSchema': return ; default: return null; } }; const handleSubmit = (event: React.SyntheticEvent) => { event.preventDefault(); const settings = settingsRef.current?.getSettings(); socket?.emit(`action`, { action, settings }); }; return (
); }; // Ensure that the Wrapper accepts the darkMode prop for styling adjustments. const ActionSettingsWrapper = styled.div<{ action: string; darkMode: boolean }>` display: flex; flex-direction: column; align-items: ${({ action }) => (action === 'script' ? 'stretch' : 'center')}; justify-content: center; margin-top: 20px; background-color: ${({ darkMode }) => (darkMode ? '#1E1E1E' : 'white')}; color: ${({ darkMode }) => (darkMode ? 'white' : 'black')}; `; ================================================ FILE: src/components/action/action-settings/Scrape.tsx ================================================ import React, { forwardRef, useImperativeHandle } from 'react'; import { Stack, TextField } from "@mui/material"; import { WarningText } from '../../ui/texts'; import InfoIcon from "@mui/icons-material/Info"; export const ScrapeSettings = forwardRef((props, ref) => { const [settings, setSettings] = React.useState(''); useImperativeHandle(ref, () => ({ getSettings() { return settings; } })); return ( setSettings(e.target.value)} /> The scrape function uses heuristic algorithm to automatically scrape only important data from the page. If a selector is used it will scrape and automatically parse all available data inside of the selected element(s). ); }); ================================================ FILE: src/components/action/action-settings/ScrapeSchema.tsx ================================================ import React, { forwardRef, useImperativeHandle, useRef } from 'react'; import { WarningText } from "../../ui/texts"; import InfoIcon from "@mui/icons-material/Info"; import { KeyValueForm } from "../../recorder/KeyValueForm"; export const ScrapeSchemaSettings = forwardRef((props, ref) => { const keyValueFormRef = useRef<{ getObject: () => object }>(null); useImperativeHandle(ref, () => ({ getSettings() { const settings = keyValueFormRef.current?.getObject() as Record return settings; } })); return (
The interpreter scrapes the data from a webpage into a "curated" table.
); }); ================================================ FILE: src/components/action/action-settings/Screenshot.tsx ================================================ import React, { forwardRef, useImperativeHandle } from 'react'; import { MenuItem, TextField } from "@mui/material"; import { ScreenshotSettings as Settings } from "../../../shared/types"; import styled from "styled-components"; import { SelectChangeEvent } from "@mui/material/Select/Select"; import { Dropdown } from "../../ui/DropdownMui"; export const ScreenshotSettings = forwardRef((props, ref) => { const [settings, setSettings] = React.useState({}); useImperativeHandle(ref, () => ({ getSettings() { return settings; } })); const handleInput = (event: React.ChangeEvent) => { const { id, value, type } = event.target; let parsedValue: any = value; if (type === "number") { parsedValue = parseInt(value); }; setSettings({ ...settings, [id]: parsedValue, }); }; const handleSelect = (event: SelectChangeEvent) => { const { name, value } = event.target; let parsedValue: any = value; if (value === "true" || value === "false") { parsedValue = value === "true"; }; setSettings({ ...settings, [name]: parsedValue, }); }; return ( jpeg png {settings.type === "jpeg" ? : null } disabled allow {settings.type === "png" ? 
true false : null } hide initial true false css device ); }); const SettingsWrapper = styled.div` margin-left: 15px; * { margin-bottom: 10px; } `; ================================================ FILE: src/components/action/action-settings/Scroll.tsx ================================================ import React, { forwardRef, useImperativeHandle } from 'react'; import { TextField } from "@mui/material"; export const ScrollSettings = forwardRef((props, ref) => { const [settings, setSettings] = React.useState(0); useImperativeHandle(ref, () => ({ getSettings() { return settings; } })); return ( setSettings(parseInt(e.target.value))} /> ); }); ================================================ FILE: src/components/action/action-settings/index.ts ================================================ import { ScrollSettings } from './Scroll'; import { ScreenshotSettings } from "./Screenshot"; import { ScrapeSettings } from "./Scrape"; import { ScrapeSchemaSettings } from "./ScrapeSchema"; export { ScrollSettings, ScreenshotSettings, ScrapeSettings, ScrapeSchemaSettings, }; ================================================ FILE: src/components/api/ApiKey.tsx ================================================ import React, { useState, useEffect } from 'react'; import { Box, Button, Typography, IconButton, CircularProgress, Table, TableBody, TableCell, TableContainer, TableHead, TableRow, Tooltip, Paper, Dialog, DialogTitle, DialogContent, DialogContentText, DialogActions, } from '@mui/material'; import { ContentCopy, Visibility, VisibilityOff, Delete } from '@mui/icons-material'; import styled from 'styled-components'; import axios from 'axios'; import { useGlobalInfoStore } from '../../context/globalInfo'; import { apiUrl } from '../../apiConfig'; import { useTranslation } from 'react-i18next'; const Container = styled(Box)` display: flex; flex-direction: column; align-items: center; margin-top: 50px; margin-left: 70px; margin-right: 70px; `; const ApiKeyManager = () => { const { 
t } = useTranslation();
  // Explicit type arguments: `useState(null)` alone would infer the state type as `null`.
  const [apiKey, setApiKey] = useState<string | null>(null);
  const [apiKeyName, setApiKeyName] = useState<string>(t('apikey.default_name'));
  const [apiKeyCreatedAt, setApiKeyCreatedAt] = useState<string | null>(null);
  const [loading, setLoading] = useState(true);
  const [showKey, setShowKey] = useState(false);
  const [copySuccess, setCopySuccess] = useState(false);
  const [confirmDeleteOpen, setConfirmDeleteOpen] = useState(false);
  const { notify } = useGlobalInfoStore();

  // Load the current key (if any) once, when the component mounts.
  useEffect(() => {
    const fetchApiKey = async () => {
      try {
        const { data } = await axios.get(`${apiUrl}/auth/api-key`);
        setApiKey(data.api_key);
        setApiKeyCreatedAt(data.api_key_created_at);
      } catch (error: any) {
        notify('error', t('apikey.notifications.fetch_error', { error: error.message }));
      } finally {
        setLoading(false);
      }
    };

    fetchApiKey();
  }, []);

  /** Requests a fresh key from the backend and stores it with its creation date. */
  const generateApiKey = async () => {
    setLoading(true);
    try {
      const { data } = await axios.post(`${apiUrl}/auth/generate-api-key`);
      setApiKey(data.api_key);
      setApiKeyCreatedAt(data.api_key_created_at);
      notify('success', t('apikey.notifications.generate_success'));
    } catch (error: any) {
      notify('error', t('apikey.notifications.generate_error', { error: error.message }));
    } finally {
      setLoading(false);
    }
  };

  /** Deletes the key on the backend, clears local state and closes the confirm dialog. */
  const deleteApiKey = async () => {
    setLoading(true);
    try {
      await axios.delete(`${apiUrl}/auth/delete-api-key`);
      setApiKey(null);
      setApiKeyCreatedAt(null);
      notify('success', t('apikey.notifications.delete_success'));
    } catch (error: any) {
      notify('error', t('apikey.notifications.delete_error', { error: error.message }));
    } finally {
      setLoading(false);
      setConfirmDeleteOpen(false);
    }
  };

  /**
   * Copies the key to the clipboard. `writeText` is asynchronous and can be
   * rejected (e.g. permission denied), so success is only reported once it
   * has actually resolved.
   */
  const copyToClipboard = async () => {
    if (!apiKey) {
      return;
    }
    try {
      await navigator.clipboard.writeText(apiKey);
      setCopySuccess(true);
      setTimeout(() => setCopySuccess(false), 2000);
      notify('info', t('apikey.notifications.copy_success'));
    } catch (error: any) {
      console.error('Error copying API key:', error);
      notify('error', error?.message || 'Failed to copy API key');
    }
  };

  const handleDeleteClick = () => {
    setConfirmDeleteOpen(true);
  };

  const handleDeleteCancel = () => {
    setConfirmDeleteOpen(false);
  };

  const handleDeleteConfirm = () => {
setConfirmDeleteOpen(false); deleteApiKey(); }; if (loading) { return ( ); } return ( Start by creating an API key below. Then, test your API or read the{' '} API documentation {' '} for setup instructions. {t('apikey.title')} {apiKey ? ( {t('apikey.table.name')} {t('apikey.table.key')} {apiKeyCreatedAt && Created On} {t('apikey.table.actions')} {apiKeyName} {showKey ? `${apiKey?.substring(0, 10)}...` : '**********'} {apiKeyCreatedAt && ( {new Date(apiKeyCreatedAt).toLocaleDateString('en-US', { month: 'short', day: 'numeric', year: 'numeric', })} )} setShowKey(!showKey)}> {showKey ? : }
) : ( <> {t('apikey.no_key_message')} )} Delete API Key Are you sure you want to delete this API key? This action cannot be undone and will immediately invalidate the key.
); }; export default ApiKeyManager;

================================================
FILE: src/components/browser/BrowserContent.tsx
================================================
import React, { useCallback, useEffect, useState } from "react";
import styled from "styled-components";
import BrowserNavBar from "./BrowserNavBar";
import { BrowserWindow } from "./BrowserWindow";
import { useBrowserDimensionsStore } from "../../context/browserDimensions";
import { BrowserTabs } from "./BrowserTabs";
import { useSocketStore } from "../../context/socket";
import {
  getCurrentTabs,
} from "../../api/recording";

// TODO: Tab !show currentUrl after recordingUrl global state
/**
 * Keeps the client-side list of tab labels and the focused tab index,
 * mirroring tab changes to and from the remote browser over the socket
 * (`closeTab`, `addTab`, `changeTab`, `newTab`, `tabHasBeenClosed`).
 */
export const BrowserContent = () => {
  const { socket } = useSocketStore();

  const [tabs, setTabs] = useState<string[]>(["Loading..."]);
  const [tabIndex, setTabIndex] = React.useState(0);
  const [showOutputData, setShowOutputData] = useState(false);
  const { browserWidth } = useBrowserDimensionsStore();

  /** Focuses the tab at `index` on the client only. */
  const handleChangeIndex = useCallback((index: number) => {
    setTabIndex(index);
  }, []);

  /** Closes the tab at `index` on the backend and in the local tab list. */
  const handleCloseTab = useCallback(
    (index: number) => {
      // the tab needs to be closed on the backend
      socket?.emit("closeTab", {
        index,
        isCurrent: tabIndex === index,
      });
      if (tabIndex === index) {
        // The focused tab is closing: its right-hand neighbour slides into the
        // same index, so the index only moves when the closed tab was the last one.
        if (tabs.length <= index + 1) {
          handleChangeIndex(Math.max(0, index - 1));
        }
      } else if (index < tabIndex) {
        // A tab to the left was removed, so the focused tab shifts one slot left.
        // Closing a tab to the right leaves the focused index untouched.
        handleChangeIndex(tabIndex - 1);
      }
      // update client tabs
      setTabs((prevState) => [
        ...prevState.slice(0, index),
        ...prevState.slice(index + 1),
      ]);
    },
    [tabs, socket, tabIndex, handleChangeIndex]
  );

  /** Adds a new tab by pressing the plus button and focuses it. */
  const handleAddNewTab = useCallback(() => {
    socket?.emit("addTab");
    // Adds a new tab to the end of the tabs array and shifts focus
    setTabs((prevState) => [...prevState, "new tab"]);
    handleChangeIndex(tabs.length);
  }, [socket, tabs, handleChangeIndex]);

  /** Tells the backend to switch page screencast and focus to tab `index`. */
  const handleTabChange = useCallback(
    (index: number) => {
      socket?.emit("changeTab", index);
    },
    [socket]
  );

  /** Handles a tab opened by the remote browser: append it and focus it on both sides. */
  const handleNewTab = useCallback(
    (tab: string) => {
      // Adds a new tab to the end of the tabs array and shifts focus
      setTabs((prevState) => [...prevState, tab]);
      // changes focus on the new tab - same happens in the remote browser
      handleChangeIndex(tabs.length);
      handleTabChange(tabs.length);
    },
    [tabs, handleChangeIndex, handleTabChange]
  );

  /**
   * Relabels the focused tab from `url`: the hostname without a leading
   * "www", or "new tab" when the URL has no hostname. URLs that cannot be
   * parsed are ignored and leave the label unchanged.
   */
  const handleUrlChanged = useCallback(
    (url: string) => {
      let hostname: string;
      try {
        hostname = new URL(url).hostname;
      } catch (error) {
        // `new URL` throws on malformed or relative input; keep the current label.
        console.warn(`Ignoring unparsable URL for tab label: ${url}`);
        return;
      }
      const label = hostname
        ? hostname.match(/\b(?!www\.)[a-zA-Z0-9]+/g)?.join(".")
        : "new tab";
      if (label && label !== tabs[tabIndex]) {
        setTabs((prevState) => [
          ...prevState.slice(0, tabIndex),
          label,
          ...prevState.slice(tabIndex + 1),
        ]);
      }
    },
    [tabs, tabIndex]
  );

  const tabHasBeenClosedHandler = useCallback(
    (index: number) => {
      handleCloseTab(index);
    },
    [handleCloseTab]
  );

  // Both handlers are dependencies so the socket always calls the versions
  // that close over the current `tabs` / `tabIndex`.
  useEffect(() => {
    if (socket) {
      socket.on("newTab", handleNewTab);
      socket.on("tabHasBeenClosed", tabHasBeenClosedHandler);
    }
    return () => {
      if (socket) {
        socket.off("newTab", handleNewTab);
        socket.off("tabHasBeenClosed", tabHasBeenClosedHandler);
      }
    };
  }, [socket, handleNewTab, tabHasBeenClosedHandler]);

  // Seed the tab list from the backend once on mount.
  useEffect(() => {
    getCurrentTabs()
      .then((response) => {
        if (response && response.length > 0) {
          setTabs(response);
        }
      })
      .catch((error) => {
        console.log(`Fetching current url failed: ${error}`);
      });
  }, []);

  return (
); }; const BrowserContentWrapper = styled.div``; ================================================ FILE: src/components/browser/BrowserNavBar.tsx ================================================ import type { FC } from 'react'; import styled from 'styled-components'; import ReplayIcon from '@mui/icons-material/Replay'; import ArrowBackIcon from '@mui/icons-material/ArrowBack'; import ArrowForwardIcon from '@mui/icons-material/ArrowForward'; import { NavBarButton } from '../ui/buttons/Buttons'; import { UrlForm } from './UrlForm'; import { useCallback, useEffect } from "react"; import { useSocketStore } from "../../context/socket"; import { getCurrentUrl } from "../../api/recording"; import { useGlobalInfoStore } from '../../context/globalInfo'; import { useThemeMode } from '../../context/theme-provider'; const StyledNavBar = styled.div<{ browserWidth: number; isDarkMode: boolean }>` display: flex; padding: 12px 0px; background-color: ${({ isDarkMode }) => (isDarkMode ? '#1d1c1cff' : '#f6f6f6')}; width: ${({ browserWidth }) => browserWidth}px; border-radius: 0px 5px 0px 0px; `; const IconButton = styled(NavBarButton) <{ mode: string }>` background-color: ${({ mode }) => (mode === 'dark' ? '#1d1c1cff' : '#f6f6f6')}; transition: background-color 0.3s ease, transform 0.1s ease; color: ${({ mode }) => (mode === 'dark' ? '#FFFFFF' : '#333')}; cursor: pointer; &:hover { background-color: ${({ mode }) => (mode === 'dark' ? 
'#1d1c1cff' : '#D0D0D0')}; } `; interface NavBarProps { browserWidth: number; handleUrlChanged: (url: string) => void; }; const BrowserNavBar: FC = ({ browserWidth, handleUrlChanged, }) => { const isDarkMode = useThemeMode().darkMode; const { socket } = useSocketStore(); const { recordingUrl, setRecordingUrl } = useGlobalInfoStore(); const handleRefresh = useCallback((): void => { socket?.emit('input:refresh'); }, [socket]); const handleGoTo = useCallback((address: string): void => { socket?.emit('input:url', address); }, [socket]); const handleCurrentUrlChange = useCallback((data: { url: string, userId: string }) => { handleUrlChanged(data.url); setRecordingUrl(data.url); window.sessionStorage.setItem('recordingUrl', data.url); }, [handleUrlChanged, recordingUrl]); useEffect(() => { getCurrentUrl().then((response) => { if (response) { handleUrlChanged(response); } }).catch((error) => { console.log(`Fetching current url failed: ${error}`); }) }, []); useEffect(() => { if (socket) { socket.on('urlChanged', handleCurrentUrlChange); } return () => { if (socket) { socket.off('urlChanged', handleCurrentUrlChange); } } }, [socket, handleCurrentUrlChange]); const addAddress = (address: string) => { if (socket) { handleUrlChanged(address); setRecordingUrl(address); handleGoTo(address); } }; return ( { socket?.emit('input:back'); }} disabled={false} mode={isDarkMode ? 'dark' : 'light'} > { socket?.emit('input:forward'); }} disabled={false} mode={isDarkMode ? 'dark' : 'light'} > { if (socket) { handleRefresh(); } }} disabled={false} mode={isDarkMode ? 
'dark' : 'light'} > ); } export default BrowserNavBar; ================================================ FILE: src/components/browser/BrowserRecordingSave.tsx ================================================ import React, { useState } from 'react' import { Grid, Button, Box, Typography, IconButton, Menu, MenuItem, ListItemText } from '@mui/material'; import { SaveRecording } from "../recorder/SaveRecording"; import { useGlobalInfoStore } from '../../context/globalInfo'; import { useActionContext } from '../../context/browserActions'; import { useBrowserSteps } from '../../context/browserSteps'; import { stopRecording } from "../../api/recording"; import { GenericModal } from "../ui/GenericModal"; import { useTranslation } from 'react-i18next'; import { emptyWorkflow } from '../../shared/constants'; import { useSocketStore } from '../../context/socket'; import { MoreHoriz } from '@mui/icons-material'; const BrowserRecordingSave = () => { const { t } = useTranslation(); const [openDiscardModal, setOpenDiscardModal] = useState(false); const [openResetModal, setOpenResetModal] = useState(false); const [anchorEl, setAnchorEl] = React.useState(null); const { recordingName, browserId, initialUrl, setRecordingUrl, setBrowserId, notify, setCurrentWorkflowActionsState, resetInterpretationLog } = useGlobalInfoStore(); const { socket } = useSocketStore(); const { stopGetText, stopGetList, stopGetScreenshot, stopPaginationMode, stopLimitMode, setCaptureStage, updatePaginationType, updateLimitType, updateCustomLimit, setShowLimitOptions, setShowPaginationOptions, setWorkflow, } = useActionContext(); const { browserSteps, deleteBrowserStep } = useBrowserSteps(); const goToMainMenu = async () => { if (browserId) { const notificationData = { type: 'warning', message: t('browser_recording.notifications.terminated'), timestamp: Date.now() }; window.sessionStorage.setItem('pendingNotification', JSON.stringify(notificationData)); if (window.opener) { window.opener.postMessage({ type: 
'recording-notification', notification: notificationData }, '*'); window.opener.postMessage({ type: 'session-data-clear', timestamp: Date.now() }, '*'); } setBrowserId(null); window.close(); stopRecording(browserId).catch((error) => { console.warn('Background cleanup failed:', error); }); } }; const performReset = () => { stopGetText(); stopGetList(); stopGetScreenshot(); stopPaginationMode(); stopLimitMode(); setShowLimitOptions(false); setShowPaginationOptions(false); setCaptureStage('initial'); updatePaginationType(''); updateLimitType(''); updateCustomLimit(''); setCurrentWorkflowActionsState({ hasScrapeListAction: false, hasScreenshotAction: false, hasScrapeSchemaAction: false }); setWorkflow(emptyWorkflow); resetInterpretationLog(); // Clear all browser steps browserSteps.forEach(step => { deleteBrowserStep(step.id); }); if (socket) { socket?.emit('new-recording'); socket.emit('input:url', initialUrl); // Update the URL in the navbar to match let sessionInitialUrl = window.sessionStorage.getItem('initialUrl'); if (sessionInitialUrl) { setRecordingUrl(sessionInitialUrl); window.sessionStorage.setItem('recordingUrl', sessionInitialUrl); } else { setRecordingUrl(initialUrl); } } // Close the reset confirmation modal setOpenResetModal(false); // Notify user notify('info', t('browser_recording.notifications.environment_reset')); }; const handleClick = (event: any) => { setAnchorEl(event.currentTarget); }; const handleClose = () => { setAnchorEl(null); }; return (
{ setOpenResetModal(true); handleClose(); }}> {t('right_panel.buttons.reset')} { window.open('https://docs.maxun.dev', '_blank'); }}> Documentation setOpenDiscardModal(false)} modalStyle={modalStyle}> {t('browser_recording.modal.confirm_discard')} setOpenResetModal(false)} modalStyle={modalStyle}> {t('browser_recording.modal.confirm_reset')} {t('browser_recording.modal.reset_warning')}
); }; export default BrowserRecordingSave; const modalStyle = { top: '25%', left: '50%', transform: 'translate(-50%, -50%)', width: '30%', backgroundColor: 'background.paper', p: 4, height: 'fit-content', display: 'block', padding: '20px', }; ================================================ FILE: src/components/browser/BrowserTabs.tsx ================================================ import * as React from 'react'; import { Box, IconButton, Tab, Tabs } from "@mui/material"; import { Close } from "@mui/icons-material"; import { useThemeMode } from '../../context/theme-provider'; interface BrowserTabsProp { tabs: string[], handleTabChange: (index: number) => void, handleAddNewTab: () => void, handleCloseTab: (index: number) => void, handleChangeIndex: (index: number) => void; tabIndex: number } export const BrowserTabs = ( { tabs, handleTabChange, handleAddNewTab, handleCloseTab, handleChangeIndex, tabIndex }: BrowserTabsProp) => { let tabWasClosed = false; const handleChange = (event: React.SyntheticEvent, newValue: number) => { if (!tabWasClosed) { handleChangeIndex(newValue); } }; const isDarkMode = useThemeMode().darkMode; return ( {tabs.map((tab, index) => { return ( { tabWasClosed = true; handleCloseTab(index); }} disabled={tabs.length === 1} />} iconPosition="end" onClick={() => { if (!tabWasClosed) { handleTabChange(index) } }} label={tab} /> ); })} {/* */} ); } interface CloseButtonProps { closeTab: () => void; disabled: boolean; } const CloseButton = ({ closeTab, disabled }: CloseButtonProps) => { return ( ); } ================================================ FILE: src/components/browser/BrowserWindow.tsx ================================================ import React, { useCallback, useContext, useEffect, useState } from 'react'; import { generateUUID } from '../../helpers/uuid'; import { useSocketStore } from '../../context/socket'; import { Button } from '@mui/material'; import { GenericModal } from '../ui/GenericModal'; import { useActionContext } from 
'../../context/browserActions';
import { useBrowserSteps, TextStep, ListStep } from '../../context/browserSteps';
import { useGlobalInfoStore } from '../../context/globalInfo';
import { useTranslation } from 'react-i18next';
import { AuthContext } from '../../context/auth';
import { useBrowserDimensionsStore } from '../../context/browserDimensions';
import { clientSelectorGenerator, ElementFingerprint } from "../../helpers/clientSelectorGenerator";
import { capturedElementHighlighter } from "../../helpers/capturedElementHighlighter";
import DatePicker from "../pickers/DatePicker";
import Dropdown from "../pickers/Dropdown";
import TimePicker from "../pickers/TimePicker";
import DateTimeLocalPicker from "../pickers/DateTimeLocalPicker";
import { DOMBrowserRenderer } from '../recorder/DOMBrowserRenderer';

/** Description of a highlighted or selected element. */
interface ElementInfo {
  tagName: string;
  hasOnlyText?: boolean;
  isIframeContent?: boolean;
  isShadowRoot?: boolean;
  innerText?: string;
  url?: string;
  imageUrl?: string;
  attributes?: Record<string, string>;
  innerHTML?: string;
  outerHTML?: string;
  isDOMMode?: boolean;
}

/** One selectable attribute: a display label and the attribute name to extract. */
interface AttributeOption {
  label: string;
  value: string;
}

/**
 * Lists the attributes that can be captured from an element.
 *
 * - `a`: text (`innerText`) and/or link target (`href`), each only when present.
 * - `img`: alt text (`alt`) and/or image URL (`src`), each only when present.
 * - anything else: always exactly one `innerText` option.
 *
 * @param tagName     tag of the element; compared case-insensitively.
 * @param elementInfo details of the element, or `null` when nothing is selected.
 * @returns the available options; empty when `elementInfo` is `null`.
 */
const getAttributeOptions = (tagName: string, elementInfo: ElementInfo | null): AttributeOption[] => {
  if (!elementInfo) return [];

  const options: AttributeOption[] = [];
  switch (tagName.toLowerCase()) {
    case 'a': {
      if (elementInfo.innerText) {
        options.push({ label: `Text: ${elementInfo.innerText}`, value: 'innerText' });
      }
      if (elementInfo.url) {
        options.push({ label: `URL: ${elementInfo.url}`, value: 'href' });
      }
      return options;
    }
    case 'img': {
      if (elementInfo.innerText) {
        options.push({ label: `Alt Text: ${elementInfo.innerText}`, value: 'alt' });
      }
      if (elementInfo.imageUrl) {
        options.push({ label: `Image URL: ${elementInfo.imageUrl}`, value: 'src' });
      }
      return options;
    }
    default:
      // Still a single option, but a missing innerText no longer renders as
      // the literal "Text: undefined".
      return [{ label: `Text: ${elementInfo.innerText ?? ''}`, value: 'innerText' }];
  }
};

export const BrowserWindow = () =>
{ const { t } = useTranslation(); const { browserWidth, browserHeight } = useBrowserDimensionsStore(); const [highlighterData, setHighlighterData] = useState<{ rect: DOMRect; selector: string; elementInfo: ElementInfo | null; isShadow?: boolean; childSelectors?: string[]; groupElements?: Array<{ element: HTMLElement; rect: DOMRect }>; similarElements?: { elements: HTMLElement[]; rects: DOMRect[]; }; } | null>(null); const [showAttributeModal, setShowAttributeModal] = useState(false); const [attributeOptions, setAttributeOptions] = useState([]); const [selectedElement, setSelectedElement] = useState<{ selector: string, info: ElementInfo | null } | null>(null); const [currentListId, setCurrentListId] = useState(null); const [cachedChildSelectors, setCachedChildSelectors] = useState([]); const [processingGroupCoordinates, setProcessingGroupCoordinates] = useState>([]); const [listSelector, setListSelector] = useState(null); const [fields, setFields] = useState>({}); const [paginationSelector, setPaginationSelector] = useState(''); const [isCachingChildSelectors, setIsCachingChildSelectors] = useState(false); const [cachedListSelector, setCachedListSelector] = useState( null ); const [pendingNotification, setPendingNotification] = useState<{ type: "error" | "warning" | "info" | "success"; message: string; count?: number; } | null>(null); const [initialAutoFieldIds, setInitialAutoFieldIds] = useState>(new Set()); const [manuallyAddedFieldIds, setManuallyAddedFieldIds] = useState>(new Set()); const { socket } = useSocketStore(); const { notify, currentTextActionId, currentListActionId, updateDOMMode, isDOMMode } = useGlobalInfoStore(); const { getText, getList, paginationMode, paginationType, limitMode, captureStage } = useActionContext(); const { addTextStep, addListStep, browserSteps } = useBrowserSteps(); const [currentGroupInfo, setCurrentGroupInfo] = useState<{ isGroupElement: boolean; groupSize: number; groupElements: HTMLElement[]; } | null>(null); const { state } 
= useContext(AuthContext); const { user } = state; const [datePickerInfo, setDatePickerInfo] = useState<{ coordinates: { x: number; y: number }; selector: string; } | null>(null); const [dropdownInfo, setDropdownInfo] = useState<{ coordinates: { x: number; y: number }; selector: string; options: Array<{ value: string; text: string; disabled: boolean; selected: boolean; }>; } | null>(null); const [timePickerInfo, setTimePickerInfo] = useState<{ coordinates: { x: number; y: number }; selector: string; } | null>(null); const [dateTimeLocalInfo, setDateTimeLocalInfo] = useState<{ coordinates: { x: number; y: number }; selector: string; } | null>(null); const dimensions = { width: browserWidth, height: browserHeight }; const handleShowDatePicker = useCallback( (info: { coordinates: { x: number; y: number }; selector: string }) => { setDatePickerInfo(info); }, [] ); const handleShowDropdown = useCallback( (info: { coordinates: { x: number; y: number }; selector: string; options: Array<{ value: string; text: string; disabled: boolean; selected: boolean; }>; }) => { setDropdownInfo(info); }, [] ); const handleShowTimePicker = useCallback( (info: { coordinates: { x: number; y: number }; selector: string }) => { setTimePickerInfo(info); }, [] ); const handleShowDateTimePicker = useCallback( (info: { coordinates: { x: number; y: number }; selector: string }) => { setDateTimeLocalInfo(info); }, [] ); const domModeHandler = useCallback( (data: any) => { if (!data.userId || data.userId === user?.id) { updateDOMMode(true); socket?.emit("dom-mode-enabled"); } }, [user?.id, socket, updateDOMMode] ); const domModeErrorHandler = useCallback( (data: any) => { if (!data.userId || data.userId === user?.id) { updateDOMMode(false); if (data.error) { notify("error", data.error); } } }, [user?.id, updateDOMMode, notify] ); useEffect(() => { if (isDOMMode) { clientSelectorGenerator.setGetList(getList); clientSelectorGenerator.setListSelector(listSelector || ""); 
clientSelectorGenerator.setPaginationMode(paginationMode); } }, [isDOMMode, getList, listSelector, paginationMode]); const createFieldsFromChildSelectors = useCallback( (childSelectors: string[], listSelector: string) => { const iframeElement = document.querySelector( "#dom-browser-iframe" ) as HTMLIFrameElement; if (!iframeElement?.contentDocument) return {}; const candidateFields: Array<{ id: number; field: TextStep; element: HTMLElement; isLeaf: boolean; depth: number; position: { x: number; y: number }; }> = []; const uniqueChildSelectors = [...new Set(childSelectors)]; const evaluateXPathAllWithShadowSupport = ( document: Document, xpath: string, isShadow: boolean = false ): Element[] => { try { const result = document.evaluate( xpath, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null ); const elements: Element[] = []; for (let i = 0; i < result.snapshotLength; i++) { const node = result.snapshotItem(i); if (node && node.nodeType === Node.ELEMENT_NODE) { elements.push(node as Element); } } if (!isShadow || elements.length > 0) { return elements; } return elements; } catch (err) { console.error("XPath evaluation failed:", xpath, err); return []; } }; const isValidData = (text: string | null | undefined): boolean => { return !!text && text.trim().length > 0; }; const isElementVisible = (element: HTMLElement): boolean => { try { const rect = element.getBoundingClientRect(); return rect.width > 0 && rect.height > 0; } catch (error) { return false; } }; const createFieldData = (element: HTMLElement, selector: string, forceAttribute?: string) => { const tagName = element.tagName.toLowerCase(); let data = ''; let attribute = forceAttribute || 'innerText'; if (forceAttribute) { if (forceAttribute === 'href') { data = element.getAttribute('href') || ''; } else if (forceAttribute === 'innerText') { data = (element.textContent || '').trim(); } } else if (tagName === 'img') { data = element.getAttribute('src') || ''; attribute = 'src'; } else if (tagName === 
'a') { const href = element.getAttribute('href') || ''; const text = (element.textContent || '').trim(); if (href && href !== '#' && !href.startsWith('javascript:')) { data = href; attribute = 'href'; } else if (text) { data = text; attribute = 'innerText'; } } else { data = (element.textContent || '').trim(); attribute = 'innerText'; } if (!data) return null; return { data, selectorObj: { selector, attribute, tag: tagName.toUpperCase(), isShadow: element.getRootNode() instanceof ShadowRoot } }; }; try { const listElements = evaluateXPathAllWithShadowSupport( iframeElement.contentDocument!, listSelector, listSelector.includes(">>") || listSelector.startsWith("//") ); if (listElements.length > 0) { const firstListElement = listElements[0] as HTMLElement; const listTagName = firstListElement.tagName.toLowerCase(); if (listTagName === 'a' && isElementVisible(firstListElement)) { const href = firstListElement.getAttribute('href'); if (href && href !== '#' && !href.startsWith('javascript:') && isValidData(href)) { const rect = firstListElement.getBoundingClientRect(); const fieldId = Date.now(); candidateFields.push({ id: fieldId, element: firstListElement, isLeaf: true, depth: 0, position: { x: rect.left, y: rect.top }, field: { id: fieldId, type: "text", label: "Label 1", data: href, selectorObj: { selector: listSelector, attribute: 'href', tag: 'A', isShadow: firstListElement.getRootNode() instanceof ShadowRoot } } }); } } } } catch (error) { console.warn('Failed to extract list container data:', error); } uniqueChildSelectors.forEach((selector, index) => { try { const listElements = evaluateXPathAllWithShadowSupport( iframeElement.contentDocument!, listSelector, listSelector.includes(">>") || listSelector.startsWith("//") ); if (listElements.length === 0) return; const hasNumericPredicate = /\[\d+\](?![^\[]*@)/.test(selector); if (hasNumericPredicate && listElements.length >= 3) { const allMatches = evaluateXPathAllWithShadowSupport( iframeElement.contentDocument!, 
selector, selector.includes(">>") || selector.startsWith("//") ); const matchRatio = allMatches.length / listElements.length; const isLinkOrImage = allMatches.length > 0 && (allMatches[0].tagName === 'A' || allMatches[0].tagName === 'IMG'); if (!isLinkOrImage && matchRatio < 0.6) { return; } } const firstListElement = listElements[0]; const elements = evaluateXPathAllWithShadowSupport( iframeElement.contentDocument!, selector, selector.includes(">>") || selector.startsWith("//") ).filter(el => firstListElement.contains(el as Node)); if (elements.length === 0) return; const element = elements[0] as HTMLElement; const tagName = element.tagName.toLowerCase(); const isShadow = element.getRootNode() instanceof ShadowRoot; if (isElementVisible(element)) { const rect = element.getBoundingClientRect(); const position = { x: rect.left, y: rect.top }; if (tagName === 'a') { const href = element.getAttribute('href'); const text = (element.textContent || '').trim(); if (text && isValidData(text)) { const textField = createFieldData(element, selector, 'innerText'); if (textField && textField.data) { const fieldId = Date.now() + index * 1000; candidateFields.push({ id: fieldId, element: element, isLeaf: true, depth: 0, position: position, field: { id: fieldId, type: "text", label: `Label ${index * 2 + 1}`, data: textField.data, selectorObj: textField.selectorObj } }); } } if (href && href !== '#' && !href.startsWith('javascript:')) { const hrefField = createFieldData(element, selector, 'href'); if (hrefField && hrefField.data) { const fieldId = Date.now() + index * 1000 + 1; candidateFields.push({ id: fieldId, element: element, isLeaf: true, depth: 0, position: position, field: { id: fieldId, type: "text", label: `Label ${index * 2 + 2}`, data: hrefField.data, selectorObj: hrefField.selectorObj } }); } } } else if (tagName === "img") { const src = element.getAttribute("src"); if (src && isValidData(src)) { const fieldId = Date.now() + index * 1000; candidateFields.push({ id: 
fieldId, element: element, isLeaf: true, depth: 0, position: position, field: { id: fieldId, type: "text", label: `Label ${index + 1}`, data: src, selectorObj: { selector: selector, tag: element.tagName, isShadow: isShadow, attribute: "src", }, }, }); } } else { const fieldData = createFieldData(element, selector); if (fieldData && fieldData.data && isValidData(fieldData.data)) { const fieldId = Date.now() + index * 1000; candidateFields.push({ id: fieldId, element: element, isLeaf: true, depth: 0, position: position, field: { id: fieldId, type: "text", label: `Label ${index + 1}`, data: fieldData.data, selectorObj: fieldData.selectorObj } }); } const anchorParent = element.closest('a'); if (anchorParent) { const isListContainer = listElements.some(listEl => listEl === anchorParent); if (!isListContainer) { const href = anchorParent.getAttribute('href'); if (href && href !== '#' && !href.startsWith('javascript:') && isValidData(href)) { let anchorSelector = selector; if (selector.includes('/a[')) { const anchorMatch = selector.match(/(.*\/a\[[^\]]+\])/); if (anchorMatch) { anchorSelector = anchorMatch[1]; } } const fieldId = Date.now() + index * 1000 + 500; candidateFields.push({ id: fieldId, element: anchorParent as HTMLElement, isLeaf: true, depth: 0, position: position, field: { id: fieldId, type: "text", label: `Label ${index + 1} Link`, data: href, selectorObj: { selector: anchorSelector, attribute: 'href', tag: 'A', isShadow: anchorParent.getRootNode() instanceof ShadowRoot } } }); } } } } } } catch (error) { console.warn(`Failed to process child selector ${selector}:`, error); } }); // Sort candidates by visual position (top-to-bottom, then left-to-right) candidateFields.sort((a, b) => { const yDiff = a.position.y - b.position.y; // If elements are roughly on the same horizontal line (within 5px tolerance) if (Math.abs(yDiff) <= 5) { return a.position.x - b.position.x; // Sort by x-position (left to right) } return yDiff; // Sort by y-position (top to 
/**
 * One auto-detected field candidate: the field that would be recorded plus
 * the DOM element it was read from and its on-screen position.
 */
type CandidateField = {
  id: number;
  field: TextStep;
  element: HTMLElement;
  isLeaf: boolean;
  depth: number;
  position: { x: number; y: number };
};

/**
 * Drops candidates that are DOM ancestors/descendants of one another so that
 * the same content is not captured at two nesting levels.
 *
 * Rules, applied in input order against the list kept so far:
 *  - an `<a>` candidate whose attribute is `href` is always kept;
 *  - otherwise the first kept entry that is related to the candidate decides:
 *      - candidate contains it (or is the same element) -> candidate is dropped;
 *      - it contains the candidate -> the candidate is kept and the kept
 *        ancestor is removed, unless that ancestor is an `<a>`/`href` entry;
 *  - a candidate unrelated to every kept entry is kept.
 *
 * @param candidates candidates in the order they should be considered
 * @returns a new array; the input array is not modified
 */
const removeParentChildDuplicates = (
  candidates: CandidateField[]
): CandidateField[] => {
  const isAnchorWithHref = (entry: CandidateField): boolean =>
    entry.element.tagName.toLowerCase() === "a" &&
    entry.field.selectorObj?.attribute === "href";

  const filtered: CandidateField[] = [];

  for (const candidate of candidates) {
    if (isAnchorWithHref(candidate)) {
      filtered.push(candidate);
      continue;
    }

    // Only the FIRST related entry is consulted, matching the original
    // early-exit behaviour.
    const relatedIndex = filtered.findIndex(
      (existing) =>
        candidate.element.contains(existing.element) ||
        existing.element.contains(candidate.element)
    );

    if (relatedIndex === -1) {
      filtered.push(candidate);
      continue;
    }

    const related = filtered[relatedIndex];

    // Node.contains is inclusive, so this also covers "same element".
    if (candidate.element.contains(related.element)) {
      continue;
    }

    // `related` is an ancestor of the candidate: prefer the more specific
    // (deeper) element, but never evict an anchor/href entry.
    if (!isAnchorWithHref(related)) {
      filtered.splice(relatedIndex, 1);
    }
    filtered.push(candidate);
  }

  return filtered;
};

/**
 * Collapses candidates whose field data is identical after trimming and
 * lower-casing, keeping the first occurrence, and relabels the survivors
 * sequentially as "Label 1", "Label 2", ...
 *
 * @param candidates candidates in display order
 * @returns the surviving fields keyed by candidate id
 */
const removeDuplicateContent = (
  candidates: CandidateField[]
): Record<string, TextStep> => {
  const finalFields: Record<string, TextStep> = {};
  const seenContent = new Set<string>();
  let labelCounter = 1;

  for (const candidate of candidates) {
    const content = candidate.field.data.trim().toLowerCase();
    if (seenContent.has(content)) {
      continue;
    }

    seenContent.add(content);
    finalFields[candidate.id] = {
      ...candidate.field,
      label: `Label ${labelCounter++}`,
    };
  }

  return finalFields;
};
currentListId || Date.now(), currentListActionId || `list-${generateUUID()}`, { type: "", selector: paginationSelector }, undefined, false ); if (pendingNotification) { notify(pendingNotification.type, pendingNotification.message); setPendingNotification(null); } } else { console.warn(`Failed to extract any fields from list selector: ${listSelector}`); setListSelector(null); setFields({}); setCachedListSelector(null); setCachedChildSelectors([]); setCurrentListId(null); setInitialAutoFieldIds(new Set()); setPendingNotification(null); notify( "error", "The list you have selected is not valid. Please reselect it." ); } } catch (error) { console.error("Error during child selector caching:", error); } finally { setIsCachingChildSelectors(false); } }, 100); } else { setIsCachingChildSelectors(false); } } } }, [ isDOMMode, listSelector, socket, getList, cachedListSelector, pendingNotification, notify, createFieldsFromChildSelectors, currentListId, currentListActionId, paginationSelector, addListStep ]); useEffect(() => { if (!listSelector) { setCachedListSelector(null); } }, [listSelector]); useEffect(() => { if (!getList || !listSelector || initialAutoFieldIds.size === 0 || !currentListActionId) return; const currentListStep = browserSteps.find( step => step.type === 'list' && step.actionId === currentListActionId ); if (!currentListStep || currentListStep.type !== 'list' || !currentListStep.fields) return; const currentFieldIds = new Set(Object.keys(currentListStep.fields).map(id => parseInt(id))); const newManualIds = new Set(); currentFieldIds.forEach(fieldId => { if (!initialAutoFieldIds.has(fieldId)) { newManualIds.add(fieldId); } }); if (newManualIds.size !== manuallyAddedFieldIds.size || ![...newManualIds].every(id => manuallyAddedFieldIds.has(id))) { setManuallyAddedFieldIds(newManualIds); } }, [browserSteps, getList, listSelector, initialAutoFieldIds, currentListActionId, manuallyAddedFieldIds]); useEffect(() => { if (currentListActionId && browserSteps.length 
> 0) { const activeStep = browserSteps.find( s => s.type === 'list' && s.actionId === currentListActionId ) as ListStep | undefined; if (activeStep) { if (currentListId !== activeStep.id) { setCurrentListId(activeStep.id); } if (listSelector !== activeStep.listSelector) { setListSelector(activeStep.listSelector); } if (JSON.stringify(fields) !== JSON.stringify(activeStep.fields)) { setFields(activeStep.fields); } if (activeStep.pagination?.selector && paginationSelector !== activeStep.pagination.selector) { setPaginationSelector(activeStep.pagination.selector); } } } }, [currentListActionId, browserSteps, currentListId, listSelector, fields, paginationSelector]); useEffect(() => { if (!isDOMMode) { capturedElementHighlighter.clearHighlights(); return; } const capturedSelectors: Array<{ selector: string }> = []; if (getText && currentTextActionId) { const textSteps = browserSteps.filter( (step): step is TextStep => step.type === 'text' && step.actionId === currentTextActionId ); textSteps.forEach(step => { if (step.selectorObj?.selector) { capturedSelectors.push({ selector: step.selectorObj.selector, }); } }); } if (getList && listSelector && currentListActionId && manuallyAddedFieldIds.size > 0) { const listSteps = browserSteps.filter( step => step.type === 'list' && step.actionId === currentListActionId ) as ListStep[]; listSteps.forEach(listStep => { if (listStep.fields) { Object.entries(listStep.fields).forEach(([fieldId, field]: [string, any]) => { if (manuallyAddedFieldIds.has(parseInt(fieldId)) && field.selectorObj?.selector) { capturedSelectors.push({ selector: field.selectorObj.selector, }); } }); } }); } if (capturedSelectors.length > 0) { capturedElementHighlighter.applyHighlights(capturedSelectors); } else { capturedElementHighlighter.clearHighlights(); } }, [browserSteps, getText, getList, listSelector, currentTextActionId, currentListActionId, isDOMMode, manuallyAddedFieldIds]); useEffect(() => { if (listSelector) { 
/**
 * Receives highlight information for the element under the cursor inside the
 * DOM-mode iframe and decides what (if anything) should be stored in
 * `highlighterData`.
 *
 * Rectangles in `data` are translated by the iframe's bounding rect minus
 * fixed padding constants before being stored. Nothing is updated while a
 * pagination selector has already been chosen in pagination mode, or while
 * DOM mode is off; the highlight is cleared when neither text nor list
 * capture is active.
 */
const domHighlighterHandler = useCallback(
  (data: {
    rect: DOMRect;
    selector: string;
    elementInfo: ElementInfo | null;
    childSelectors?: string[];
    isShadow?: boolean;
    groupInfo?: {
      isGroupElement: boolean;
      groupSize: number;
      groupElements: HTMLElement[];
      groupFingerprint: ElementFingerprint;
    };
    similarElements?: {
      elements: HTMLElement[];
      rects: DOMRect[];
    };
    isDOMMode?: boolean;
  }) => {
    // A pagination target is already selected: keep the current highlight.
    if (paginationMode && paginationSelector) {
      return;
    }

    const isCapturing = getText || getList;
    if (!isCapturing) {
      setHighlighterData(null);
      return;
    }

    if (!isDOMMode) {
      return;
    }

    const iframeElement = (document.querySelector("#dom-browser-iframe") ||
      document.querySelector("#browser-window iframe")) as HTMLIFrameElement;
    if (!iframeElement) {
      console.error("Could not find iframe element for DOM highlighting");
      return;
    }

    const iframeRect = iframeElement.getBoundingClientRect();
    const IFRAME_X_PADDING = 16;
    const IFRAME_Y_PADDING = 126;

    // Translate a rect reported relative to the iframe content into the
    // coordinate space used by the overlay.
    const toOverlayRect = (source: {
      x: number;
      y: number;
      width: number;
      height: number;
    }): DOMRect =>
      new DOMRect(
        source.x + iframeRect.left - IFRAME_X_PADDING,
        source.y + iframeRect.top - IFRAME_Y_PADDING,
        source.width,
        source.height
      );

    const mappedSimilarElements = data.similarElements
      ? {
          elements: data.similarElements.elements,
          rects: data.similarElements.rects.map((rect) => toOverlayRect(rect)),
        }
      : undefined;

    setCurrentGroupInfo(data.groupInfo ? data.groupInfo : null);

    const absoluteRect = toOverlayRect(data.rect);

    const mappedData = {
      ...data,
      rect: absoluteRect,
      childSelectors: data.childSelectors || cachedChildSelectors,
      similarElements: mappedSimilarElements,
    };

    // Text capture (or anything that is not list capture): always highlight.
    if (getList !== true) {
      setHighlighterData(mappedData);
      return;
    }

    // List capture, no list chosen yet, hovering a detected group:
    // highlight every element of the group.
    if (!listSelector && data.groupInfo?.isGroupElement) {
      const updatedGroupElements = data.groupInfo.groupElements.map(
        (element) => ({
          element,
          rect: toOverlayRect(element.getBoundingClientRect()),
        })
      );

      setHighlighterData({
        ...data,
        rect: absoluteRect,
        groupElements: updatedGroupElements,
        childSelectors: data.childSelectors || cachedChildSelectors,
      });
      return;
    }

    // List capture, no list chosen yet, not a group element.
    if (!listSelector) {
      setHighlighterData(mappedData);
      return;
    }

    // From here on a list selector exists.
    if (limitMode) {
      setHighlighterData(null);
      return;
    }

    if (paginationMode) {
      const needsClickTarget =
        paginationType !== "" &&
        !["none", "scrollDown", "scrollUp"].includes(paginationType);
      setHighlighterData(needsClickTarget ? mappedData : null);
      return;
    }

    const hasChildSelectors =
      Array.isArray(mappedData.childSelectors) &&
      mappedData.childSelectors.length > 0;
    setHighlighterData(hasChildSelectors ? mappedData : null);
  },
  [
    isDOMMode,
    getText,
    getList,
    socket,
    listSelector,
    paginationMode,
    paginationSelector,
    paginationType,
    limitMode,
    cachedChildSelectors,
  ]
);
getList: true }); socket.emit('listSelector', { selector: listSelector }); } }, [socket, listSelector]); useEffect(() => { if (captureStage === 'initial' && listSelector) { socket?.emit('setGetList', { getList: true }); socket?.emit('listSelector', { selector: listSelector }); } }, [captureStage, listSelector, socket]); const handleDOMElementSelection = useCallback( (highlighterData: { rect: DOMRect; selector: string; isShadow?: boolean; elementInfo: ElementInfo | null; childSelectors?: string[]; groupInfo?: { isGroupElement: boolean; groupSize: number; groupElements: HTMLElement[]; }; }) => { setShowAttributeModal(false); setSelectedElement(null); setAttributeOptions([]); if (paginationMode && getList) { if ( paginationType !== "" && paginationType !== "scrollDown" && paginationType !== "scrollUp" && paginationType !== "none" ) { let targetListId = currentListId; let targetFields = fields; if ((!targetListId || targetListId === 0) && currentListActionId) { const activeStep = browserSteps.find( s => s.type === 'list' && s.actionId === currentListActionId ) as ListStep | undefined; if (activeStep) { targetListId = activeStep.id; if (Object.keys(targetFields).length === 0 && Object.keys(activeStep.fields).length > 0) { targetFields = activeStep.fields; } } } setPaginationSelector(highlighterData.selector); notify( `info`, t( "browser_window.attribute_modal.notifications.pagination_select_success" ) ); addListStep( listSelector!, targetFields, targetListId || 0, currentListActionId || `list-${generateUUID()}`, { type: paginationType, selector: highlighterData.selector, isShadow: highlighterData.isShadow }, undefined, highlighterData.isShadow ); socket?.emit("setPaginationMode", { pagination: false }); setHighlighterData(null); } return; } if ( getList === true && !listSelector && highlighterData.groupInfo?.isGroupElement ) { if (highlighterData?.groupInfo.groupElements) { setProcessingGroupCoordinates( highlighterData.groupInfo.groupElements.map((element) => ({ 
element, rect: element.getBoundingClientRect(), })) ); } let cleanedSelector = highlighterData.selector; setListSelector(cleanedSelector); setPendingNotification({ type: `info`, message: t( "browser_window.attribute_modal.notifications.list_select_success", { count: highlighterData.groupInfo.groupSize, } ) || `Selected group with ${highlighterData.groupInfo.groupSize} similar elements`, count: highlighterData.groupInfo.groupSize, }); setCurrentListId(Date.now()); setFields({}); socket?.emit("setGetList", { getList: true }); socket?.emit("listSelector", { selector: cleanedSelector }); return; } if (getList === true && listSelector && currentListId) { const options = getAttributeOptions( highlighterData.elementInfo?.tagName || "", highlighterData.elementInfo ); if (options.length === 1) { const attribute = options[0].value; let currentSelector = highlighterData.selector; const data = attribute === "href" ? highlighterData.elementInfo?.url || "" : attribute === "src" ? highlighterData.elementInfo?.imageUrl || "" : highlighterData.elementInfo?.innerText || ""; const newField: TextStep = { id: Date.now(), type: "text", label: `Label ${Object.keys(fields).length + 1}`, data: data, selectorObj: { selector: currentSelector, tag: highlighterData.elementInfo?.tagName, isShadow: highlighterData.isShadow || highlighterData.elementInfo?.isShadowRoot, attribute, }, }; const updatedFields = { ...fields, [newField.id]: newField, }; setFields(updatedFields); if (listSelector) { addListStep( listSelector, updatedFields, currentListId, currentListActionId || `list-${generateUUID()}`, { type: "", selector: paginationSelector }, undefined, highlighterData.isShadow ); } } else { setAttributeOptions(options); setSelectedElement({ selector: highlighterData.selector, info: highlighterData.elementInfo, }); setShowAttributeModal(true); } return; } if (getText === true) { const options = getAttributeOptions( highlighterData.elementInfo?.tagName || "", highlighterData.elementInfo ); if 
/**
 * Click handler for the currently highlighted element (`highlighterData`).
 * Does nothing when no element is highlighted.
 *
 * Depending on the active capture mode it:
 *  - text capture: adds a text step directly when the element offers exactly
 *    one attribute option, otherwise opens the attribute modal;
 *  - list capture in pagination mode: records the clicked element as the
 *    pagination selector (only for pagination types that need a click
 *    target) and leaves pagination mode;
 *  - list capture without a list selector: stores the clicked selector, with
 *    numeric predicates removed, as the list selector;
 *  - list capture with a list selector: adds the element as a new list field,
 *    or opens the attribute modal when the attribute is ambiguous.
 *
 * The attribute is only read from `options` after confirming that exactly one
 * option exists, so an empty option list opens the modal instead of throwing.
 */
const handleClick = (e: React.MouseEvent) => {
  if (!highlighterData) {
    return;
  }

  const { elementInfo, selector, isShadow } = highlighterData;
  const options = getAttributeOptions(elementInfo?.tagName || "", elementInfo);

  // Value captured for a given attribute of the highlighted element.
  const valueFor = (attribute: string): string => {
    if (attribute === "href") return elementInfo?.url || "";
    if (attribute === "src") return elementInfo?.imageUrl || "";
    return elementInfo?.innerText || "";
  };

  // Let the user pick the attribute when it cannot be decided automatically.
  const openAttributeModal = () => {
    setAttributeOptions(options);
    setSelectedElement({
      selector: selector,
      info: elementInfo,
    });
    setShowAttributeModal(true);
  };

  if (getText === true) {
    if (options.length === 1) {
      const attribute = options[0].value;
      addTextStep(
        "",
        valueFor(attribute),
        {
          selector: selector,
          tag: elementInfo?.tagName,
          isShadow: isShadow || elementInfo?.isShadowRoot,
          attribute,
        },
        currentTextActionId || `text-${generateUUID()}`
      );
    } else {
      openAttributeModal();
    }
  }

  if (paginationMode && getList) {
    if (
      paginationType !== "" &&
      paginationType !== "scrollDown" &&
      paginationType !== "scrollUp" &&
      paginationType !== "none"
    ) {
      let targetListId = currentListId;
      let targetFields = fields;

      // Local list state may not be populated yet; fall back to the step
      // already stored for the active list action.
      if (!targetListId && currentListActionId) {
        const activeStep = browserSteps.find(
          s => s.type === 'list' && s.actionId === currentListActionId
        ) as ListStep | undefined;

        if (activeStep) {
          targetListId = activeStep.id;
          if (
            Object.keys(targetFields).length === 0 &&
            Object.keys(activeStep.fields).length > 0
          ) {
            targetFields = activeStep.fields;
          }
        }
      }

      setPaginationSelector(selector);
      notify(
        `info`,
        t(
          "browser_window.attribute_modal.notifications.pagination_select_success"
        )
      );
      addListStep(
        listSelector!,
        targetFields,
        targetListId || 0,
        currentListActionId || `list-${generateUUID()}`,
        {
          type: paginationType,
          selector: selector,
          isShadow: isShadow
        },
        undefined,
        isShadow
      );
      socket?.emit("setPaginationMode", { pagination: false });
      setHighlighterData(null);
    }
    return;
  }

  if (getList === true && !listSelector) {
    // Strip positional predicates such as "[3]" from the selector.
    const cleanedSelector = selector.replace(/\[\d+\]/g, "");

    setListSelector(cleanedSelector);
    notify(
      `info`,
      t(
        "browser_window.attribute_modal.notifications.list_select_success"
      )
    );
    setCurrentListId(Date.now());
    setFields({});
    return;
  }

  if (getList === true && listSelector && currentListId) {
    if (options.length !== 1) {
      openAttributeModal();
      return;
    }

    const attribute = options[0].value;

    // For path-like selectors, drop positional predicates from every segment.
    let currentSelector = selector;
    if (currentSelector.includes("/")) {
      const cleanedParts = currentSelector
        .split("/")
        .filter((part) => part)
        .map((part) => part.replace(/\[\d+\]/g, ""));

      if (cleanedParts.length > 0) {
        currentSelector = "//" + cleanedParts.join("/");
      }
    }

    const newField: TextStep = {
      id: Date.now(),
      type: "text",
      label: `Label ${Object.keys(fields).length + 1}`,
      data: valueFor(attribute),
      selectorObj: {
        selector: currentSelector,
        tag: elementInfo?.tagName,
        isShadow: isShadow || elementInfo?.isShadowRoot,
        attribute,
      },
    };

    const updatedFields = {
      ...fields,
      [newField.id]: newField,
    };

    setFields(updatedFields);

    addListStep(
      listSelector,
      updatedFields,
      currentListId,
      currentListActionId || `list-${generateUUID()}`,
      {
        type: "",
        selector: paginationSelector,
        isShadow: isShadow
      },
      undefined,
      isShadow
    );
  }
};
`text-${generateUUID()}`); } if (getList === true && listSelector && currentListId) { const newField: TextStep = { id: Date.now(), type: 'text', label: `Label ${Object.keys(fields).length + 1}`, data: data, selectorObj: { selector: selectedElement.selector, tag: selectedElement.info?.tagName, isShadow: highlighterData?.isShadow || highlighterData?.elementInfo?.isShadowRoot, attribute: attribute } }; const updatedFields = { ...fields, [newField.id]: newField }; setFields(updatedFields); if (listSelector) { addListStep( listSelector, updatedFields, currentListId, currentListActionId || `list-${generateUUID()}`, { type: "", selector: paginationSelector, isShadow: highlighterData?.isShadow }, undefined, highlighterData?.isShadow ); } } } setShowAttributeModal(false); setSelectedElement(null); setAttributeOptions([]); }; const resetPaginationSelector = useCallback(() => { setPaginationSelector(''); }, []); useEffect(() => { if (!paginationMode) { resetPaginationSelector(); } }, [paginationMode, resetPaginationSelector]); useEffect(() => { if (!paginationMode || !getList) { setHighlighterData(null); } }, [paginationMode, getList]); useEffect(() => { if (paginationMode && currentListActionId) { const currentListStep = browserSteps.find( step => step.type === 'list' && step.actionId === currentListActionId ) as (ListStep & { type: 'list' }) | undefined; const currentSelector = currentListStep?.pagination?.selector; const currentType = currentListStep?.pagination?.type; if (['clickNext', 'clickLoadMore'].includes(paginationType)) { if (!currentSelector || (currentType && currentType !== paginationType)) { setPaginationSelector(''); } } const stepSelector = currentListStep?.pagination?.selector; if (stepSelector && !paginationSelector) { setPaginationSelector(stepSelector); } else if (!stepSelector && paginationSelector) { setPaginationSelector(''); } } }, [browserSteps, paginationMode, currentListActionId, paginationSelector]); return (
{(getText === true || getList === true) && ( { setShowAttributeModal(false); setSelectedElement(null); setAttributeOptions([]); }} canBeClosed={true} modalStyle={modalStyle} >

Select Attribute

{attributeOptions.map((option) => ( ))}
)} {datePickerInfo && ( setDatePickerInfo(null)} /> )} {dropdownInfo && ( setDropdownInfo(null)} /> )} {timePickerInfo && ( setTimePickerInfo(null)} /> )} {dateTimeLocalInfo && ( setDateTimeLocalInfo(null)} /> )}
{(getText || getList) && !showAttributeModal && highlighterData?.rect != null && ( <> {highlighterData && (
{((getText && !listSelector) || (getList && paginationMode && !paginationSelector && paginationType !== "" && !["none", "scrollDown", "scrollUp"].includes(paginationType))) && (
)} {getList && !listSelector && currentGroupInfo?.isGroupElement && highlighterData.groupElements?.map((groupElement, index) => (
List item {index + 1}
) )} {getList && listSelector && !paginationMode && !limitMode && captureStage === 'initial' && highlighterData.similarElements?.rects?.map((rect, index) => (
Item {index + 1}
))}
)} )}
{isDOMMode ? ( <> {isCachingChildSelectors && ( <>
{processingGroupCoordinates.map((groupElement, index) => (
List item {index + 1}
))} {processingGroupCoordinates.length === 0 && (
)} )} ) : ( )}
); }; const DOMLoadingIndicator: React.FC = () => { const [progress, setProgress] = useState(0); const [hasStartedLoading, setHasStartedLoading] = useState(false); const { socket } = useSocketStore(); const { state } = useContext(AuthContext); const { user } = state; const { browserWidth, browserHeight } = useBrowserDimensionsStore(); useEffect(() => { if (!socket) return; const handleLoadingProgress = (data: { progress: number; pendingRequests: number; userId: string; }) => { if (!data.userId || data.userId === user?.id) { if (!hasStartedLoading && data.progress > 0) { setHasStartedLoading(true); } if (!hasStartedLoading || data.progress >= progress) { setProgress(data.progress); } } }; socket.on("domLoadingProgress", handleLoadingProgress); return () => { socket.off("domLoadingProgress", handleLoadingProgress); }; }, [socket, user?.id, hasStartedLoading, progress]); return (
Loading {progress}%
); }; const modalStyle = { top: '50%', left: '50%', transform: 'translate(-50%, -50%)', width: '30%', backgroundColor: 'background.paper', p: 4, height: 'fit-content', display: 'block', padding: '20px', }; ================================================ FILE: src/components/browser/UrlForm.tsx ================================================ import React, { useState, useEffect, useCallback, useRef } from 'react'; import type { SyntheticEvent } from 'react'; import KeyboardArrowRightIcon from '@mui/icons-material/KeyboardArrowRight'; import { NavBarForm, NavBarInput } from "../ui/Form"; import { UrlFormButton } from "../ui/buttons/Buttons"; import { useSocketStore } from '../../context/socket'; import { Socket } from "socket.io-client";
// TODO: Bring back REFRESHHHHHHH
type Props = {
    currentAddress: string;
    handleRefresh: (socket: Socket) => void;
    setCurrentAddress: (address: string) => void;
};

/**
 * Address-bar form for the remote browser.
 *
 * Keeps a local, editable copy of `currentAddress`, normalizes what the user
 * typed (trims it and prepends `https://` when no http/https/ftp scheme is
 * present), validates it with the `URL` constructor and, when valid, reports
 * it through `setCurrentAddress`.
 *
 * @param currentAddress    Address supplied by the parent; mirrored into the input.
 * @param handleRefresh     Refresh callback supplied by the parent.
 * @param setCurrentAddress Called with the normalized URL after it validates.
 */
export const UrlForm = ({
    currentAddress,
    handleRefresh,
    setCurrentAddress,
}: Props) => {
    const [address, setAddress] = useState(currentAddress);
    const { socket } = useSocketStore();
    // Last URL that passed validation; stops the sync effect below from
    // re-submitting the value this component just reported to the parent.
    const lastSubmittedRef = useRef('');

    const onChange = useCallback((event: SyntheticEvent): void => {
        setAddress((event.target as HTMLInputElement).value);
    }, []);

    const submitForm = useCallback((url: string): void => {
        const trimmed = url.trim();
        // Nothing to navigate to; avoids validating the bare string "https://".
        if (trimmed === '') {
            return;
        }

        // Add protocol if missing. Schemes are case-insensitive, so "HTTP://..."
        // must be recognised instead of getting a second scheme prepended.
        const hasScheme = /^(?:f|ht)tps?:\/\//i.test(trimmed);
        const normalized = hasScheme ? trimmed : "https://" + trimmed;
        if (normalized !== url) {
            setAddress(normalized); // Update the input field to reflect the normalization
        }

        try {
            // Validate the URL
            new URL(normalized);
            setCurrentAddress(normalized);
            lastSubmittedRef.current = normalized; // Update the last submitted URL
        } catch (e) {
            // Invalid input is only logged; the user is not interrupted.
            console.error(`Failed to submit form:`, e);
        }
    }, [setCurrentAddress]);

    const onSubmit = (event: SyntheticEvent): void => {
        event.preventDefault();
        submitForm(address);
    };

    // Sync internal state with currentAddress prop when it changes and auto-submit once
    useEffect(() => {
        setAddress(currentAddress);
        if (currentAddress !== '' && currentAddress !== lastSubmittedRef.current) {
            submitForm(currentAddress);
        }
    }, [currentAddress, submitForm]);

    return ( );
};
================================================ FILE: src/components/dashboard/MainMenu.tsx ================================================ import React, { useState, useEffect } from 'react'; import Tabs from '@mui/material/Tabs'; import Tab from '@mui/material/Tab'; import Box from '@mui/material/Box'; import { useNavigate, useLocation } from 'react-router-dom'; import { Paper, Button, useTheme, Modal, Typography, Stack, Divider } from "@mui/material"; import { AutoAwesome, VpnKey, Usb, CloudQueue, Description, Favorite, SlowMotionVideo, PlayArrow, ArrowForwardIos, Star } from "@mui/icons-material"; import { useTranslation } from 'react-i18next'; interface MainMenuProps { value: string; handleChangeContent: (newValue: string) => void; } export const MainMenu = ({ value = 'robots', handleChangeContent }: MainMenuProps) => { const theme = useTheme(); const { t } = useTranslation(); const navigate = useNavigate(); const location = useLocation(); const [sponsorModalOpen, setSponsorModalOpen] = useState(false); const [docModalOpen, setDocModalOpen] = useState(false); const [starCount, setStarCount] = useState(null); const [isLoading, setIsLoading] = useState(false); useEffect(() => { const fetchStarCount = async () => { setIsLoading(true); try { const response = await fetch('https://api.github.com/repos/getmaxun/maxun', { headers: { 'Accept': 'application/vnd.github.v3+json' } }); if (response.ok) { const data = await response.json(); setStarCount(data.stargazers_count); } else { console.error('Failed to fetch GitHub star count'); } } catch (error) { console.error('Error fetching GitHub star count:', error); } finally { setIsLoading(false); } }; fetchStarCount(); // Optional: Refresh star count every 5 minutes const intervalId = setInterval(fetchStarCount, 5 * 60 * 1000); return () => clearInterval(intervalId); }, []); const handleChange =
(event: React.SyntheticEvent, newValue: string) => { navigate(`/${newValue}`); handleChangeContent(newValue); }; const handleRobotsClick = () => { if (location.pathname !== '/robots') { navigate('/robots'); handleChangeContent('robots'); } }; const defaultcolor = theme.palette.mode === 'light' ? 'black' : 'white'; const buttonStyles = { justifyContent: 'flex-start', textAlign: 'left', fontSize: '15px', letterSpacing: '0.02857em', padding: '20px 20px 0px 22px', minHeight: '60px', minWidth: '100%', display: 'flex', alignItems: 'center', textTransform: 'none', color: theme.palette.mode === 'light' ? '#6C6C6C' : 'inherit', '&:hover': { color: theme.palette.mode === 'light' ? '#6C6C6C' : 'inherit', backgroundColor: theme.palette.mode === 'light' ? '#f5f5f5' : 'inherit', }, }; const starButtonStyles = { justifyContent: 'flex-start', textAlign: 'left', fontSize: '15px', padding: '12px 20px 12px 22px', minHeight: '48px', minWidth: '100%', display: 'flex', alignItems: 'center', textTransform: 'none', color: theme.palette.mode === 'light' ? '#6C6C6C' : 'inherit', backgroundColor: theme.palette.mode === 'light' ? '#fafafa' : 'rgba(255, 255, 255, 0.04)', '&:hover': { color: theme.palette.mode === 'light' ? '#6C6C6C' : 'inherit', backgroundColor: theme.palette.mode === 'light' ? '#f0f0f0' : 'rgba(255, 255, 255, 0.08)', }, }; return ( <> } iconPosition="start" disableRipple={true} sx={{ justifyContent: 'flex-start', textAlign: 'left', fontSize: '16px' }} onClick={handleRobotsClick} /> } iconPosition="start" disableRipple={true} sx={{ justifyContent: 'flex-start', textAlign: 'left', fontSize: '16px' }} /> } iconPosition="start" disableRipple={true} sx={{ justifyContent: 'flex-start', textAlign: 'left', fontSize: '16px' }} /> } iconPosition="start" disableRipple={true} sx={{ justifyContent: 'flex-start', textAlign: 'left', fontSize: '16px' }} /> setDocModalOpen(false)}> setSponsorModalOpen(false)}> Support Maxun Open Source Maxun is built by a small, full-time team. 
Your donations directly contribute to making it better.
Thank you for your support! 🩷
); }; ================================================ FILE: src/components/dashboard/NavBar.tsx ================================================ import { useTranslation } from "react-i18next"; import React, { useState, useContext, useEffect } from 'react'; import axios from 'axios'; import styled from "styled-components"; import { stopRecording } from "../../api/recording"; import { useGlobalInfoStore } from "../../context/globalInfo"; import { IconButton, Menu, MenuItem, Typography, Chip, Button, Snackbar, Tooltip } from "@mui/material"; import { AccountCircle, Logout, Clear, YouTube, X, GitHub, Close, LightMode, DarkMode, Translate } from "@mui/icons-material"; import { useNavigate } from 'react-router-dom'; import { AuthContext } from '../../context/auth'; import { SaveRecording } from '../recorder/SaveRecording'; import DiscordIcon from '../icons/DiscordIcon'; import { apiUrl } from '../../apiConfig'; import MaxunLogo from "../../assets/maxunlogo.png"; import { useThemeMode } from '../../context/theme-provider'; import packageJson from "../../../package.json" interface NavBarProps { recordingName: string; isRecording: boolean; } export const NavBar: React.FC = ({ recordingName, isRecording, }) => { const { notify, browserId, setBrowserId } = useGlobalInfoStore(); const { state, dispatch } = useContext(AuthContext); const { user } = state; const navigate = useNavigate(); const { darkMode, toggleTheme } = useThemeMode(); const { t, i18n } = useTranslation(); const [anchorEl, setAnchorEl] = useState(null); const [langAnchorEl, setLangAnchorEl] = useState(null); const currentVersion = packageJson.version; const [latestVersion, setLatestVersion] = useState(null); const [isUpdateAvailable, setIsUpdateAvailable] = useState(false); const fetchLatestVersion = async (): Promise => { try { const response = await fetch("https://api.github.com/repos/getmaxun/maxun/releases/latest"); const data = await response.json(); const version = data.tag_name.replace(/^v/, ""); // 
Remove 'v' prefix return version; } catch (error) { console.error("Failed to fetch latest version:", error); return null; } }; const handleMenuOpen = (event: React.MouseEvent) => { setAnchorEl(event.currentTarget); }; const handleLangMenuOpen = (event: React.MouseEvent) => { setLangAnchorEl(event.currentTarget); }; const handleMenuClose = () => { setAnchorEl(null); setLangAnchorEl(null); }; const logout = async () => { try { const { data } = await axios.get(`${apiUrl}/auth/logout`); if (data.ok) { dispatch({ type: "LOGOUT" }); window.localStorage.removeItem("user"); // notify('success', t('navbar.notifications.success.logout')); navigate("/login"); } } catch (error: any) { const status = error.response?.status; let errorKey = 'unknown'; switch (status) { case 401: errorKey = 'unauthorized'; break; case 500: errorKey = 'server'; break; default: if (error.message?.includes('Network Error')) { errorKey = 'network'; } } notify( 'error', t(`navbar.notifications.errors.logout.${errorKey}`, { error: error.response?.data?.message || error.message }) ); navigate("/login"); } }; const goToMainMenu = async () => { if (browserId) { await stopRecording(browserId); notify("warning", t('browser_recording.notifications.terminated')); setBrowserId(null); } navigate("/"); }; const changeLanguage = (lang: string) => { i18n.changeLanguage(lang); localStorage.setItem("language", lang); }; const renderThemeToggle = () => ( {darkMode ? : } ); useEffect(() => { const checkForUpdates = async () => { const latestVersion = await fetchLatestVersion(); setLatestVersion(latestVersion); if (latestVersion && latestVersion !== currentVersion) { setIsUpdateAvailable(true); } }; checkForUpdates(); }, []); return ( <> {isUpdateAvailable && ( setIsUpdateAvailable(false)} message={ `${t('navbar.upgrade.modal.new_version_available', { version: latestVersion })}` } action={ <> setIsUpdateAvailable(false)} style={{ color: 'black' }} > } ContentProps={{ sx: { background: "white", color: "black", } }} /> )}
navigate('/')}>
{t('navbar.project_name')}
{ user ? (
{!isRecording ? ( <> Browse Auto Robots {user.email} { handleMenuClose(); logout(); }}> {t('navbar.menu_items.logout')} {t('navbar.menu_items.language')}
{ window.open('https://github.com/getmaxun/maxun', '_blank'); }}> GitHub { window.open('https://discord.gg/5GbPjBUkws', '_blank'); }}> Discord { window.open('https://www.youtube.com/@MaxunOSS/videos?ref=app', '_blank'); }}> YouTube { window.open('https://x.com/MaxunHQ?ref=app', '_blank'); }}> Twitter (X) { changeLanguage("en"); handleMenuClose(); }} > English { changeLanguage("es"); handleMenuClose(); }} > Español { changeLanguage("ja"); handleMenuClose(); }} > 日本語 { changeLanguage("zh"); handleMenuClose(); }} > 中文 { changeLanguage("de"); handleMenuClose(); }} > Deutsch { changeLanguage("tr"); handleMenuClose(); }} > Türkçe { window.open('https://docs.maxun.dev/development/i18n', '_blank'); handleMenuClose(); }} > Add Language
{renderThemeToggle()} ) : ( <> {t('navbar.recording.discard')} )}
) : ( { changeLanguage("en"); handleMenuClose(); }} > English { changeLanguage("es"); handleMenuClose(); }} > Español { changeLanguage("ja"); handleMenuClose(); }} > 日本語 { changeLanguage("zh"); handleMenuClose(); }} > 中文 { changeLanguage("de"); handleMenuClose(); }} > Deutsch { changeLanguage("tr"); handleMenuClose(); }} > Türkçe { window.open('https://docs.maxun.dev/development/i18n', '_blank'); handleMenuClose(); }} > Add Language {renderThemeToggle()} )}
); }; const NavBarWrapper = styled.div<{ mode: 'light' | 'dark' }>` grid-area: navbar; background-color: ${({ mode }) => (mode === 'dark' ? '#000000ff' : '#ffffff')}; padding: 5px; display: flex; justify-content: space-between; border-bottom: 1px solid ${({ mode }) => (mode === 'dark' ? '#000000ff' : '#e0e0e0')}; `; const ProjectName = styled.b<{ mode: 'light' | 'dark' }>` color: ${({ mode }) => (mode === 'dark' ? '#ffffff' : '#3f4853')}; font-size: 1.3em; `; const NavBarRight = styled.div` display: flex; align-items: center; justify-content: flex-end; margin-left: auto; `; ================================================ FILE: src/components/dashboard/NotFound.tsx ================================================ import React from 'react'; export function NotFoundPage() { return (

404 - Page Not Found

Oops! This page does not exist.

Take me to the homepage
); } ================================================ FILE: src/components/icons/DiscordIcon.tsx ================================================ import React from 'react'; import SvgIcon, { SvgIconProps } from '@mui/material/SvgIcon'; const DiscordIcon: React.FC = (props) => ( ); export default DiscordIcon; ================================================ FILE: src/components/icons/RecorderIcon.tsx ================================================ import React from 'react'; export const RecordingIcon = () => { return ( ); }; ================================================ FILE: src/components/integration/IntegrationSettings.tsx ================================================ import React, { useState, useEffect } from "react"; import { GenericModal } from "../ui/GenericModal"; import { MenuItem, Typography, CircularProgress, Alert, AlertTitle, Button, TextField, IconButton, Box, Chip, Card, CardContent, CardActions, Switch, FormControlLabel, Table, TableBody, TableCell, TableContainer, TableHead, TableRow, Paper, } from "@mui/material"; import { Add as AddIcon, Delete as DeleteIcon, Edit as EditIcon, Science as ScienceIcon } from "@mui/icons-material"; import axios from "axios"; import { useGlobalInfoStore } from "../../context/globalInfo"; import { getStoredRecording } from "../../api/storage"; import { apiUrl } from "../../apiConfig.js"; import { v4 as uuid } from "uuid"; import Cookies from "js-cookie"; import { useTranslation } from "react-i18next"; import { useNavigate } from "react-router-dom"; import { addWebhook, updateWebhook, removeWebhook, getWebhooks, testWebhook,WebhookConfig } from "../../api/webhook"; interface IntegrationProps { isOpen: boolean; handleStart: (data: IntegrationSettings) => void; handleClose: () => void; preSelectedIntegrationType?: "googleSheets" | "airtable" | "webhook" | null; } export interface IntegrationSettings { spreadsheetId?: string; spreadsheetName?: string; airtableBaseId?: string; airtableBaseName?: string; 
airtableTableName?: string, airtableTableId?: string, webhooks?: WebhookConfig[]; data: string; integrationType: "googleSheets" | "airtable" | "webhook"; } const getCookie = (name: string): string | null => { const value = `; ${document.cookie}`; const parts = value.split(`; ${name}=`); if (parts.length === 2) { return parts.pop()?.split(";").shift() || null; } return null; }; const removeCookie = (name: string): void => { document.cookie = `${name}=; expires=Thu, 01 Jan 1970 00:00:00 GMT; path=/`; }; export const IntegrationSettingsModal = ({ isOpen, handleStart, handleClose, preSelectedIntegrationType = null, }: IntegrationProps) => { const { t } = useTranslation(); const [settings, setSettings] = useState({ spreadsheetId: "", spreadsheetName: "", airtableBaseId: "", airtableBaseName: "", airtableTableName: "", airtableTableId: "", webhooks: [], data: "", integrationType: preSelectedIntegrationType || "googleSheets", }); const [spreadsheets, setSpreadsheets] = useState<{ id: string; name: string }[]>([]); const [airtableBases, setAirtableBases] = useState<{ id: string; name: string }[]>([]); const [airtableTables, setAirtableTables] = useState<{ id: string; name: string }[]>([]); const [loading, setLoading] = useState(false); const [error, setError] = useState(null); const [showWebhookForm, setShowWebhookForm] = useState(false); const [editingWebhook, setEditingWebhook] = useState(null); const [newWebhook, setNewWebhook] = useState({ id: "", url: "", events: ["run_completed"], active: true, }); const [urlError, setUrlError] = useState(null); const { recordingId, notify, setRerenderRobots } = useGlobalInfoStore(); const [recording, setRecording] = useState(null); const navigate = useNavigate(); const [selectedIntegrationType, setSelectedIntegrationType] = useState< "googleSheets" | "airtable" | "webhook" | null >(preSelectedIntegrationType); const authenticateWithGoogle = () => { window.location.href = `${apiUrl}/auth/google?robotId=${recordingId}`; }; // 
Authenticate with Airtable const authenticateWithAirtable = () => { window.location.href = `${apiUrl}/auth/airtable?robotId=${recordingId}`; }; const validateWebhookData = (url: string, events: string[], excludeId?: string) => { if (!url) { setUrlError("Please provide webhook URL"); return false; } try { new URL(url); } catch { setUrlError("Please provide a valid URL"); return false; } const existingWebhook = settings.webhooks?.find( (webhook) => webhook.url === url && webhook.id !== excludeId ); if (existingWebhook) { setUrlError("This webhook URL is already in use"); return false; } if (!events || events.length === 0) { setUrlError("Please select at least one event"); return false; } setUrlError(null); return true; };
  /**
   * Loads the webhooks stored for the current recording into `settings.webhooks`.
   *
   * Does nothing when no recording is selected. A non-ok response or a thrown
   * error is reported through `notify`; the `loading` flag is always cleared
   * once a request has been started.
   */
  const fetchWebhooks = async () => {
    // Guard before touching `loading`: returning after setLoading(true) would
    // leave the flag set, because nothing on that path resets it.
    if (!recordingId) return;

    try {
      setLoading(true);
      const response = await getWebhooks(recordingId);

      if (response.ok && response.webhooks) {
        setSettings(prev => ({ ...prev, webhooks: response.webhooks }));
      } else {
        notify("error", response.error || "Failed to fetch webhooks");
      }
    } catch (error: any) {
      console.error("Error fetching webhooks:", error);
      notify("error", "Failed to fetch webhooks");
    } finally {
      // Single reset point for both the success and the failure path.
      setLoading(false);
    }
  };
const addWebhookSetting = async () => { if (!validateWebhookData(newWebhook.url, newWebhook.events)) { if (!newWebhook.url) { notify("error", "Please provide webhook URL"); } else if (!newWebhook.events || newWebhook.events.length === 0) { notify("error", "Please select at least one event"); } return; } if (!recordingId) return; try { setLoading(true); const webhookWithId = { ...newWebhook, id: uuid(), }; const response = await addWebhook(webhookWithId, recordingId); if (response.ok) { const updatedWebhooks = [...(settings.webhooks || []), webhookWithId]; setSettings({ ...settings, webhooks: updatedWebhooks }); resetWebhookForm(); await refreshRecordingData(); notify("success", "Webhook added successfully"); } else { notify("error", response.error || "Failed to add webhook"); }
setLoading(false); } catch (error: any) { setLoading(false); console.log("Error adding webhook:", error); notify("error", "Failed to add webhook"); } }; const updateWebhookSetting = async () => { if (!editingWebhook || !recordingId) return; if (!validateWebhookData(newWebhook.url, newWebhook.events, editingWebhook)) { if (!newWebhook.url) { notify("error", "Please provide webhook URL"); } else if (!newWebhook.events || newWebhook.events.length === 0) { notify("error", "Please select at least one event"); } return; } try { setLoading(true); const response = await updateWebhook(newWebhook, recordingId); if (response.ok) { const updatedWebhooks = (settings.webhooks || []).map(w => w.id === editingWebhook ? newWebhook : w ); setSettings({ ...settings, webhooks: updatedWebhooks }); resetWebhookForm(); await refreshRecordingData(); notify("success", "Webhook updated successfully"); } else { notify("error", response.error || "Failed to update webhook"); } setLoading(false); } catch (error: any) { setLoading(false); console.error("Error updating webhook:", error); notify("error", "Failed to update webhook"); } }; const removeWebhookSetting = async (webhookId: string) => { if (!recordingId) return; try { setLoading(true); const response = await removeWebhook(webhookId, recordingId); if (response.ok) { const updatedWebhooks = (settings.webhooks || []).filter(w => w.id !== webhookId); setSettings({ ...settings, webhooks: updatedWebhooks }); await refreshRecordingData(); notify("success", "Webhook removed successfully"); } else { notify("error", response.error || "Failed to remove webhook"); } setLoading(false); } catch (error: any) { setLoading(false); console.error("Error removing webhook:", error); notify("error", "Failed to remove webhook"); } }; const toggleWebhookStatusSetting = async (webhookId: string) => { if (!recordingId) return; try { const webhook = settings.webhooks?.find(w => w.id === webhookId); if (!webhook) return; const updatedWebhook = { ...webhook, active: 
!webhook.active }; const response = await updateWebhook(updatedWebhook, recordingId); if (response.ok) { const updatedWebhooks = (settings.webhooks || []).map(w => w.id === webhookId ? updatedWebhook : w ); setSettings({ ...settings, webhooks: updatedWebhooks }); await refreshRecordingData(); notify("success", `Webhook ${updatedWebhook.active ? "enabled" : "disabled"}`); } else { notify("error", response.error || "Failed to update webhook"); } } catch (error: any) { console.error("Error toggling webhook status:", error); notify("error", "Failed to update webhook"); } }; const testWebhookSetting = async (webhook: WebhookConfig) => { if (!recordingId) return; try { setLoading(true); const response = await testWebhook(webhook, recordingId); if (response.ok) { const updatedWebhooks = (settings.webhooks || []).map(w => w.id === webhook.id ? { ...w, lastCalledAt: new Date().toISOString() } : w ); setSettings({ ...settings, webhooks: updatedWebhooks }); notify("success", "Test webhook sent successfully"); } else { notify("error", response.error || "Failed to test webhook"); } setLoading(false); } catch (error: any) { setLoading(false); console.error("Error testing webhook:", error); notify("error", "Failed to test webhook"); } }; const editWebhookSetting = (webhook: WebhookConfig) => { setNewWebhook(webhook); setEditingWebhook(webhook.id); setShowWebhookForm(true); }; const resetWebhookForm = () => { setNewWebhook({ id: "", url: "", events: ["run_completed"], active: true, }); setShowWebhookForm(false); setEditingWebhook(null); setUrlError(null); }; // Fetch Google Sheets files const fetchSpreadsheetFiles = async () => { try { const response = await axios.get( `${apiUrl}/auth/gsheets/files?robotId=${recordingId}`, { withCredentials: true } ); setSpreadsheets(response.data); } catch (error: any) { setLoading(false); console.error("Error fetching spreadsheet files:", error); notify("error", t("integration_settings.google.errors.fetch_error", { message: 
error.response?.data?.message || error.message, })); } }; // Fetch Airtable bases const fetchAirtableBases = async () => { try { const response = await axios.get( `${apiUrl}/auth/airtable/bases?robotId=${recordingId}`, { withCredentials: true } ); setAirtableBases(response.data); } catch (error: any) { setLoading(false); console.error("Error fetching Airtable bases:", error); notify("error", t("integration_settings.airtable.errors.fetch_error", { message: error.response?.data?.message || error.message, })); } }; const fetchAirtableTables = async (baseId: string, recordingId: string) => { try { const response = await axios.get( `${apiUrl}/auth/airtable/tables?robotId=${recordingId}&baseId=${baseId}`, { withCredentials: true } ); setAirtableTables(response.data); } catch (error: any) { setLoading(false); console.error("Error fetching Airtable tables:", error); notify("error", t("integration_settings.airtable.errors.fetch_tables_error", { message: error.response?.data?.message || error.message, })); } } // Handle Google Sheets selection const handleSpreadsheetSelect = (e: React.ChangeEvent) => { const selectedSheet = spreadsheets.find((sheet) => sheet.id === e.target.value); if (selectedSheet) { setSettings({ ...settings, spreadsheetId: selectedSheet.id, spreadsheetName: selectedSheet.name, }); } }; // Handle Airtable base selection const handleAirtableBaseSelect = async (e: React.ChangeEvent) => { const selectedBase = airtableBases.find((base) => base.id === e.target.value); if (selectedBase) { setSettings((prevSettings) => ({ ...prevSettings, airtableBaseId: selectedBase.id, airtableBaseName: selectedBase.name, })); if (recordingId) { await fetchAirtableTables(selectedBase.id, recordingId); } else { console.error("Recording ID is null"); } } }; const handleAirtabletableSelect = (e: React.ChangeEvent) => { const selectedTable = airtableTables.find((table) => table.id === e.target.value); if (selectedTable) { setSettings((prevSettings) => ({ ...prevSettings, 
airtableTableId: e.target.value, airtableTableName: selectedTable?.name || "", })); } }; const refreshRecordingData = async () => { if (!recordingId) return null; const updatedRecording = await getStoredRecording(recordingId); setRecording(updatedRecording); await fetchWebhooks(); setRerenderRobots(true); return updatedRecording; }; const updateGoogleSheetId = async () => { try { setLoading(true); await axios.post( `${apiUrl}/auth/gsheets/update`, { spreadsheetId: settings.spreadsheetId, spreadsheetName: settings.spreadsheetName, robotId: recordingId, }, { withCredentials: true } ); // Refresh recording data immediately await refreshRecordingData(); notify("success", t("integration_settings.google.notifications.sheet_selected")); setLoading(false); } catch (error: any) { setLoading(false); console.error("Error updating Google Sheet ID:", error); notify("error", t("integration_settings.google.errors.update_error", { message: error.response?.data?.message || error.message, })); } }; // Update Airtable integration const updateAirtableBase = async () => { try { setLoading(true); await axios.post( `${apiUrl}/auth/airtable/update`, { baseId: settings.airtableBaseId, baseName: settings.airtableBaseName, robotId: recordingId, tableName: settings.airtableTableName, tableId: settings.airtableTableId, }, { withCredentials: true } ); await refreshRecordingData(); notify("success", t("integration_settings.airtable.notifications.base_selected")); setLoading(false); } catch (error: any) { setLoading(false); console.error("Error updating Airtable base:", error); notify("error", t("integration_settings.airtable.errors.update_error", { message: error.response?.data?.message || error.message, })); } }; // Remove Google Sheets integration const removeGoogleSheetsIntegration = async () => { try { setLoading(true); await axios.post( `${apiUrl}/auth/gsheets/remove`, { robotId: recordingId }, { withCredentials: true } ); // Clear UI state setSpreadsheets([]); setSettings({ ...settings, 
spreadsheetId: "", spreadsheetName: "" }); // Refresh recording data await refreshRecordingData(); notify("success", t("integration_settings.google.notifications.integration_removed")); setLoading(false); } catch (error: any) { setLoading(false); console.error("Error removing Google Sheets integration:", error); notify("error", t("integration_settings.google.errors.remove_error", { message: error.response?.data?.message || error.message, })); } }; // Remove Airtable integration const removeAirtableIntegration = async () => { try { setLoading(true); await axios.post( `${apiUrl}/auth/airtable/remove`, { robotId: recordingId }, { withCredentials: true } ); setAirtableBases([]); setAirtableTables([]); setSettings({ ...settings, airtableBaseId: "", airtableBaseName: "", airtableTableName: "", airtableTableId: "" }); await refreshRecordingData(); notify("success", t("integration_settings.airtable.notifications.integration_removed")); setLoading(false); } catch (error: any) { setLoading(false); console.error("Error removing Airtable integration:", error); notify("error", t("integration_settings.airtable.errors.remove_error", { message: error.response?.data?.message || error.message, })); } }; const handleAirtableOAuthCallback = async () => { try { const response = await axios.get(`${apiUrl}/auth/airtable/callback`); if (response.data.success) { await refreshRecordingData(); } } catch (error) { setError(t("integration_settings.airtable.errors.auth_error")); } }; useEffect(() => { const fetchRecordingInfo = async () => { if (!recordingId) return; setLoading(true); const recording = await getStoredRecording(recordingId); if (recording) { setRecording(recording); if (preSelectedIntegrationType) { setSettings(prev => ({ ...prev, integrationType: preSelectedIntegrationType })); } else if (recording.google_sheet_id) { setSettings(prev => ({ ...prev, integrationType: "googleSheets" })); } else if (recording.airtable_base_id) { setSettings(prev => ({ ...prev, airtableBaseId: 
recording.airtable_base_id || "", airtableBaseName: recording.airtable_base_name || "", airtableTableName: recording.airtable_table_name || "", airtableTableId: recording.airtable_table_id || "", integrationType: "airtable" })); } await fetchWebhooks(); if (!preSelectedIntegrationType && !recording.google_sheet_id && !recording.airtable_base_id) { const webhookResponse = await getWebhooks(recordingId); if (webhookResponse.ok && webhookResponse.webhooks && webhookResponse.webhooks.length > 0) { setSettings(prev => ({ ...prev, integrationType: "webhook" })); } } } setLoading(false); }; fetchRecordingInfo(); }, [recordingId, preSelectedIntegrationType]); useEffect(() => { const status = getCookie("airtable_auth_status"); const message = getCookie("airtable_auth_message"); if (status === "success") { notify("success", message || t("integration_settings.airtable.notifications.auth_success")); removeCookie("airtable_auth_status"); removeCookie("airtable_auth_message"); refreshRecordingData(); } const urlParams = new URLSearchParams(window.location.search); const code = urlParams.get("code"); if (code) { handleAirtableOAuthCallback(); } }, []); const formatEventName = (event: string) => { switch (event) { case "run_completed": return "Run finished"; case "run_failed": return "Run failed"; default: return event; } }; const formatLastCalled = (lastCalledAt?: string | null) => { if (!lastCalledAt) { return "Not called yet"; } const date = new Date(lastCalledAt); const now = new Date(); const diffMs = now.getTime() - date.getTime(); const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24)); const diffHours = Math.floor(diffMs / (1000 * 60 * 60)); const diffMinutes = Math.floor(diffMs / (1000 * 60)); if (diffMinutes < 1) { return "Just now"; } else if (diffMinutes < 60) { return `${diffMinutes} minute${diffMinutes === 1 ? '' : 's'} ago`; } else if (diffHours < 24) { return `${diffHours} hour${diffHours === 1 ? 
'' : 's'} ago`; } else if (diffDays < 7) { return `${diffDays} day${diffDays === 1 ? '' : 's'} ago`; } else { return date.toLocaleDateString('en-US', { year: 'numeric', month: 'short', day: 'numeric', hour: '2-digit', minute: '2-digit' }); } }; if (!selectedIntegrationType) { return (
); } return (
{settings.integrationType === "googleSheets" && ( <> {t("integration_settings.google.title")} {recording?.google_sheet_id ? ( <> {t("integration_settings.google.alerts.success.title")} {t("integration_settings.google.alerts.success.content", { sheetName: recording.google_sheet_name, })} {t("integration_settings.google.alerts.success.here")} ) : ( <> {!recording?.google_sheet_email ? ( <>

{t("integration_settings.google.descriptions.sync_info")}

) : ( <> {t("integration_settings.google.descriptions.authenticated_as", { email: recording.google_sheet_email, })} {loading ? ( ) : error ? ( {error} ) : spreadsheets.length === 0 ? ( ) : ( <> {spreadsheets.map((sheet) => ( {sheet.name} ))} )} )} )} )} {settings.integrationType === "airtable" && ( <> {t("integration_settings.airtable.title")} {recording?.airtable_base_id ? ( <> {t("integration_settings.airtable.alerts.success.title")} {t("integration_settings.airtable.alerts.success.content", { baseName: recording.airtable_base_name, tableName: recording.airtable_table_name })} {t("integration_settings.airtable.alerts.success.here")} ) : ( <> {!recording?.airtable_access_token ? ( <>

{t("integration_settings.airtable.descriptions.sync_info")}

) : ( <> {t("integration_settings.airtable.descriptions.authenticated_as")} {loading ? ( ) : error ? ( {error} ) : airtableBases.length === 0 ? ( ) : ( <> {airtableBases.map((base) => ( {base.name} ))} {airtableTables.map((table) => ( {table.name} ))} )} )} )} )} {settings.integrationType === "webhook" && ( <> Integrate using Webhooks {settings.webhooks && settings.webhooks.length > 0 && ( Webhook URL Call when Last called Status Actions {settings.webhooks.map((webhook) => ( {webhook.url} {webhook.events.map((event) => ( ))} {formatLastCalled(webhook.lastCalledAt)} toggleWebhookStatusSetting(webhook.id)} size="small" /> testWebhookSetting(webhook)} disabled={loading || !webhook.active} title="Test" > editWebhookSetting(webhook)} disabled={loading} title="Edit" > removeWebhookSetting(webhook.id)} disabled={loading} title="Delete" > ))}
)} {!showWebhookForm && ( { setNewWebhook({ ...newWebhook, url: e.target.value }); if (urlError) setUrlError(null); }} error={!!urlError} helperText={urlError} required aria-describedby="webhook-url-help" /> setNewWebhook({ ...newWebhook, events: [e.target.value] })} sx={{ minWidth: "200px" }} required > Run finished Run failed Refer to the API documentation for examples and details. )} {showWebhookForm && ( {editingWebhook ? "Edit Webhook" : "Add New Webhook"} { setNewWebhook({ ...newWebhook, url: e.target.value }); if (urlError) setUrlError(null); }} sx={{ marginBottom: "15px" }} placeholder="https://your-api.com/webhook/endpoint" required error={!!urlError} helperText={urlError} /> setNewWebhook({ ...newWebhook, events: typeof e.target.value === 'string' ? [e.target.value] : e.target.value })} SelectProps={{ multiple: true, renderValue: (selected) => ( {(selected as string[]).map((value) => ( ))} ), }} sx={{ marginBottom: "20px" }} required > Run finished Run failed setNewWebhook({ ...newWebhook, active: e.target.checked })} /> } label="Active" sx={{ marginBottom: "10px" }} /> )} )}
); }; export const modalStyle = { top: "40%", left: "50%", transform: "translate(-50%, -50%)", width: "60%", backgroundColor: "background.paper", p: 4, height: "fit-content", display: "block", padding: "20px", maxHeight: "90vh", overflow: "auto", }; ================================================ FILE: src/components/pickers/DatePicker.tsx ================================================ import React, { useState } from 'react'; import { useSocketStore } from '../../context/socket'; interface Coordinates { x: number; y: number; }; interface DatePickerProps { coordinates: Coordinates; selector: string; onClose: () => void; } const DatePicker: React.FC = ({ coordinates, selector, onClose }) => { const { socket } = useSocketStore(); const [selectedDate, setSelectedDate] = useState(''); const handleDateChange = (e: React.ChangeEvent) => { setSelectedDate(e.target.value); }; const updateDOMElement = (selector: string, value: string) => { try { let iframeElement = document.querySelector('#dom-browser-iframe') as HTMLIFrameElement; if (!iframeElement) { iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement; } if (!iframeElement) { const browserWindow = document.querySelector('#browser-window'); if (browserWindow) { iframeElement = browserWindow.querySelector('iframe') as HTMLIFrameElement; } } if (!iframeElement) { console.error('Could not find iframe element for DOM update'); return; } const iframeDoc = iframeElement.contentDocument; if (!iframeDoc) { console.error('Could not access iframe document'); return; } const element = iframeDoc.querySelector(selector) as HTMLInputElement; if (element) { element.value = value; const changeEvent = new Event('change', { bubbles: true }); element.dispatchEvent(changeEvent); const inputEvent = new Event('input', { bubbles: true }); element.dispatchEvent(inputEvent); } else { console.warn(`Could not find element with selector: ${selector}`); } } catch (error) { console.error('Error updating DOM 
element:', error); } }; const handleConfirm = () => { if (socket && selectedDate) { socket.emit('input:date', { selector, value: selectedDate }); updateDOMElement(selector, selectedDate); onClose(); } }; return (
); }; export default DatePicker; ================================================ FILE: src/components/pickers/DateTimeLocalPicker.tsx ================================================ import React, { useState } from 'react'; import { useSocketStore } from '../../context/socket'; interface Coordinates { x: number; y: number; }; interface DateTimeLocalPickerProps { coordinates: Coordinates; selector: string; onClose: () => void; } const DateTimeLocalPicker: React.FC = ({ coordinates, selector, onClose }) => { const { socket } = useSocketStore(); const [selectedDateTime, setSelectedDateTime] = useState(''); const handleDateTimeChange = (e: React.ChangeEvent) => { setSelectedDateTime(e.target.value); }; const updateDOMElement = (selector: string, value: string) => { try { let iframeElement = document.querySelector('#dom-browser-iframe') as HTMLIFrameElement; if (!iframeElement) { iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement; } if (!iframeElement) { const browserWindow = document.querySelector('#browser-window'); if (browserWindow) { iframeElement = browserWindow.querySelector('iframe') as HTMLIFrameElement; } } if (!iframeElement) { console.error('Could not find iframe element for DOM update'); return; } const iframeDoc = iframeElement.contentDocument; if (!iframeDoc) { console.error('Could not access iframe document'); return; } const element = iframeDoc.querySelector(selector) as HTMLInputElement; if (element) { element.value = value; const changeEvent = new Event('change', { bubbles: true }); element.dispatchEvent(changeEvent); const inputEvent = new Event('input', { bubbles: true }); element.dispatchEvent(inputEvent); } else { console.warn(`Could not find element with selector: ${selector}`); } } catch (error) { console.error('Error updating DOM element:', error); } }; const handleConfirm = () => { if (socket && selectedDateTime) { socket.emit('input:datetime-local', { selector, value: selectedDateTime }); 
updateDOMElement(selector, selectedDateTime); onClose(); } }; return (
); }; export default DateTimeLocalPicker; ================================================ FILE: src/components/pickers/Dropdown.tsx ================================================ import React, { useState } from 'react'; import { useSocketStore } from '../../context/socket'; interface Coordinates { x: number; y: number; }; interface DropdownProps { coordinates: Coordinates; selector: string; options: Array<{ value: string; text: string; disabled: boolean; selected: boolean; }>; onClose: () => void; } const Dropdown = ({ coordinates, selector, options, onClose }: DropdownProps) => { const { socket } = useSocketStore(); const [hoveredIndex, setHoveredIndex] = useState(null); const updateDOMElement = (selector: string, value: string) => { try { let iframeElement = document.querySelector('#dom-browser-iframe') as HTMLIFrameElement; if (!iframeElement) { iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement; } if (!iframeElement) { const browserWindow = document.querySelector('#browser-window'); if (browserWindow) { iframeElement = browserWindow.querySelector('iframe') as HTMLIFrameElement; } } if (!iframeElement) { console.error('Could not find iframe element for DOM update'); return; } const iframeDoc = iframeElement.contentDocument; if (!iframeDoc) { console.error('Could not access iframe document'); return; } const selectElement = iframeDoc.querySelector(selector) as HTMLSelectElement; if (selectElement) { selectElement.value = value; const optionElements = selectElement.querySelectorAll('option'); optionElements.forEach(option => { if (option.value === value) { option.selected = true; option.setAttribute('selected', 'selected'); } else { option.selected = false; option.removeAttribute('selected'); } }); const changeEvent = new Event('change', { bubbles: true }); selectElement.dispatchEvent(changeEvent); const inputEvent = new Event('input', { bubbles: true }); selectElement.dispatchEvent(inputEvent); } else { console.warn(`Could 
not find select element with selector: ${selector}`); } } catch (error) { console.error('Error updating DOM select element:', error); } }; const handleSelect = (value: string) => { if (socket) { socket.emit('input:dropdown', { selector, value }); updateDOMElement(selector, value); } onClose(); }; const containerStyle: React.CSSProperties = { position: 'absolute', left: coordinates.x, top: coordinates.y, zIndex: 1000, width: '200px', backgroundColor: 'white', border: '1px solid rgb(169, 169, 169)', boxShadow: '0 2px 4px rgba(0,0,0,0.15)', }; const scrollContainerStyle: React.CSSProperties = { maxHeight: '180px', overflowY: 'auto', overflowX: 'hidden', }; const getOptionStyle = (option: any, index: number): React.CSSProperties => ({ fontSize: '13.333px', lineHeight: '18px', padding: '0 3px', cursor: option.disabled ? 'default' : 'default', backgroundColor: hoveredIndex === index ? '#0078D7' : option.selected ? '#0078D7' : option.disabled ? '#f8f8f8' : 'white', color: (hoveredIndex === index || option.selected) ? 'white' : option.disabled ? '#a0a0a0' : 'black', userSelect: 'none', }); return (
e.stopPropagation()} >
{options.map((option, index) => (
!option.disabled && setHoveredIndex(index)} onMouseLeave={() => setHoveredIndex(null)} onClick={() => !option.disabled && handleSelect(option.value)} > {option.text}
))}
); }; export default Dropdown; ================================================ FILE: src/components/pickers/TimePicker.tsx ================================================ import React, { useState } from 'react'; import { useSocketStore } from '../../context/socket'; interface Coordinates { x: number; y: number; }; interface TimePickerProps { coordinates: Coordinates; selector: string; onClose: () => void; } const TimePicker = ({ coordinates, selector, onClose }: TimePickerProps) => { const { socket } = useSocketStore(); const [hoveredHour, setHoveredHour] = useState(null); const [hoveredMinute, setHoveredMinute] = useState(null); const [selectedHour, setSelectedHour] = useState(null); const [selectedMinute, setSelectedMinute] = useState(null); const handleHourSelect = (hour: number) => { setSelectedHour(hour); // If minute is already selected, complete the selection if (selectedMinute !== null) { const formattedHour = hour.toString().padStart(2, '0'); const formattedMinute = selectedMinute.toString().padStart(2, '0'); if (socket) { socket.emit('input:time', { selector, value: `${formattedHour}:${formattedMinute}` }); } onClose(); } }; const handleMinuteSelect = (minute: number) => { setSelectedMinute(minute); // If hour is already selected, complete the selection if (selectedHour !== null) { const formattedHour = selectedHour.toString().padStart(2, '0'); const formattedMinute = minute.toString().padStart(2, '0'); if (socket) { socket.emit('input:time', { selector, value: `${formattedHour}:${formattedMinute}` }); } onClose(); } }; const containerStyle: React.CSSProperties = { position: 'absolute', left: coordinates.x, top: coordinates.y, zIndex: 1000, display: 'flex', backgroundColor: 'white', border: '1px solid rgb(169, 169, 169)', boxShadow: '0 2px 4px rgba(0,0,0,0.15)', }; const columnStyle: React.CSSProperties = { width: '60px', maxHeight: '180px', overflowY: 'auto', overflowX: 'hidden', borderRight: '1px solid rgb(169, 169, 169)', }; const getOptionStyle = 
(value: number, isHour: boolean): React.CSSProperties => { const isHovered = isHour ? hoveredHour === value : hoveredMinute === value; const isSelected = isHour ? selectedHour === value : selectedMinute === value; return { fontSize: '13.333px', lineHeight: '18px', padding: '0 3px', cursor: 'default', backgroundColor: isSelected ? '#0078D7' : isHovered ? '#0078D7' : 'white', color: (isSelected || isHovered) ? 'white' : 'black', userSelect: 'none', }; }; const hours = Array.from({ length: 24 }, (_, i) => i); const minutes = Array.from({ length: 60 }, (_, i) => i); return (
e.stopPropagation()} > {/* Hours column */}
{hours.map((hour) => (
setHoveredHour(hour)} onMouseLeave={() => setHoveredHour(null)} onClick={() => handleHourSelect(hour)} > {hour.toString().padStart(2, '0')}
))}
{/* Minutes column */}
{minutes.map((minute) => (
setHoveredMinute(minute)} onMouseLeave={() => setHoveredMinute(null)} onClick={() => handleMinuteSelect(minute)} > {minute.toString().padStart(2, '0')}
))}
); }; export default TimePicker; ================================================ FILE: src/components/proxy/ProxyForm.tsx ================================================ import React, { useState, useEffect } from 'react'; import { Alert, AlertTitle, TextField, Button, Switch, FormControlLabel, Box, Typography, Table, TableContainer, TableHead, TableRow, TableBody, TableCell, Paper } from '@mui/material'; import { sendProxyConfig, getProxyConfig, testProxyConfig, deleteProxyConfig } from '../../api/proxy'; import { useGlobalInfoStore } from '../../context/globalInfo'; import { useTranslation } from 'react-i18next'; const ProxyForm: React.FC = () => { const { t } = useTranslation(); const [proxyConfigForm, setProxyConfigForm] = useState({ server_url: '', username: '', password: '', }); const [requiresAuth, setRequiresAuth] = useState(false); const [errors, setErrors] = useState({ server_url: '', username: '', password: '', }); const [isProxyConfigured, setIsProxyConfigured] = useState(false); const [proxy, setProxy] = useState({ proxy_url: '', auth: false }); const { notify } = useGlobalInfoStore(); const validateForm = () => { let valid = true; let errorMessages = { server_url: '', username: '', password: '' }; if (!proxyConfigForm.server_url) { errorMessages.server_url = 'Server URL is required'; valid = false; } if (requiresAuth) { if (!proxyConfigForm.username) { errorMessages.username = 'Username is required for authenticated proxies'; valid = false; } if (!proxyConfigForm.password) { errorMessages.password = 'Password is required for authenticated proxies'; valid = false; } } setErrors(errorMessages); return valid; }; const handleChange = (e: React.ChangeEvent) => { const { name, value } = e.target; setProxyConfigForm({ ...proxyConfigForm, [name]: value }); }; const handleAuthToggle = (e: React.ChangeEvent) => { setRequiresAuth(e.target.checked); if (!e.target.checked) { setProxyConfigForm({ ...proxyConfigForm, username: '', password: '' }); setErrors({ 
...errors, username: '', password: '' }); } }; const handleSubmit = async (e: React.FormEvent) => { e.preventDefault(); if (!validateForm()) { return; } try { const response = await sendProxyConfig(proxyConfigForm); if (response) { setIsProxyConfigured(true); setProxy({ proxy_url: proxyConfigForm.server_url, auth: requiresAuth }); notify('success', t('proxy.notifications.config_success')); fetchProxyConfig(); } else { notify('error', t('proxy.notifications.config_error')); console.log(`${t('proxy.notifications.config_error')} ${response}`) } } catch (error: any) { notify('error', `${error} : ${t('proxy.notifications.config_error')}`); } }; const testProxy = async () => { await testProxyConfig().then((response) => { if (response.success) { notify('success', t('proxy.notifications.test_success')); } else { notify('error', t('proxy.notifications.test_error')); } }); }; const fetchProxyConfig = async () => { try { const response = await getProxyConfig(); if (response.proxy_url) { setIsProxyConfigured(true); setProxy(response); notify('success', t('proxy.notifications.fetch_success')); } } catch (error: any) { notify('error', error); } }; const removeProxy = async () => { await deleteProxyConfig().then((response) => { if (response) { notify('success', t('proxy.notifications.remove_success')); setIsProxyConfigured(false); setProxy({ proxy_url: '', auth: false }); } else { notify('error', t('proxy.notifications.remove_error')); } }); } useEffect(() => { fetchProxyConfig(); }, []); return ( {t('proxy.title')} { isProxyConfigured ? ( {t('proxy.table.proxy_url')} {t('proxy.table.requires_auth')} {proxy.proxy_url} {proxy.auth ? 'Yes' : 'No'}
) : ( {errors.server_url || t('proxy.server_url_helper')} } /> } label={t('proxy.requires_auth')} /> {requiresAuth && ( <> )} )}
{t('proxy.alert.title')}
{t('proxy.alert.right_way')}
{t('proxy.alert.proxy_url')} http://proxy.com:1337
{t('proxy.alert.username')} myusername
{t('proxy.alert.password')} mypassword

{t('proxy.alert.wrong_way')}
{t('proxy.alert.proxy_url')} http://myusername:mypassword@proxy.com:1337
); }; export default ProxyForm; ================================================ FILE: src/components/recorder/DOMBrowserRenderer.tsx ================================================ import React, { useCallback, useContext, useEffect, useState, useRef, } from "react"; import { useSocketStore } from "../../context/socket"; import { useGlobalInfoStore } from "../../context/globalInfo"; import { AuthContext } from "../../context/auth"; import { Replayer } from "rrweb" import { ActionType, clientSelectorGenerator, } from "../../helpers/clientSelectorGenerator"; interface ElementInfo { tagName: string; hasOnlyText?: boolean; isIframeContent?: boolean; isShadowRoot?: boolean; innerText?: string; url?: string; imageUrl?: string; attributes?: Record; innerHTML?: string; outerHTML?: string; isDOMMode?: boolean; } interface RRWebDOMBrowserRendererProps { width: number; height: number; getList?: boolean; getText?: boolean; listSelector?: string | null; cachedChildSelectors?: string[]; paginationMode?: boolean; paginationSelector?: string; paginationType?: string; limitMode?: boolean; isCachingChildSelectors?: boolean; onHighlight?: (data: { rect: DOMRect; selector: string; isShadow?: boolean; elementInfo: ElementInfo | null; childSelectors?: string[]; groupInfo?: any; similarElements?: any; }) => void; onElementSelect?: (data: { rect: DOMRect; selector: string; isShadow?: boolean; elementInfo: ElementInfo | null; childSelectors?: string[]; groupInfo?: any; }) => void; onShowDatePicker?: (info: { coordinates: { x: number; y: number }; selector: string; }) => void; onShowDropdown?: (info: { coordinates: { x: number; y: number }; selector: string; options: Array<{ value: string; text: string; disabled: boolean; selected: boolean; }>; }) => void; onShowTimePicker?: (info: { coordinates: { x: number; y: number }; selector: string; }) => void; onShowDateTimePicker?: (info: { coordinates: { x: number; y: number }; selector: string; }) => void; } /** * Walks up the DOM from `element` 
looking for the nearest ancestor that can actually scroll:
 * its computed overflow on an axis is `auto` or `scroll` and its content is
 * larger than its client box on that same axis. The walk starts at `element`
 * itself and stops before `root`, which is never returned. The wheel handler
 * uses this to scroll the right container right away instead of always
 * scrolling the viewport.
 *
 * @param element Element the walk starts from (it is checked too).
 * @param root Exclusive upper bound of the walk.
 * @returns The first scrollable element found, or null when there is none,
 *   when an element has no owning window, or when reading its style throws.
 */
function findScrollableAncestor(element: Element, root: Element): Element | null {
  const allowsScroll = (overflow: string): boolean =>
    overflow === 'auto' || overflow === 'scroll';

  for (
    let node: Element | null = element;
    node && node !== root;
    node = node.parentElement
  ) {
    try {
      const view = node.ownerDocument?.defaultView;
      if (!view) {
        return null;
      }

      const computed = view.getComputedStyle(node);
      const overflowY = computed.overflowY;
      const overflowX = computed.overflowX;

      if (
        (allowsScroll(overflowY) && node.scrollHeight > node.clientHeight) ||
        (allowsScroll(overflowX) && node.scrollWidth > node.clientWidth)
      ) {
        return node;
      }
    } catch {
      // Any failure while inspecting the element ends the search.
      return null;
    }
  }

  return null;
}

export const DOMBrowserRenderer: React.FC = ({
  width,
  height,
  getList = false,
  getText = false,
  listSelector = null,
  cachedChildSelectors = [],
  paginationMode = false,
  paginationSelector = "",
  paginationType = "",
  limitMode = false,
  isCachingChildSelectors = false,
  onHighlight,
  onElementSelect,
  onShowDatePicker,
  onShowDropdown,
  onShowTimePicker,
  onShowDateTimePicker,
}) => {
  const containerRef = useRef(null);
  const replayerIframeRef = useRef(null);
  const replayerRef = useRef(null);
  const iframeRef = useRef(null);
  const [isRendered, setIsRendered] = useState(false);
  const [lastMousePosition, setLastMousePosition] = useState({ x: 0, y: 0 });
  const [currentHighlight, setCurrentHighlight] = useState<{
    element: Element;
    rect: DOMRect;
    selector: string;
    elementInfo: ElementInfo;
    childSelectors?: string[];
  } | null>(null);

  const { socket } = useSocketStore();
  const { setLastAction, lastAction } = useGlobalInfoStore();

  const { state } = useContext(AuthContext);
  const { user } = state;

  const MOUSE_MOVE_THROTTLE = 16;
  const lastMouseMoveTime = useRef(0);
  const lastScrollEmitTime = useRef(0);
  const pendingScrollDelta = useRef({ deltaX: 0, deltaY: 0 });
  const isUserScrollingRef = useRef(false);
  const
userScrollDebounceRef = useRef | null>(null); const lastDroppedScrollEventRef = useRef(null); const notifyLastAction = (action: string) => { if (lastAction !== action) { setLastAction(action); } }; const isInCaptureMode = getText || getList; useEffect(() => { clientSelectorGenerator.setGetList(getList); clientSelectorGenerator.setListSelector(listSelector || ""); clientSelectorGenerator.setPaginationMode(paginationMode); }, [getList, listSelector, paginationMode]); useEffect(() => { if (listSelector) { clientSelectorGenerator.setListSelector(listSelector); clientSelectorGenerator.setGetList(getList); clientSelectorGenerator.setPaginationMode(paginationMode); } }, [listSelector, getList, paginationMode]); /** * Handle client-side highlighting for DOM mode using complete backend logic */ const handleDOMHighlighting = useCallback( (x: number, y: number, iframeDoc: Document) => { try { if (!getText && !getList) { setCurrentHighlight(null); if (onHighlight) { onHighlight({ rect: new DOMRect(0, 0, 0, 0), selector: "", elementInfo: null, }); } return; } const highlighterData = clientSelectorGenerator.generateDataForHighlighter( { x, y }, iframeDoc, true, cachedChildSelectors ); if (!highlighterData) { setCurrentHighlight(null); if (onHighlight) { onHighlight({ rect: new DOMRect(0, 0, 0, 0), selector: "", elementInfo: null, }); } return; } const { rect, selector, elementInfo, childSelectors, groupInfo, similarElements, isShadow } = highlighterData; let shouldHighlight = false; if (getList) { if (!listSelector && groupInfo?.isGroupElement) { shouldHighlight = true; } else if (listSelector) { if (limitMode) { shouldHighlight = false; } else if ( paginationMode && paginationSelector && paginationType !== "" && !["none", "scrollDown", "scrollUp"].includes(paginationType) ) { shouldHighlight = false; } else if ( paginationMode && !paginationSelector && paginationType !== "" && !["none", "scrollDown", "scrollUp"].includes(paginationType) ) { shouldHighlight = true; } else if 
(childSelectors && childSelectors.length > 0) { shouldHighlight = true; } else { shouldHighlight = false; } } else { shouldHighlight = true; } } else { shouldHighlight = true; } if (shouldHighlight) { const element = iframeDoc.elementFromPoint(x, y); if (element) { setCurrentHighlight({ element, rect: rect, selector, elementInfo: { ...elementInfo, tagName: elementInfo?.tagName ?? "", isDOMMode: true, }, childSelectors, }); if (onHighlight) { onHighlight({ rect: rect, elementInfo: { ...elementInfo, tagName: elementInfo?.tagName ?? "", isDOMMode: true, }, selector, isShadow, childSelectors, groupInfo, similarElements, }); } } } else { setCurrentHighlight(null); if (onHighlight) { onHighlight({ rect: new DOMRect(0, 0, 0, 0), selector: "", elementInfo: null, }); } } } catch (error) { console.error("Error in DOM highlighting:", error); setCurrentHighlight(null); } }, [ getText, getList, listSelector, paginationMode, paginationSelector, cachedChildSelectors, paginationType, limitMode, onHighlight, ] ); /** * Set up enhanced interaction handlers for DOM mode */ const setupIframeInteractions = useCallback( (iframeDoc: Document) => { const existingHandlers = (iframeDoc as any)._domRendererHandlers; if (existingHandlers) { Object.entries(existingHandlers).forEach(([event, handler]) => { const options: boolean | AddEventListenerOptions = ['wheel', 'touchstart', 'touchmove'].includes(event) ? 
{ passive: false } : false; iframeDoc.removeEventListener(event, handler as EventListener, options); }); } const handlers: { [key: string]: EventListener } = {}; const mouseMoveHandler: EventListener = (e: Event) => { if (e.target && !iframeDoc.contains(e.target as Node)) { return; } if (!isInCaptureMode) { return; } const now = performance.now(); if (now - lastMouseMoveTime.current < MOUSE_MOVE_THROTTLE) { return; } lastMouseMoveTime.current = now; const mouseEvent = e as MouseEvent; const iframeX = mouseEvent.clientX; const iframeY = mouseEvent.clientY; const iframe = replayerIframeRef.current; if (iframe) { const iframeRect = iframe.getBoundingClientRect(); setLastMousePosition({ x: iframeX + iframeRect.left, y: iframeY + iframeRect.top, }); } handleDOMHighlighting(iframeX, iframeY, iframeDoc); notifyLastAction("move"); }; const mouseDownHandler: EventListener = (e: Event) => { if (e.target && !iframeDoc.contains(e.target as Node)) { return; } const mouseEvent = e as MouseEvent; const target = mouseEvent.target as Element; const iframeX = mouseEvent.clientX; const iframeY = mouseEvent.clientY; if (isInCaptureMode) { e.preventDefault(); e.stopPropagation(); if (currentHighlight && onElementSelect) { const highlighterData = clientSelectorGenerator.generateDataForHighlighter( { x: iframeX, y: iframeY }, iframeDoc, true, cachedChildSelectors ); onElementSelect({ rect: currentHighlight.rect, selector: currentHighlight.selector, elementInfo: currentHighlight.elementInfo, isShadow: highlighterData?.isShadow, childSelectors: cachedChildSelectors.length > 0 ? 
cachedChildSelectors : highlighterData?.childSelectors || [], groupInfo: highlighterData?.groupInfo, }); } notifyLastAction("select element"); return; } const linkElement = target.closest("a[href]") as HTMLAnchorElement; if (linkElement && linkElement.href && socket) { e.preventDefault(); e.stopPropagation(); const href = linkElement.href; const originalTarget = linkElement.target; if (linkElement.target) { linkElement.target = ""; } const originalHref = linkElement.href; linkElement.removeAttribute("href"); setTimeout(() => { try { linkElement.setAttribute("href", originalHref); if (originalTarget) { linkElement.setAttribute("target", originalTarget); } } catch (error) { console.warn("Could not restore link attributes:", error); } }, 100); const isSPALink = href.startsWith('#'); const selector = clientSelectorGenerator.generateSelector( iframeDoc, { x: iframeX, y: iframeY }, ActionType.Click ); const elementInfo = clientSelectorGenerator.getElementInformation( iframeDoc, { x: iframeX, y: iframeY }, clientSelectorGenerator.getCurrentState().listSelector, clientSelectorGenerator.getCurrentState().getList ); if (selector && socket) { socket.emit("dom:click", { selector, userId: user?.id || "unknown", elementInfo, coordinates: undefined, isSPA: isSPALink, }); notifyLastAction( isSPALink ? 
`SPA navigation to ${href}` : `navigate to ${href}` ); } return; } const selector = clientSelectorGenerator.generateSelector( iframeDoc, { x: iframeX, y: iframeY }, ActionType.Click ); const elementInfo = clientSelectorGenerator.getElementInformation( iframeDoc, { x: iframeX, y: iframeY }, clientSelectorGenerator.getCurrentState().listSelector, clientSelectorGenerator.getCurrentState().getList ); if (selector && elementInfo && socket) { if (elementInfo?.tagName === "SELECT" && elementInfo.innerHTML) { const inputElement = target as HTMLInputElement; inputElement.blur(); const wasDisabled = inputElement.disabled; inputElement.disabled = true; setTimeout(() => { inputElement.disabled = wasDisabled; }, 100); const options = elementInfo.innerHTML .split(" { const valueMatch = optionHtml.match(/value="([^"]*)"/); const textMatch = optionHtml.match(/>([^<]*) { inputElement.disabled = wasDisabled; }, 100); const pickerInfo = { coordinates: { x: iframeX, y: iframeY }, selector, }; switch (inputType) { case "date": case "month": case "week": if (onShowDatePicker) { onShowDatePicker(pickerInfo); } break; case "time": if (onShowTimePicker) { onShowTimePicker(pickerInfo); } break; case "datetime-local": if (onShowDateTimePicker) { onShowDateTimePicker(pickerInfo); } break; } notifyLastAction(`${inputType} picker opened`); return; } } if (elementInfo?.tagName === "INPUT" || elementInfo?.tagName === "TEXTAREA") { const element = target as HTMLElement; const elementRect = element.getBoundingClientRect(); const relativeX = iframeX - elementRect.left; const relativeY = iframeY - elementRect.top; socket.emit("dom:click", { selector, userId: user?.id || "unknown", elementInfo, coordinates: { x: relativeX, y: relativeY }, isSPA: false, }); } else if (elementInfo?.tagName !== "SELECT") { socket.emit("dom:click", { selector, userId: user?.id || "unknown", elementInfo, coordinates: { x: iframeX, y: iframeY }, isSPA: false, }); } } notifyLastAction("click"); }; const mouseUpHandler: 
EventListener = (e: Event) => { if (e.target && !iframeDoc.contains(e.target as Node)) { return; } if (!isInCaptureMode) { notifyLastAction("release"); } }; const keyDownHandler: EventListener = (e: Event) => { if (e.target && !iframeDoc.contains(e.target as Node)) { return; } const keyboardEvent = e as KeyboardEvent; const target = keyboardEvent.target as HTMLElement; if (!isInCaptureMode && socket) { const iframe = replayerIframeRef.current; if (iframe) { const focusedElement = iframeDoc.activeElement as HTMLElement; let coordinates = { x: 0, y: 0 }; if (focusedElement && focusedElement !== iframeDoc.body) { const rect = focusedElement.getBoundingClientRect(); coordinates = { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }; } else { const iframeRect = iframe.getBoundingClientRect(); coordinates = { x: lastMousePosition.x - iframeRect.left, y: lastMousePosition.y - iframeRect.top }; } const selector = clientSelectorGenerator.generateSelector( iframeDoc, coordinates, ActionType.Keydown ); const elementInfo = clientSelectorGenerator.getElementInformation( iframeDoc, coordinates, clientSelectorGenerator.getCurrentState().listSelector, clientSelectorGenerator.getCurrentState().getList ); if (selector) { socket.emit("dom:keypress", { selector, key: keyboardEvent.key, userId: user?.id || "unknown", inputType: elementInfo?.attributes?.type || "text", }); } } notifyLastAction(`${keyboardEvent.key} typed`); } if ( ["INPUT", "TEXTAREA"].includes(target.tagName) && !isInCaptureMode ) { return; } }; const keyUpHandler: EventListener = (e: Event) => { if (e.target && !iframeDoc.contains(e.target as Node)) { return; } const keyboardEvent = e as KeyboardEvent; if (!isInCaptureMode && socket) { socket.emit("input:keyup", { key: keyboardEvent.key }); } }; const wheelHandler: EventListener = (e: Event) => { if (e.target && !iframeDoc.contains(e.target as Node)) { return; } e.preventDefault(); e.stopPropagation(); if (isCachingChildSelectors) { return; } const 
wheelEvent = e as WheelEvent; const deltaX = wheelEvent.deltaX; const deltaY = wheelEvent.deltaY; if (Math.abs(deltaX) > 1 || Math.abs(deltaY) > 1) { const target = wheelEvent.target as Element; const scrollable = findScrollableAncestor(target, iframeDoc.documentElement); if (scrollable) { scrollable.scrollBy(deltaX, deltaY); } else { iframeDoc.defaultView?.scrollBy(deltaX, deltaY); } isUserScrollingRef.current = true; if (userScrollDebounceRef.current) clearTimeout(userScrollDebounceRef.current); userScrollDebounceRef.current = setTimeout(() => { isUserScrollingRef.current = false; const lastScroll = lastDroppedScrollEventRef.current; if (lastScroll && replayerRef.current) { try { replayerRef.current.addEvent(lastScroll); } catch (_) {} lastDroppedScrollEventRef.current = null; } }, 500); pendingScrollDelta.current.deltaX += deltaX; pendingScrollDelta.current.deltaY += deltaY; const now = performance.now(); if (now - lastScrollEmitTime.current < 50) return; lastScrollEmitTime.current = now; const accX = pendingScrollDelta.current.deltaX; const accY = pendingScrollDelta.current.deltaY; pendingScrollDelta.current = { deltaX: 0, deltaY: 0 }; if (socket) { socket.emit("dom:scroll", { deltaX: accX, deltaY: accY }); } notifyLastAction("scroll"); } }; const clickHandler: EventListener = (e: Event) => { if (e.target && !iframeDoc.contains(e.target as Node)) { return; } if (isInCaptureMode) { e.preventDefault(); e.stopPropagation(); return; } }; const preventDefaults = (e: Event) => { if (e.target && !iframeDoc.contains(e.target as Node)) { return; } e.preventDefault(); e.stopPropagation(); return false; }; handlers.mousedown = mouseDownHandler; handlers.mouseup = mouseUpHandler; handlers.mousemove = mouseMoveHandler; handlers.wheel = wheelHandler; handlers.keydown = keyDownHandler; handlers.keyup = keyUpHandler; handlers.click = clickHandler; handlers.submit = preventDefaults; handlers.beforeunload = preventDefaults; Object.entries(handlers).forEach(([event, handler]) => 
{ const options: boolean | AddEventListenerOptions = ['wheel', 'touchstart', 'touchmove'].includes(event) ? { passive: false } : false; iframeDoc.addEventListener(event, handler, options); }); (iframeDoc as any)._domRendererHandlers = handlers; const iframe = replayerIframeRef.current; if (iframe) { iframe.tabIndex = 0; } }, [ socket, lastMousePosition, notifyLastAction, handleDOMHighlighting, currentHighlight, onElementSelect, isInCaptureMode, user?.id, onShowDatePicker, onShowDropdown, onShowTimePicker, onShowDateTimePicker, cachedChildSelectors ] ); /** * Cleanup replayer on unmount */ useEffect(() => { return () => { if (replayerRef.current) { replayerRef.current.pause(); replayerRef.current = null; } }; }, []); /** * Listen for rrweb events from backend and add to replayer */ useEffect(() => { if (!socket) { console.warn('No socket available, skipping event listener setup'); return; } const handleRRWebEvent = (event: any) => { if (!replayerRef.current && event.type === 2) { const container = document.getElementById('mirror-container'); if (!container) { console.warn('Container #mirror-container not found'); return; } const replayer = new Replayer([], { root: container, liveMode: true, mouseTail: false }); replayer.startLive(); replayer.addEvent(event); replayerRef.current = replayer; setTimeout(() => { const replayerWrapper = container.querySelector('.replayer-wrapper'); const replayerIframe = replayerWrapper?.querySelector('iframe') as HTMLIFrameElement; if (replayerIframe) { replayerIframe.style.width = '100%'; replayerIframe.style.height = '100%'; replayerIframe.style.border = 'none'; replayerIframe.style.position = 'absolute'; replayerIframe.style.top = '0'; replayerIframe.style.left = '0'; replayerIframe.style.backgroundColor = '#ffffff'; replayerIframe.style.display = 'block'; replayerIframe.style.pointerEvents = 'auto'; replayerIframe.id = 'dom-browser-iframe'; replayerIframeRef.current = replayerIframe; try { const iframeDoc = 
replayerIframe.contentDocument; if (iframeDoc) { setupIframeInteractions(iframeDoc); } } catch (err) { console.warn('Error accessing iframe:', err); } replayer.on('fullsnapshot-rebuilded', () => { const iframe = replayerIframeRef.current; if (iframe && iframe.contentDocument) { setupIframeInteractions(iframe.contentDocument); iframe.style.pointerEvents = 'auto'; const wrapper = container.querySelector('.replayer-wrapper') as HTMLElement; if(wrapper) wrapper.style.pointerEvents = 'auto'; setIsRendered(true); } }); } else { console.warn('Could not find iframe in replayer-wrapper'); } }, 150); } else if (replayerRef.current) { replayerRef.current.addEvent(event); } }; socket.on('rrweb-event', handleRRWebEvent); socket.emit('request-refresh'); return () => { socket.off('rrweb-event', handleRRWebEvent); }; }, [socket, setupIframeInteractions]); useEffect(() => { const iframe = replayerIframeRef.current; if (iframe && iframe.contentDocument) { setupIframeInteractions(iframe.contentDocument); } }, [setupIframeInteractions]); return (
{!isRendered && ( )}
); };

/**
 * Loading overlay that mirrors the percentage reported by the socket's
 * "domLoadingProgress" event.
 *
 * Events whose `userId` is set and differs from the signed-in user's id are
 * ignored. Until a positive progress value has been seen, every reported
 * value is accepted; afterwards the displayed value never decreases.
 */
const DOMLoadingIndicator: React.FC = () => {
  const [progress, setProgress] = useState(0);
  const [hasStartedLoading, setHasStartedLoading] = useState(false);
  const { socket } = useSocketStore();
  const { state } = useContext(AuthContext);
  const { user } = state;

  useEffect(() => {
    if (!socket) return;

    const handleLoadingProgress = (data: {
      progress: number;
      pendingRequests: number;
      userId: string;
    }) => {
      // Ignore progress that is explicitly addressed to another user.
      if (data.userId && data.userId !== user?.id) return;

      if (!hasStartedLoading && data.progress > 0) {
        setHasStartedLoading(true);
      }

      // Functional update: compare against the latest committed value rather
      // than a render-time snapshot, so the listener does not need to be
      // re-registered on every tick and bursts of events are ordered
      // correctly. `!(previous > 0)` is the "loading has not started yet"
      // case, in which any reported value is accepted.
      setProgress((previous) =>
        !(previous > 0) || data.progress >= previous ? data.progress : previous
      );
    };

    socket.on("domLoadingProgress", handleLoadingProgress);
    return () => {
      socket.off("domLoadingProgress", handleLoadingProgress);
    };
  }, [socket, user?.id, hasStartedLoading]);

  return (
Loading {progress}%
); };
================================================ FILE: src/components/recorder/KeyValueForm.tsx ================================================
import React, { forwardRef, useImperativeHandle, useRef } from 'react';
import { KeyValuePair } from "./KeyValuePair";
import { AddButton } from "../ui/buttons/AddButton";
import { RemoveButton } from "../ui/buttons/RemoveButton";

/**
 * Form consisting of a variable number of key/value rows.
 *
 * Through the forwarded ref it exposes `getObject()`, which asks each mounted
 * row for its `{ key, value }` pair and merges them into one plain object
 * (a later row overwrites an earlier row that uses the same key).
 *
 * The row count is clamped at zero when removing rows: a negative count would
 * make `new Array(numberOfPairs)` throw a RangeError while rendering.
 */
export const KeyValueForm = forwardRef((props, ref) => {
  const [numberOfPairs, setNumberOfPairs] = React.useState(1);
  const keyValuePairRefs = useRef<{ getKeyValuePair: () => { key: string, value: string } }[]>([]);

  useImperativeHandle(ref, () => ({
    getObject() {
      let reducedObject = {};
      for (let i = 0; i < numberOfPairs; i++) {
        // A slot can be empty (row not mounted), hence the optional call.
        const keyValuePair = keyValuePairRefs.current[i]?.getKeyValuePair();
        if (keyValuePair) {
          reducedObject = {
            ...reducedObject,
            [keyValuePair.key]: keyValuePair.value
          }
        }
      }
      return reducedObject;
    }
  }));

  return (
{ new Array(numberOfPairs).fill(1).map((_, index) => { return keyValuePairRefs.current[index] = el} /> }) } setNumberOfPairs(numberOfPairs + 1)} hoverEffect={false} /> setNumberOfPairs(Math.max(0, numberOfPairs - 1))} />
); });
================================================ FILE: src/components/recorder/KeyValuePair.tsx ================================================
import React, { forwardRef, useImperativeHandle } from "react";
import { Box, TextField } from "@mui/material";

interface KeyValueFormProps {
  keyLabel?: string;
  valueLabel?: string;
}

/**
 * A single key/value input row.
 *
 * Through the forwarded ref it exposes `getKeyValuePair()`, which returns the
 * current `{ key, value }`. Input in the value field that parses as a number
 * is stored as a number; anything else is stored as the typed text.
 *
 * Blank input (empty or whitespace only) is kept as text: `Number('')` is 0,
 * so without that guard clearing the field would silently store the number 0.
 */
export const KeyValuePair = forwardRef(({ keyLabel, valueLabel }: KeyValueFormProps, ref) => {
  const [key, setKey] = React.useState('');
  const [value, setValue] = React.useState('');

  useImperativeHandle(ref, () => ({
    getKeyValuePair() {
      return { key, value };
    }
  }));

  return (
 :not(style)': { m: 1, width: '100px' }, }} noValidate autoComplete="off" > ) => setKey(event.target.value)} size="small" required /> ) => { const rawValue = event.target.value; const num = Number(rawValue); if (rawValue.trim() === '' || isNaN(num)) { setValue(rawValue); } else { setValue(num); } }} size="small" required />
  );
});
================================================ FILE: src/components/recorder/RightSidePanel.tsx ================================================
import React, { useState, useCallback, useEffect, useRef, useMemo } from 'react';
import { generateUUID } from '../../helpers/uuid';
import { Button, Paper, Box, TextField, IconButton, Tooltip } from "@mui/material";
import { WorkflowFile } from "maxun-core";
import Typography from "@mui/material/Typography";
import { useGlobalInfoStore } from "../../context/globalInfo";
import { PaginationType, useActionContext, LimitType } from '../../context/browserActions';
import { BrowserStep, useBrowserSteps } from '../../context/browserSteps';
import { useSocketStore } from '../../context/socket';
import FormControlLabel from '@mui/material/FormControlLabel';
import FormControl from '@mui/material/FormControl';
import Radio from '@mui/material/Radio';
import RadioGroup from '@mui/material/RadioGroup';
import { getActiveWorkflow } from "../../api/workflow";
import ActionDescriptionBox from '../action/ActionDescriptionBox';
import { useThemeMode }
from '../../context/theme-provider'; import { useTranslation } from 'react-i18next'; import { useBrowserDimensionsStore } from '../../context/browserDimensions'; import { clientListExtractor } from '../../helpers/clientListExtractor'; import { clientSelectorGenerator } from '../../helpers/clientSelectorGenerator'; import { clientPaginationDetector } from '../../helpers/clientPaginationDetector'; const fetchWorkflow = (id: string, callback: (response: WorkflowFile) => void) => { getActiveWorkflow(id).then( (response) => { if (response) { callback(response); } else { throw new Error("No workflow found"); } } ).catch((error) => { console.log(`Failed to fetch workflow:`, error.message) }) }; interface RightSidePanelProps { onFinishCapture: () => void; } export const RightSidePanel: React.FC = ({ onFinishCapture }) => { const [showCaptureList, setShowCaptureList] = useState(true); const [showCaptureScreenshot, setShowCaptureScreenshot] = useState(true); const [showCaptureText, setShowCaptureText] = useState(true); const { panelHeight } = useBrowserDimensionsStore(); const [autoDetectedPagination, setAutoDetectedPagination] = useState<{ type: PaginationType; selector: string | null; confidence: 'high' | 'medium' | 'low'; } | null>(null); const autoDetectionRunRef = useRef(null); const { notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog, currentListActionId, setCurrentListActionId, currentTextActionId, setCurrentTextActionId, currentScreenshotActionId, setCurrentScreenshotActionId, isDOMMode, updateDOMMode, currentTextGroupName } = useGlobalInfoStore(); const { getText, startGetText, stopGetText, getList, startGetList, stopGetList, getScreenshot, startGetScreenshot, stopGetScreenshot, startPaginationMode, stopPaginationMode, paginationType, updatePaginationType, limitType, customLimit, updateLimitType, updateCustomLimit, stopLimitMode, startLimitMode, captureStage, setCaptureStage, showPaginationOptions, setShowPaginationOptions, 
showLimitOptions, setShowLimitOptions, workflow, setWorkflow, activeAction, setActiveAction, finishAction } = useActionContext(); const { browserSteps, addScreenshotStep, updateListStepLimit, updateListStepPagination, deleteStepsByActionId, updateListStepData, updateScreenshotStepData, emitActionForStep } = useBrowserSteps(); const { id, socket } = useSocketStore(); const { t } = useTranslation(); const isAnyActionActive = activeAction !== 'none'; const workflowHandler = useCallback((data: WorkflowFile) => { setWorkflow(data); }, [setWorkflow]); useEffect(() => { if (!paginationType || !currentListActionId) return; const currentListStep = browserSteps.find( step => step.type === 'list' && step.actionId === currentListActionId ) as (BrowserStep & { type: 'list' }) | undefined; const currentSelector = currentListStep?.pagination?.selector; const currentType = currentListStep?.pagination?.type; if (['clickNext', 'clickLoadMore'].includes(paginationType)) { const needsSelector = !currentSelector && !currentType; const typeChanged = currentType && currentType !== paginationType; if (typeChanged) { const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement; if (iframeElement?.contentDocument && currentSelector) { try { function evaluateSelector(selector: string, doc: Document): Element[] { if (selector.startsWith('//') || selector.startsWith('(//')) { try { const result = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); const elements: Element[] = []; for (let i = 0; i < result.snapshotLength; i++) { const node = result.snapshotItem(i); if (node && node.nodeType === Node.ELEMENT_NODE) { elements.push(node as Element); } } return elements; } catch (err) { return []; } } else { try { return Array.from(doc.querySelectorAll(selector)); } catch (err) { return []; } } } const elements = evaluateSelector(currentSelector, iframeElement.contentDocument); elements.forEach((el: Element) => { (el as 
HTMLElement).style.outline = ''; (el as HTMLElement).style.outlineOffset = ''; (el as HTMLElement).style.zIndex = ''; }); } catch (error) { console.error('Error removing pagination highlight:', error); } } if (currentListStep) { updateListStepPagination(currentListStep.id, { type: paginationType, selector: null, }); } startPaginationMode(); } else if (needsSelector) { startPaginationMode(); } } }, [paginationType, currentListActionId, browserSteps, updateListStepPagination, startPaginationMode]);

  // Turn DOM mode on when the server emits "dom-mode-enabled" without a
  // userId, or with a userId equal to the socket store's `id`.
  useEffect(() => {
    if (socket) {
      const domModeHandler = (data: any) => {
        if (!data.userId || data.userId === id) {
          updateDOMMode(true);
        }
      };

      socket.on("dom-mode-enabled", domModeHandler);

      return () => {
        socket.off("dom-mode-enabled", domModeHandler);
      };
    }
  }, [socket, id, updateDOMMode]);

  // Keep the local workflow in sync: listen for pushed "workflow" events,
  // fetch once immediately, then re-fetch every 15 minutes.
  useEffect(() => {
    if (socket) {
      socket.on("workflow", workflowHandler);
    }
    if (id) {
      fetchWorkflow(id, workflowHandler);
    }
    let interval = setInterval(() => {
      if (id) {
        fetchWorkflow(id, workflowHandler);
      }
    }, (1000 * 60 * 15));
    return () => {
      socket?.off("workflow", workflowHandler);
      clearInterval(interval);
    };
  }, [id, socket, workflowHandler]);

  // Derive which action kinds the current workflow already contains and
  // publish that summary; the three capture buttons are always shown.
  useEffect(() => {
    const hasPairs = workflow.workflow.length > 0;
    if (!hasPairs) {
      setShowCaptureList(true);
      setShowCaptureScreenshot(true);
      setShowCaptureText(true);
      return;
    }

    const hasScrapeListAction = workflow.workflow.some(pair =>
      pair.what.some(action => action.action === 'scrapeList')
    );
    const hasScreenshotAction = workflow.workflow.some(pair =>
      pair.what.some(action => action.action === 'screenshot')
    );
    const hasScrapeSchemaAction = workflow.workflow.some(pair =>
      pair.what.some(action => action.action === 'scrapeSchema')
    );

    setCurrentWorkflowActionsState({
      hasScrapeListAction,
      hasScreenshotAction,
      hasScrapeSchemaAction,
    });

    setShowCaptureList(true);
    setShowCaptureScreenshot(true);
    setShowCaptureText(true);
  }, [workflow, setCurrentWorkflowActionsState]);

  // Apply server-side list extraction results, but only outside DOM mode.
  // The handler is named so that cleanup removes exactly this listener:
  // calling `off` with only the event name would also drop listeners that
  // other code registered for "listDataExtracted" on the shared socket.
  useEffect(() => {
    if (!socket) return;

    const handleListDataExtracted = (response: any) => {
      if (!isDOMMode) {
        const { currentListId, data } = response;
        updateListStepData(currentListId, data);
      }
    };

    socket.on('listDataExtracted', handleListDataExtracted);

    return () => {
      socket.off('listDataExtracted', handleListDataExtracted);
    };
  }, [socket, updateListStepData, isDOMMode]);

  // When a direct screenshot arrives, attach it to the most recent screenshot
  // step of the current screenshot action, emit that step's action, and clear
  // the current screenshot action id.
  useEffect(() => {
    if (socket) {
      const handleDirectScreenshot = (data: any) => {
        const screenshotSteps = browserSteps.filter(step =>
          step.type === 'screenshot' && step.actionId === currentScreenshotActionId
        );

        if (screenshotSteps.length > 0) {
          const latestStep = screenshotSteps[screenshotSteps.length - 1];
          updateScreenshotStepData(latestStep.id, data.screenshot);
          emitActionForStep(latestStep);
        }

        setCurrentScreenshotActionId('');
      };

      socket.on('directScreenshotCaptured', handleDirectScreenshot);

      return () => {
        socket.off('directScreenshotCaptured', handleDirectScreenshot);
      };
    }
  }, [socket, id, notify, t, currentScreenshotActionId, updateScreenshotStepData, setCurrentScreenshotActionId, emitActionForStep, browserSteps]);

  // Runs list extraction in the browser against the replayer iframe's
  // document (DOM mode only) and stores the result on the list step.
  const extractDataClientSide = useCallback(
    (
      listSelector: string,
      fields: Record,
      currentListId: number
    ) => {
      if (isDOMMode) {
        try {
          // Look the iframe up by its dedicated id first, then fall back to
          // any iframe inside #browser-window.
          let iframeElement = document.querySelector(
            "#dom-browser-iframe"
          ) as HTMLIFrameElement;

          if (!iframeElement) {
            iframeElement = document.querySelector(
              "#browser-window iframe"
            ) as HTMLIFrameElement;
          }

          if (!iframeElement) {
            const browserWindow = document.querySelector("#browser-window");
            if (browserWindow) {
              iframeElement = browserWindow.querySelector(
                "iframe"
              ) as HTMLIFrameElement;
            }
          }

          if (!iframeElement) {
            console.error(
              "Could not find the DOM iframe element for extraction"
            );
            return;
          }

          const iframeDoc = iframeElement.contentDocument;
          if (!iframeDoc) {
            console.error("Failed to get iframe document");
            return;
          }

          const extractedData = clientListExtractor.extractListData(
            iframeDoc,
            listSelector,
            fields,
            5
          );

          updateListStepData(currentListId, extractedData);

          if (extractedData.length === 0) {
            console.warn("⚠️ No data extracted - this might indicate selector issues");
            notify("warning", "No data was extracted. 
Please verify your selections."); } } catch (error) { console.error("Error in client-side data extraction:", error); notify("error", "Failed to extract data client-side"); } } }, [isDOMMode, updateListStepData, socket, notify, currentWorkflowActionsState] ); useEffect(() => { if (!getList) return; const currentListStep = browserSteps.find( step => step.type === 'list' && step.actionId === currentListActionId ) as (BrowserStep & { type: 'list'; listSelector?: string; fields?: Record }) | undefined; if (!currentListStep || !currentListStep.listSelector || !currentListStep.fields) return; const fieldCount = Object.keys(currentListStep.fields).length; if (fieldCount > 0) { extractDataClientSide( currentListStep.listSelector, currentListStep.fields, currentListStep.id ); setCurrentWorkflowActionsState({ ...currentWorkflowActionsState, hasScrapeListAction: true }); } }, [browserSteps, currentListActionId, getList, extractDataClientSide, setCurrentWorkflowActionsState, currentWorkflowActionsState]); const handleStartGetText = () => { const newActionId = `text-${generateUUID()}`; setCurrentTextActionId(newActionId); startGetText(); } const handleStartGetList = () => { const newActionId = `list-${generateUUID()}`; setCurrentListActionId(newActionId); startGetList(); } const handleStartGetScreenshot = () => { const newActionId = `screenshot-${generateUUID()}`; setCurrentScreenshotActionId(newActionId); startGetScreenshot(); }; const stopCaptureAndEmitGetTextSettings = useCallback(() => { const currentTextActionStep = browserSteps.find(step => step.type === 'text' && step.actionId === currentTextActionId); if (!currentTextActionStep) { notify('error', t('right_panel.errors.no_text_captured')); return; } stopGetText(); if (currentTextActionStep) { emitActionForStep(currentTextActionStep); } setCurrentTextActionId(''); resetInterpretationLog(); finishAction('text'); onFinishCapture(); clientSelectorGenerator.cleanup(); }, [stopGetText, socket, browserSteps, 
resetInterpretationLog, finishAction, notify, onFinishCapture, t, currentTextActionId, currentTextGroupName, emitActionForStep]); const resetListState = useCallback(() => { setShowPaginationOptions(false); updatePaginationType(''); setShowLimitOptions(false); updateLimitType(''); updateCustomLimit(''); }, [updatePaginationType, updateLimitType, updateCustomLimit]); const handleStopGetList = useCallback(() => { stopGetList(); resetListState(); }, [stopGetList, resetListState]); const stopCaptureAndEmitGetListSettings = useCallback(() => { if (autoDetectedPagination?.selector) { const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement; if (iframeElement?.contentDocument) { try { function evaluateSelector(selector: string, doc: Document): Element[] { if (selector.startsWith('//') || selector.startsWith('(//')) { try { const result = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); const elements: Element[] = []; for (let i = 0; i < result.snapshotLength; i++) { const node = result.snapshotItem(i); if (node && node.nodeType === Node.ELEMENT_NODE) { elements.push(node as Element); } } return elements; } catch (err) { return []; } } else { try { return Array.from(doc.querySelectorAll(selector)); } catch (err) { return []; } } } const elements = evaluateSelector(autoDetectedPagination.selector, iframeElement.contentDocument); elements.forEach((el: Element) => { (el as HTMLElement).style.outline = ''; (el as HTMLElement).style.outlineOffset = ''; (el as HTMLElement).style.zIndex = ''; }); } catch (error) { console.error('Error removing pagination highlight on completion:', error); } } } const latestListStep = getLatestListStep(browserSteps); if (latestListStep) { extractDataClientSide(latestListStep.listSelector!, latestListStep.fields, latestListStep.id); setCurrentWorkflowActionsState({ ...currentWorkflowActionsState, hasScrapeListAction: true }); emitActionForStep(latestListStep); handleStopGetList(); 
setCurrentListActionId(''); resetInterpretationLog(); finishAction('list'); onFinishCapture(); clientSelectorGenerator.cleanup(); } else { notify('error', t('right_panel.errors.unable_create_settings')); handleStopGetList(); setCurrentListActionId(''); resetInterpretationLog(); finishAction('list'); onFinishCapture(); clientSelectorGenerator.cleanup(); } }, [socket, notify, handleStopGetList, resetInterpretationLog, finishAction, onFinishCapture, t, browserSteps, extractDataClientSide, setCurrentWorkflowActionsState, currentWorkflowActionsState, emitActionForStep, autoDetectedPagination]); const getLatestListStep = (steps: BrowserStep[]) => { const listSteps = steps.filter(step => step.type === 'list'); if (listSteps.length === 0) return null; return listSteps.sort((a, b) => b.id - a.id)[0]; }; const handleConfirmListCapture = useCallback(() => { switch (captureStage) { case 'initial': const hasValidListSelectorForCurrentAction = browserSteps.some(step => step.type === 'list' && step.actionId === currentListActionId && step.listSelector && Object.keys(step.fields).length > 0 ); if (!hasValidListSelectorForCurrentAction) { notify('error', t('right_panel.errors.capture_list_first')); return; } const currentListStepForAutoDetect = browserSteps.find( step => step.type === 'list' && step.actionId === currentListActionId ) as (BrowserStep & { type: 'list'; listSelector?: string }) | undefined; if (currentListStepForAutoDetect?.listSelector) { if (autoDetectionRunRef.current !== currentListActionId) { autoDetectionRunRef.current = currentListActionId; notify('info', 'Detecting pagination...'); try { socket?.emit('testPaginationScroll', { listSelector: currentListStepForAutoDetect.listSelector }); const handleScrollTestResult = (result: any) => { if (result.success && result.contentLoaded) { notify("success", "Scroll Down pagination has been auto-detected."); setAutoDetectedPagination({ type: 'scrollDown', selector: null, confidence: 'high' }); 
updatePaginationType('scrollDown'); const latestListStep = browserSteps.find( step => step.type === 'list' && step.actionId === currentListActionId ); if (latestListStep) { updateListStepPagination(latestListStep.id, { type: 'scrollDown', selector: null, isShadow: false }); } } else if (result.success && !result.contentLoaded) { const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement; const iframeDoc = iframeElement?.contentDocument; if (iframeDoc) { const detectionResult = clientPaginationDetector.autoDetectPagination( iframeDoc, currentListStepForAutoDetect.listSelector!, clientSelectorGenerator, { disableScrollDetection: true } ); if (detectionResult.type) { if (detectionResult.type === 'scrollDown') { notify("success", "Scroll Down pagination has been auto-detected."); } else if (detectionResult.type === 'scrollUp') { notify("success", "Scroll Up pagination has been auto-detected."); } setAutoDetectedPagination({ type: detectionResult.type, selector: detectionResult.selector, confidence: detectionResult.confidence }); const latestListStep = browserSteps.find( step => step.type === 'list' && step.actionId === currentListActionId ); if (latestListStep) { updateListStepPagination(latestListStep.id, { type: detectionResult.type, selector: detectionResult.selector, isShadow: false }); } updatePaginationType(detectionResult.type); if (detectionResult.selector && (detectionResult.type === 'clickNext' || detectionResult.type === 'clickLoadMore')) { try { function evaluateSelector(selector: string, doc: Document): Element[] { try { const isXPath = selector.startsWith('//') || selector.startsWith('(//'); if (isXPath) { const result = doc.evaluate( selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null ); const elements: Element[] = []; for (let i = 0; i < result.snapshotLength; i++) { const node = result.snapshotItem(i); if (node && node.nodeType === Node.ELEMENT_NODE) { elements.push(node as Element); } } return elements; } 
else { try { const allElements = Array.from(doc.querySelectorAll(selector)); if (allElements.length > 0) { return allElements; } } catch (err) { console.warn('[RightSidePanel] Full chained selector failed, trying individual selectors:', err); } const selectorParts = selector.split(','); for (const part of selectorParts) { try { const elements = Array.from(doc.querySelectorAll(part.trim())); if (elements.length > 0) { return elements; } } catch (err) { console.warn('[RightSidePanel] Selector part failed:', part.trim(), err); continue; } } return []; } } catch (err) { console.error('[RightSidePanel] Selector evaluation failed:', selector, err); return []; } } const elements = evaluateSelector(detectionResult.selector, iframeDoc); if (elements.length > 0) { elements.forEach((el: Element) => { (el as HTMLElement).style.outline = '3px dashed #ff00c3'; (el as HTMLElement).style.outlineOffset = '2px'; (el as HTMLElement).style.zIndex = '9999'; }); const firstElement = elements[0] as HTMLElement; const elementRect = firstElement.getBoundingClientRect(); const iframeWindow = iframeElement.contentWindow; if (iframeWindow) { const targetY = elementRect.top + iframeWindow.scrollY - (iframeWindow.innerHeight / 2) + (elementRect.height / 2); iframeWindow.scrollTo({ top: targetY, behavior: 'smooth' }); } const paginationTypeLabel = detectionResult.type === 'clickNext' ? 'Next Button' : 'Load More Button'; notify('success', `${paginationTypeLabel} has been auto-detected and highlighted on the page`); } else { console.warn(' No elements found for selector:', detectionResult.selector); } } catch (error) { console.error('Error highlighting pagination button:', error); } } } else { notify("warning", "No pagination detected. 
If present, please manually select."); setAutoDetectedPagination(null); } } } else { console.error('Scroll test failed:', result.error); setAutoDetectedPagination(null); } socket?.off('paginationScrollTestResult', handleScrollTestResult); }; socket?.on('paginationScrollTestResult', handleScrollTestResult); setTimeout(() => { socket?.off('paginationScrollTestResult', handleScrollTestResult); }, 5000); } catch (error) { console.error('Scroll test failed:', error); setAutoDetectedPagination(null); } } } const shouldSkipPaginationMode = autoDetectedPagination && ( ['scrollDown', 'scrollUp'].includes(autoDetectedPagination.type) || (['clickNext', 'clickLoadMore'].includes(autoDetectedPagination.type) && autoDetectedPagination.selector) ); if (!shouldSkipPaginationMode) { startPaginationMode(); } setShowPaginationOptions(true); setCaptureStage('pagination'); break; case 'pagination': if (!paginationType) { notify('error', t('right_panel.errors.select_pagination')); return; } const currentListStepForPagination = browserSteps.find( step => step.type === 'list' && step.actionId === currentListActionId ) as (BrowserStep & { type: 'list' }) | undefined; if (currentListStepForPagination) { const paginationSelector = currentListStepForPagination.pagination?.selector; if (['clickNext', 'clickLoadMore'].includes(paginationType) && !paginationSelector) { notify('error', t('right_panel.errors.select_pagination_element')); return; } } stopPaginationMode(); setShowPaginationOptions(false); startLimitMode(); setShowLimitOptions(true); setCaptureStage('limit'); break; case 'limit': if (!limitType || (limitType === 'custom' && !customLimit)) { notify('error', t('right_panel.errors.select_limit')); return; } const limit = limitType === 'custom' ? 
parseInt(customLimit, 10) : parseInt(limitType, 10);
        // Reject anything that is not a positive integer.
        if (isNaN(limit) || limit <= 0) {
          notify('error', t('right_panel.errors.invalid_limit'));
          return;
        }
        const latestListStep = getLatestListStep(browserSteps);
        if (latestListStep) {
          updateListStepLimit(latestListStep.id, limit);
        }
        stopLimitMode();
        setShowLimitOptions(false);
        stopCaptureAndEmitGetListSettings();
        setCaptureStage('complete');
        break;

      case 'complete':
        setCaptureStage('initial');
        break;
    }
  }, [
    captureStage, paginationType, limitType, customLimit, startPaginationMode, setShowPaginationOptions, setCaptureStage, notify, stopPaginationMode, startLimitMode, setShowLimitOptions, stopLimitMode, stopCaptureAndEmitGetListSettings, t, browserSteps, currentListActionId, updateListStepLimit,
    // Also read inside the callback; listed so it never runs against a stale
    // socket, detection result or updater.
    socket, autoDetectedPagination, updatePaginationType, updateListStepPagination,
  ]);

  // Steps the list-capture wizard one stage back: limit -> pagination ->
  // initial. Leaving the pagination stage also clears the auto-detected
  // pagination result.
  const handleBackCaptureList = useCallback(() => {
    switch (captureStage) {
      case 'limit':
        stopLimitMode();
        setShowLimitOptions(false);
        startPaginationMode();
        setShowPaginationOptions(true);
        setCaptureStage('pagination');
        break;
      case 'pagination':
        stopPaginationMode();
        setShowPaginationOptions(false);
        setAutoDetectedPagination(null);
        setCaptureStage('initial');
        break;
    }
  }, [captureStage, stopLimitMode, startPaginationMode, stopPaginationMode, setShowLimitOptions, setShowPaginationOptions, setCaptureStage]);

  const handlePaginationSettingSelect = (option: PaginationType) => {
    updatePaginationType(option);
  };

  // Aborts the current text capture: removes its steps locally, asks the
  // server to remove the action, and notifies the user.
  const discardGetText = useCallback(() => {
    stopGetText();

    if (currentTextActionId) {
      deleteStepsByActionId(currentTextActionId);

      if (socket) {
        socket.emit('removeAction', { actionId: currentTextActionId });
      }
    }

    setCurrentTextActionId('');
    clientSelectorGenerator.cleanup();
    notify('error', t('right_panel.errors.capture_text_discarded'));
  }, [currentTextActionId, browserSteps, stopGetText, deleteStepsByActionId, notify, t, socket, setCurrentTextActionId]);

  // Aborts the current list capture: removes its steps locally, asks the
  // server to remove the action, then clears any pagination highlight.
  const discardGetList = useCallback(() => {
    stopGetList();

    if (currentListActionId) {
      deleteStepsByActionId(currentListActionId);

      if (socket) {
        socket.emit('removeAction', { actionId: currentListActionId });
      }
    }

    if (autoDetectedPagination?.selector) {
      const iframeElement =
document.querySelector('#browser-window iframe') as HTMLIFrameElement; if (iframeElement?.contentDocument) { try { function evaluateSelector(selector: string, doc: Document): Element[] { if (selector.startsWith('//') || selector.startsWith('(//')) { try { const result = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); const elements: Element[] = []; for (let i = 0; i < result.snapshotLength; i++) { const node = result.snapshotItem(i); if (node && node.nodeType === Node.ELEMENT_NODE) { elements.push(node as Element); } } return elements; } catch (err) { return []; } } else { try { return Array.from(doc.querySelectorAll(selector)); } catch (err) { return []; } } } const elements = evaluateSelector(autoDetectedPagination.selector, iframeElement.contentDocument); elements.forEach((el: Element) => { (el as HTMLElement).style.outline = ''; (el as HTMLElement).style.outlineOffset = ''; (el as HTMLElement).style.zIndex = ''; }); } catch (error) { console.error('Error removing pagination highlight on discard:', error); } } } resetListState(); stopPaginationMode(); stopLimitMode(); setShowPaginationOptions(false); setShowLimitOptions(false); setAutoDetectedPagination(null); setCaptureStage('initial'); setCurrentListActionId(''); clientSelectorGenerator.cleanup(); notify('error', t('right_panel.errors.capture_list_discarded')); }, [currentListActionId, browserSteps, stopGetList, deleteStepsByActionId, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t, stopPaginationMode, stopLimitMode, socket, autoDetectedPagination]); const captureScreenshot = (fullPage: boolean) => { const screenshotCount = browserSteps.filter(s => s.type === 'screenshot').length + 1; const screenshotName = `Screenshot ${screenshotCount}`; const screenshotSettings = { fullPage, type: 'png' as const, timeout: 30000, animations: 'allow' as const, caret: 'hide' as const, scale: 'device' as const, name: screenshotName, actionId: 
currentScreenshotActionId }; socket?.emit('captureDirectScreenshot', screenshotSettings); addScreenshotStep(fullPage, currentScreenshotActionId); stopGetScreenshot(); resetInterpretationLog(); finishAction('screenshot'); onFinishCapture(); clientSelectorGenerator.cleanup(); }; const theme = useThemeMode(); const isDarkMode = theme.darkMode; return ( {!isAnyActionActive && ( <> {showCaptureList && ( )} {showCaptureText && ( )} {showCaptureScreenshot && ( )} )} {getList && ( {(captureStage === 'pagination' || captureStage === 'limit') && ( )} {showPaginationOptions && ( {t('right_panel.pagination.title')} {autoDetectedPagination && autoDetectedPagination.type !== '' && ( ✓ Auto-detected: { autoDetectedPagination.type === 'clickNext' ? 'Click Next' : autoDetectedPagination.type === 'clickLoadMore' ? 'Click Load More' : autoDetectedPagination.type === 'scrollDown' ? 'Scroll Down' : autoDetectedPagination.type === 'scrollUp' ? 'Scroll Up' : autoDetectedPagination.type } You can continue with this or manually select a different pagination type below. {autoDetectedPagination.selector && ['clickNext', 'clickLoadMore'].includes(autoDetectedPagination.type) && ( )} )} )} {showLimitOptions && ( {t('right_panel.limit.title')} updateLimitType(e.target.value as LimitType)} sx={{ display: 'flex', flexDirection: 'column', width: '100%', }} > } label="10" /> } label="100" />
} label={t('right_panel.limit.custom')} /> {limitType === 'custom' && ( ) => { const value = parseInt(e.target.value); if (e.target.value === '' || value >= 1) { updateCustomLimit(e.target.value); } }} inputProps={{ min: 1, onKeyPress: (e: React.KeyboardEvent) => { const value = (e.target as HTMLInputElement).value + e.key; if (parseInt(value) < 1) { e.preventDefault(); } } }} placeholder={t('right_panel.limit.enter_number')} sx={{ marginLeft: '10px', '& input': { padding: '10px', }, width: '150px', background: isDarkMode ? "#1E2124" : 'white', color: isDarkMode ? "white" : 'black', }} /> )}
)}
)} {getText && ( )} {getScreenshot && ( )}
); }; ================================================ FILE: src/components/recorder/SaveRecording.tsx ================================================ import React, { useCallback, useEffect, useState, useContext } from 'react'; import { Button, Box, LinearProgress, Tooltip } from "@mui/material"; import { GenericModal } from "../ui/GenericModal"; import { stopRecording } from "../../api/recording"; import { useGlobalInfoStore } from "../../context/globalInfo"; import { AuthContext } from '../../context/auth'; import { useSocketStore } from "../../context/socket"; import { TextField, Typography } from "@mui/material"; import { WarningText } from "../ui/texts"; import NotificationImportantIcon from "@mui/icons-material/NotificationImportant"; import { useNavigate } from 'react-router-dom'; import { useTranslation } from 'react-i18next'; interface SaveRecordingProps { fileName: string; } export const SaveRecording = ({ fileName }: SaveRecordingProps) => { const { t } = useTranslation(); const [openModal, setOpenModal] = useState(false); const [needConfirm, setNeedConfirm] = useState(false); const [saveRecordingName, setSaveRecordingName] = useState(fileName); const [waitingForSave, setWaitingForSave] = useState(false); const { browserId, setBrowserId, notify, recordings, isLogin, recordingName, retrainRobotId, currentWorkflowActionsState } = useGlobalInfoStore(); const { socket } = useSocketStore(); const { state, dispatch } = useContext(AuthContext); const { user } = state; const navigate = useNavigate(); useEffect(() => { if (recordingName) { setSaveRecordingName(recordingName); } }, [recordingName]); const handleChangeOfTitle = (event: React.ChangeEvent) => { const { value } = event.target; if (needConfirm) { setNeedConfirm(false); } setSaveRecordingName(value); } const handleSaveRecording = async (event: React.SyntheticEvent) => { event.preventDefault(); if (recordings.includes(saveRecordingName)) { if (needConfirm) { return; } setNeedConfirm(true); } else { 
await saveRecording(); } };

  /**
   * Finishes the recording session.
   *
   * Warns and aborts when no list, screenshot or schema capture action has
   * been performed. Otherwise saves immediately when `recordingName` is set
   * and not already present in `recordings`, or opens the save modal.
   */
  const handleFinishClick = () => {
    const { hasScrapeListAction, hasScreenshotAction, hasScrapeSchemaAction } = currentWorkflowActionsState;
    const hasAnyAction = hasScrapeListAction || hasScreenshotAction || hasScrapeSchemaAction;

    if (!hasAnyAction) {
      notify('warning', t('save_recording.errors.no_actions_performed'));
      return;
    }

    if (recordingName && !recordings.includes(recordingName)) {
      // Fire-and-forget: saveRecording only emits a socket event and updates state.
      void saveRecording();
    } else {
      setOpenModal(true);
    }
  };

  /**
   * Handler for the socket `fileSaved` event: reports the outcome to the
   * opener window, stops the remote browser and closes this window.
   *
   * @param data optional payload whose `actionType` ('retrained' | 'saved' |
   *             'error') selects the notification text and type.
   */
  const exitRecording = useCallback(async (data?: { actionType: string }) => {
    let successMessage = t('save_recording.notifications.save_success');

    if (data && data.actionType) {
      if (data.actionType === 'retrained') {
        successMessage = t('save_recording.notifications.retrain_success');
      } else if (data.actionType === 'saved') {
        successMessage = t('save_recording.notifications.save_success');
      } else if (data.actionType === 'error') {
        successMessage = t('save_recording.notifications.save_error');
      }
    }

    const notificationData = {
      type: data?.actionType === 'error' ? 'error' : 'success',
      message: successMessage,
      timestamp: Date.now()
    };

    window.sessionStorage.setItem('pendingNotification', JSON.stringify(notificationData));

    if (window.opener) {
      // Restrict delivery to our own origin instead of '*': the message
      // listener in RecordingsTable ignores other origins anyway, and a
      // wildcard would hand the payload to whatever page the opener shows.
      const targetOrigin = window.location.origin;

      window.opener.postMessage({
        type: 'recording-notification',
        notification: notificationData
      }, targetOrigin);

      window.opener.postMessage({
        type: 'session-data-clear',
        timestamp: Date.now()
      }, targetOrigin);
    }

    try {
      if (browserId) {
        await stopRecording(browserId);
      }
    } catch (error) {
      // A failed stop must not leave the recording window open.
      console.error('Failed to stop recording while exiting:', error);
    } finally {
      setBrowserId(null);
      window.close();
    }
  }, [setBrowserId, browserId, t]);

  /**
   * Notifies the backend to save the recording in progress by emitting the
   * socket `save` event, then marks the component as waiting for the save.
   * Does nothing but log an error when no user is logged in, and warns
   * without saving when no capture action has been performed.
   */
  const saveRecording = async () => {
    if (user) {
      const { hasScrapeListAction, hasScreenshotAction, hasScrapeSchemaAction } = currentWorkflowActionsState;
      const hasAnyAction = hasScrapeListAction || hasScreenshotAction || hasScrapeSchemaAction;

      if (!hasAnyAction) {
        notify('warning', t('save_recording.errors.no_actions_performed'));
        return;
      }

      const payload = {
        fileName: saveRecordingName || recordingName,
        userId: user.id,
        isLogin: isLogin,
        robotId: retrainRobotId,
      };

      socket?.emit('save', payload);
      setWaitingForSave(true);
      console.log(`Saving the recording as ${saveRecordingName || recordingName} for userId ${user.id}`);
    } else {
      console.error(t('save_recording.notifications.user_not_logged'));
    }
  };

  // Subscribe to the save confirmation; re-subscribe whenever the socket or
  // the handler identity changes.
  useEffect(() => {
    socket?.on('fileSaved', exitRecording);
    return () => {
      socket?.off('fileSaved', exitRecording);
    }
  }, [socket, exitRecording]);

  return (
setOpenModal(false)} modalStyle={modalStyle}>
{t('save_recording.title')} {needConfirm ? ( {t('save_recording.errors.exists_warning')} ) : } {waitingForSave && }
); } const modalStyle = { top: '25%', left: '50%', transform: 'translate(-50%, -50%)', width: '30%', backgroundColor: 'background.paper', p: 4, height: 'fit-content', display: 'block', padding: '20px', }; ================================================ FILE: src/components/recorder/SidePanelHeader.tsx ================================================ import React, { FC, useState } from 'react'; import { InterpretationButtons } from "../run/InterpretationButtons"; import { useSocketStore } from "../../context/socket"; interface SidePanelHeaderProps { onPreviewClick?: () => void; } export const SidePanelHeader = ({ onPreviewClick }: SidePanelHeaderProps) => { const [steppingIsDisabled, setSteppingIsDisabled] = useState(true); const { socket } = useSocketStore(); const handleStep = () => { socket?.emit('step'); }; return (
setSteppingIsDisabled(!isPaused)} onPreviewComplete={onPreviewClick} />
); }; ================================================ FILE: src/components/robot/Recordings.tsx ================================================ import React, { useEffect, useState } from "react"; import { RecordingsTable } from "./RecordingsTable"; import { Grid } from "@mui/material"; import { RunSettings, RunSettingsModal } from "../run/RunSettings"; import { ScheduleSettings, ScheduleSettingsPage, } from "./pages/ScheduleSettingsPage"; import { RobotIntegrationPage } from "./pages/RobotIntegrationPage"; import { RobotSettingsPage } from "./pages/RobotSettingsPage"; import { RobotEditPage } from "./pages/RobotEditPage"; import { RobotDuplicatePage } from "./pages/RobotDuplicatePage"; import { useNavigate, useLocation, useParams } from "react-router-dom"; import { useGlobalInfoStore } from "../../context/globalInfo"; import { useTranslation } from "react-i18next"; interface RecordingsProps { handleEditRecording: (id: string, fileName: string) => void; handleRunRecording: (settings: RunSettings) => void; handleScheduleRecording: (settings: ScheduleSettings) => Promise; setRecordingInfo: (id: string, name: string) => void; } export const Recordings = ({ handleEditRecording, handleRunRecording, setRecordingInfo, handleScheduleRecording, }: RecordingsProps) => { const navigate = useNavigate(); const location = useLocation(); const [params, setParams] = useState([]); const { notify } = useGlobalInfoStore(); const { t } = useTranslation(); const handleNavigate = ( path: string, id: string, name: string, params: string[] ) => { setParams(params); setRecordingInfo(id, name); navigate(path); }; const handleClose = () => { setParams([]); setRecordingInfo("", ""); navigate("/robots"); // Navigate back to the main robots page }; useEffect(() => { // Helper function to get and clear a cookie const getAndClearCookie = (name: string) => { const value = document.cookie .split("; ") .find((row) => row.startsWith(`${name}=`)) ?.split("=")[1]; if (value) { document.cookie = 
`${name}=; expires=Thu, 01 Jan 1970 00:00:00 GMT; path=/`; } return value; }; const authStatus = getAndClearCookie("robot_auth_status"); const airtableAuthStatus = getAndClearCookie("airtable_auth_status"); const robotId = getAndClearCookie("robot_auth_robotId"); if (airtableAuthStatus === "success" && robotId) { console.log("Airtable Auth Status:", airtableAuthStatus); notify( airtableAuthStatus, t("recordingtable.notifications.auth_success") ); handleNavigate(`/robots/${robotId}/integrate/airtable`, robotId, "", []); } else if (authStatus === "success" && robotId) { console.log("Google Auth Status:", authStatus); notify(authStatus, t("recordingtable.notifications.auth_success")); handleNavigate(`/robots/${robotId}/integrate/googleSheets`, robotId, "", []); } }, []); const getCurrentPageComponent = () => { const currentPath = location.pathname; if (currentPath.endsWith("/run")) { return ( ); } else if (currentPath.endsWith("/schedule")) { return ; } else if (currentPath.includes("/integrate")) { return ( {}} robotPath={"robots"} /> ); } else if (currentPath.endsWith("/settings")) { return {}} />; } else if (currentPath.endsWith("/edit")) { return {}} />; } else if (currentPath.endsWith("/duplicate")) { return {}} />; } return null; }; const currentPath = location.pathname; const isConfigPage = currentPath.includes("/schedule") || currentPath.includes("/integrate") || currentPath.includes("/settings") || currentPath.includes("/edit") || currentPath.includes("/duplicate") || currentPath.includes("/run"); if (isConfigPage) { return getCurrentPageComponent(); } return ( handleNavigate(`/robots/${id}/run`, id, name, params) } handleScheduleRecording={(id, name, params) => handleNavigate(`/robots/${id}/schedule`, id, name, params) } handleIntegrateRecording={(id, name, params) => handleNavigate(`/robots/${id}/integrate`, id, name, params) } handleSettingsRecording={(id, name, params) => handleNavigate(`/robots/${id}/settings`, id, name, params) } handleEditRobot={(id, 
name, params) => handleNavigate(`/robots/${id}/edit`, id, name, params) } handleDuplicateRobot={(id, name, params) => handleNavigate(`/robots/${id}/duplicate`, id, name, params) } /> ); }; ================================================ FILE: src/components/robot/RecordingsTable.tsx ================================================ import * as React from 'react'; import { useTranslation } from 'react-i18next'; import Paper from '@mui/material/Paper'; import Table from '@mui/material/Table'; import TableBody from '@mui/material/TableBody'; import TableCell from '@mui/material/TableCell'; import TableContainer from '@mui/material/TableContainer'; import TablePagination from '@mui/material/TablePagination'; import TableRow from '@mui/material/TableRow'; import { memo, useCallback, useEffect, useMemo } from "react"; import { WorkflowFile } from "maxun-core"; import SearchIcon from '@mui/icons-material/Search'; import { IconButton, Button, Box, Typography, TextField, MenuItem, Menu, ListItemIcon, ListItemText, CircularProgress, FormControlLabel, Checkbox, } from "@mui/material"; import { Schedule, DeleteForever, Edit, PlayCircle, Settings, Power, MoreHoriz, Refresh, ContentCopy, } from "@mui/icons-material"; import { useGlobalInfoStore, useCachedRecordings } from "../../context/globalInfo"; import { checkRunsForRecording, deleteRecordingFromStorage } from "../../api/storage"; import { Add } from "@mui/icons-material"; import { useNavigate } from 'react-router-dom'; import { canCreateBrowserInState, getActiveBrowserId, stopRecording } from "../../api/recording"; import { GenericModal } from '../ui/GenericModal'; import { useTheme } from '@mui/material/styles'; declare global { interface Window { openedRecordingWindow?: Window | null; } } interface Column { id: 'interpret' | 'name' | 'options' | 'schedule' | 'integrate' | 'settings'; label: string; minWidth?: number; align?: 'right'; format?: (value: string) => string; } interface Data { id: string; name: string; 
createdAt: string; updatedAt: string; content: WorkflowFile; params: string[]; } interface RecordingsTableProps { handleEditRecording: (id: string, fileName: string) => void; handleRunRecording: (id: string, fileName: string, params: string[]) => void; handleScheduleRecording: (id: string, fileName: string, params: string[]) => void; handleIntegrateRecording: (id: string, fileName: string, params: string[]) => void; handleSettingsRecording: (id: string, fileName: string, params: string[]) => void; handleEditRobot: (id: string, name: string, params: string[]) => void; handleDuplicateRobot: (id: string, name: string, params: string[]) => void; } const LoadingRobotRow = memo(({ row, columns }: any) => { return ( {columns.map((column: Column) => { if (column.id === 'name') { return ( {row.name} (Creating...) ); } else if (column.id === 'interpret') { return ( - ); } else { return ( - ); } })} ); }); // Virtualized row component for efficient rendering const TableRowMemoized = memo(({ row, columns, handlers }: any) => { if (row.isLoading) { return ; } return ( {columns.map((column: Column) => { const value: any = row[column.id]; if (value !== undefined) { return ( {value} ); } else { switch (column.id) { case 'interpret': return ( handlers.handleRunRecording(row.id, row.name, row.params || [])} /> ); case 'schedule': return ( handlers.handleScheduleRecording(row.id, row.name, row.params || [])} /> ); case 'integrate': return ( handlers.handleIntegrateRecording(row.id, row.name, row.params || [])} /> ); case 'options': return ( handlers.handleRetrainRobot(row.id, row.name)} handleEdit={() => handlers.handleEditRobot(row.id, row.name, row.params || [])} handleDuplicate={() => handlers.handleDuplicateRobot(row.id, row.name, row.params || [])} handleDelete={() => handlers.handleDelete(row.id)} robotType={row.type} /> ); case 'settings': return ( handlers.handleSettingsRecording(row.id, row.name, row.params || [])} /> ); default: return null; } } })} ); }); export const 
RecordingsTable = ({ handleEditRecording, handleRunRecording, handleScheduleRecording, handleIntegrateRecording, handleSettingsRecording, handleEditRobot, handleDuplicateRobot, }: RecordingsTableProps) => { const { t } = useTranslation(); const theme = useTheme(); const [page, setPage] = React.useState(0); const [rowsPerPage, setRowsPerPage] = React.useState(10); const { data: recordingsData = [], isLoading: isFetching, error, refetch } = useCachedRecordings(); const [isModalOpen, setModalOpen] = React.useState(false); const [searchTerm, setSearchTerm] = React.useState(''); const [isWarningModalOpen, setWarningModalOpen] = React.useState(false); const [isDeleteConfirmOpen, setDeleteConfirmOpen] = React.useState(false); const [pendingDeleteId, setPendingDeleteId] = React.useState(null); const [activeBrowserId, setActiveBrowserId] = React.useState(''); const columns = useMemo(() => [ { id: 'interpret', label: t('recordingtable.run'), minWidth: 80 }, { id: 'name', label: t('recordingtable.name'), minWidth: 80 }, { id: 'schedule', label: t('recordingtable.schedule'), minWidth: 80 }, { id: 'integrate', label: t('recordingtable.integrate'), minWidth: 80 }, { id: 'settings', label: t('recordingtable.settings'), minWidth: 80 }, { id: 'options', label: t('recordingtable.options'), minWidth: 80 }, ], [t]); const { notify, setRecordings, browserId, setBrowserId, setInitialUrl, recordingUrl, setRecordingUrl, isLogin, setIsLogin, rerenderRobots, setRerenderRobots, recordingName, setRecordingName, recordingId, setRecordingId } = useGlobalInfoStore(); const navigate = useNavigate(); useEffect(() => { const handleMessage = (event: any) => { if (event.origin === window.location.origin && event.data && event.data.type === 'recording-notification') { const notificationData = event.data.notification; if (notificationData) { notify(notificationData.type, notificationData.message); if ((notificationData.type === 'success' && (notificationData.message.includes('saved') || 
notificationData.message.includes('retrained'))) ||
            (notificationData.type === 'warning' && notificationData.message.includes('terminated'))) {
            setRerenderRobots(true);
          }
        }
      }

      // The recording window asks us to drop all per-session keys.
      if (event.origin === window.location.origin && event.data && event.data.type === 'session-data-clear') {
        window.sessionStorage.removeItem('browserId');
        window.sessionStorage.removeItem('robotToRetrain');
        window.sessionStorage.removeItem('robotName');
        window.sessionStorage.removeItem('recordingUrl');
        window.sessionStorage.removeItem('recordingSessionId');
        window.sessionStorage.removeItem('pendingSessionData');
        window.sessionStorage.removeItem('nextTabIsRecording');
        window.sessionStorage.removeItem('initialUrl');
      }
    };

    window.addEventListener('message', handleMessage);

    return () => {
      window.removeEventListener('message', handleMessage);
    };
  }, [notify, setRerenderRobots]);

  // Pagination: switch to the requested page.
  const handleChangePage = useCallback((event: unknown, newPage: number) => {
    setPage(newPage);
  }, []);

  // Pagination: apply the new page size and return to the first page.
  const handleChangeRowsPerPage = (event: React.ChangeEvent) => {
    setRowsPerPage(+event.target.value);
    setPage(0);
  };

  // Search box: store the term and return to the first page.
  const handleSearchChange = useCallback((event: React.ChangeEvent) => {
    setSearchTerm(event.target.value);
    setPage(0);
  }, []);

  /**
   * Parses a robot's `updatedAt` string into a Date used for sorting.
   *
   * Strings containing "AM"/"PM" are passed to `Date` unchanged; for any
   * other string the first two slash-separated numbers are swapped before
   * parsing. `new Date(...)` never throws on bad input - it yields an
   * Invalid Date whose `getTime()` is NaN - so the result is validated
   * explicitly and the epoch is returned for anything unparseable. That
   * keeps the sort comparator below well defined.
   *
   * @param dateStr the stored date string (may be missing at runtime)
   * @returns the parsed date, or `new Date(0)` when it cannot be parsed
   */
  const parseDateString = (dateStr: string): Date => {
    if (typeof dateStr !== 'string') {
      return new Date(0);
    }

    const hasMeridiem = dateStr.includes('PM') || dateStr.includes('AM');
    const normalized = hasMeridiem
      ? dateStr
      : dateStr.replace(/(\d+)\/(\d+)\//, '$2/$1/');

    const parsed = new Date(normalized);
    return Number.isNaN(parsed.getTime()) ? new Date(0) : parsed;
  };

  // Flatten the cached recordings into table rows, newest `updatedAt` first.
  // Entries without `recording_meta` are dropped.
  const rows = useMemo(() => {
    if (!recordingsData) return [];

    const parsedRows = recordingsData
      .map((recording: any, index: number) => {
        if (recording?.recording_meta) {
          const parsedDate = parseDateString(recording.recording_meta.updatedAt);

          return {
            id: index,
            ...recording.recording_meta,
            content: recording.recording,
            parsedDate,
            isLoading: recording.isLoading || false,
            isOptimistic: recording.isOptimistic || false
          };
        }
        return null;
      })
      .filter(Boolean)
      .sort((a, b) => b.parsedDate.getTime() - a.parsedDate.getTime());

    return parsedRows;
  },
[recordingsData]);

  // Publish the current robot names to the global store once rows exist.
  useEffect(() => {
    if (rows.length > 0) {
      setRecordings(rows.map((recording) => recording.name));
    }
  }, [rows, setRecordings]);

  // Navigates to the robot creation page.
  const handleNewRecording = useCallback(async () => {
    navigate('/robots/create');
  }, [navigate]);

  /**
   * Records a close request for the given browser in sessionStorage and, when
   * this tab still holds a handle to the recording window, closes it directly.
   */
  const notifyRecordingTabsToClose = (browserId: string) => {
    const closeMessage = {
      action: 'close-recording-tab',
      browserId: browserId,
      timestamp: Date.now()
    };
    window.sessionStorage.setItem('recordingTabCloseMessage', JSON.stringify(closeMessage));

    if (window.openedRecordingWindow && !window.openedRecordingWindow.closed) {
      try {
        window.openedRecordingWindow.close();
      } catch (e) {
        console.log('Could not directly close recording window:', e);
      }
    }
  };

  // Stops the currently active recording (if any), then swaps the warning
  // modal for the new-recording modal.
  const handleDiscardAndCreate = async () => {
    if (activeBrowserId) {
      await stopRecording(activeBrowserId);
      notify('warning', t('browser_recording.notifications.terminated'));
      notifyRecordingTabsToClose(activeBrowserId);
    }

    setWarningModalOpen(false);
    setModalOpen(true);
  };

  /**
   * Prepares global state and sessionStorage for retraining a robot and opens
   * the recording setup page in a new tab.
   *
   * Declared before `handleRetrainRobot` and memoized so that it can be listed
   * in that callback's dependencies.
   *
   * @param id   id of the robot to retrain
   * @param name name of the robot to retrain
   * @param url  start URL; falls back to the store's `recordingUrl`
   */
  const startRetrainRecording = useCallback((id: string, name: string, url?: string) => {
    setBrowserId('new-recording');
    setRecordingName(name);
    setRecordingId(id);

    window.sessionStorage.setItem('browserId', 'new-recording');
    window.sessionStorage.setItem('robotToRetrain', id);
    window.sessionStorage.setItem('robotName', name);
    window.sessionStorage.setItem('recordingUrl', url || recordingUrl);

    const sessionId = Date.now().toString();
    window.sessionStorage.setItem('recordingSessionId', sessionId);

    window.openedRecordingWindow = window.open(`/recording-setup?session=${sessionId}`, '_blank');
    window.sessionStorage.setItem('nextTabIsRecording', 'true');
  }, [recordingUrl, setBrowserId, setRecordingName, setRecordingId]);

  /**
   * Starts retraining the robot with the given id.
   *
   * Takes the start URL from the first "goto" action of the robot's last
   * workflow pair (when present), then either starts the retrain session or,
   * when no recording browser can be created, shows the discard warning for
   * the active browser or a browser-limit notification.
   */
  const handleRetrainRobot = useCallback(async (id: string, name: string) => {
    const robot = rows.find(row => row.id === id);
    let targetUrl;

    if (robot?.content?.workflow && robot.content.workflow.length > 0) {
      const lastPair = robot.content.workflow[robot.content.workflow.length - 1];

      if (lastPair?.what && Array.isArray(lastPair.what)) {
        const gotoAction = lastPair.what.find((action: any) =>
          action && typeof action === 'object' && 'action' in action && action.action === "goto"
        ) as any;

        if (gotoAction?.args?.[0]) {
          targetUrl = gotoAction.args[0];
        }
      }
    }

    if (targetUrl) {
      setInitialUrl(targetUrl);
      setRecordingUrl(targetUrl);
      window.sessionStorage.setItem('initialUrl', targetUrl);
    }

    const canCreateRecording = await canCreateBrowserInState("recording");

    if (!canCreateRecording) {
      // Named differently from the `activeBrowserId` state to avoid shadowing it.
      const runningBrowserId = await getActiveBrowserId();
      if (runningBrowserId) {
        setActiveBrowserId(runningBrowserId);
        setWarningModalOpen(true);
      } else {
        notify('warning', t('recordingtable.notifications.browser_limit_warning'));
      }
    } else {
      startRetrainRecording(id, name, targetUrl);
    }
    // Every value read inside the callback is listed so it never runs with a
    // stale `notify`, `t` or `startRetrainRecording`.
  }, [rows, setInitialUrl, setRecordingUrl, notify, t, startRetrainRecording]);

  // Prepares global state and sessionStorage for a brand-new recording and
  // opens the recording setup page in a new tab.
  const startRecording = () => {
    setModalOpen(false);

    // Set local state
    setBrowserId('new-recording');
    setRecordingName('');
    setRecordingId('');

    window.sessionStorage.setItem('browserId', 'new-recording');

    const sessionId = Date.now().toString();
    window.sessionStorage.setItem('recordingSessionId', sessionId);
    window.sessionStorage.setItem('recordingUrl', recordingUrl);

    window.openedRecordingWindow = window.open(`/recording-setup?session=${sessionId}`, '_blank');

    window.sessionStorage.setItem('nextTabIsRecording', 'true');
  };

  // Mirrors the URL typed by the user into the store and sessionStorage.
  const setBrowserRecordingUrl = (event: React.ChangeEvent) => {
    setInitialUrl(event.target.value);
    setRecordingUrl(event.target.value);
    window.sessionStorage.setItem('initialUrl', event.target.value);
  }

  // Refetch the robots list when another component requests a re-render.
  useEffect(() => {
    if (rerenderRobots) {
      refetch();
      setRerenderRobots(false);
    }
  }, [rerenderRobots, setRerenderRobots, refetch]);

  // Returns `value` delayed by `delay` ms; the pending timer is cleared
  // whenever `value` or `delay` changes before it fires.
  function useDebounce(value: T, delay: number): T {
    const [debouncedValue, setDebouncedValue] = React.useState(value);

    useEffect(() => {
      const handler = setTimeout(() => {
        setDebouncedValue(value);
      }, delay);

      return () => {
        clearTimeout(handler);
      };
    }, [value, delay]);

    return debouncedValue;
  }

  const debouncedSearchTerm = useDebounce(searchTerm, 300);

  // Filter rows based on search term
  const
filteredRows = useMemo(() => { const searchLower = debouncedSearchTerm.toLowerCase(); return debouncedSearchTerm ? rows.filter(row => row.name.toLowerCase().includes(searchLower)) : rows; }, [rows, debouncedSearchTerm]); const visibleRows = useMemo(() => { const start = page * rowsPerPage; return filteredRows.slice(start, start + rowsPerPage); }, [filteredRows, page, rowsPerPage]); const openDeleteConfirm = React.useCallback((id: string) => { setPendingDeleteId(String(id)); setDeleteConfirmOpen(true); }, []); const confirmDeleteRecording = React.useCallback(async () => { if (!pendingDeleteId) return; const hasRuns = await checkRunsForRecording(pendingDeleteId); if (hasRuns) { notify('warning', t('recordingtable.notifications.delete_warning')); setDeleteConfirmOpen(false); setPendingDeleteId(null); return; } const success = await deleteRecordingFromStorage(pendingDeleteId); if (success) { notify('success', t('recordingtable.notifications.delete_success')); refetch(); } setDeleteConfirmOpen(false); setPendingDeleteId(null); }, [pendingDeleteId, notify, t, refetch]); const pendingRow = pendingDeleteId ? rows.find(r => String(r.id) === pendingDeleteId) : null; const handlers = useMemo(() => ({ handleRunRecording, handleScheduleRecording, handleIntegrateRecording, handleSettingsRecording, handleEditRobot, handleDuplicateRobot, handleRetrainRobot, handleDelete: async (id: string) => openDeleteConfirm(id) }), [handleRunRecording, handleScheduleRecording, handleIntegrateRecording, handleSettingsRecording, handleEditRobot, handleDuplicateRobot, handleRetrainRobot, notify, t, refetch]); return ( {t('recordingtable.heading')} }} sx={{ width: '250px' }} /> {t('recordingtable.new')} {isFetching ? ( ) : filteredRows.length === 0 ? ( {debouncedSearchTerm ? t('recordingtable.placeholder.search') : t('recordingtable.placeholder.title')} {debouncedSearchTerm ? 
t('recordingtable.search_criteria') : t('recordingtable.placeholder.body') } ) : ( <> {columns.map((column) => ( {column.label} ))} {visibleRows.map((row) => ( ))}
)} setWarningModalOpen(false)} modalStyle={modalStyle}>
{t('recordingtable.warning_modal.title')} {t('recordingtable.warning_modal.message')}
setModalOpen(false)} modalStyle={modalStyle}>
{t('recordingtable.modal.title')} setIsLogin(e.target.checked)} color="primary" /> } label={t('recordingtable.modal.login_title')} style={{ marginBottom: '10px' }} />
{ setDeleteConfirmOpen(false); setPendingDeleteId(null); }} modalStyle={{ ...modalStyle, padding: 0, backgroundColor: 'transparent', width: 'auto', maxWidth: '520px' }} > {t('recordingtable.delete_confirm.title', { name: pendingRow?.name, defaultValue: 'Delete {{name}}?' })} {t('recordingtable.delete_confirm.message', { name: pendingRow?.name, defaultValue: 'Are you sure you want to delete the robot "{{name}}"?' })}
); } interface InterpretButtonProps { handleInterpret: () => void; } const InterpretButton = ({ handleInterpret }: InterpretButtonProps) => { return ( { handleInterpret(); }} > ) } interface ScheduleButtonProps { handleSchedule: () => void; } const ScheduleButton = ({ handleSchedule }: ScheduleButtonProps) => { return ( { handleSchedule(); }} > ) } interface IntegrateButtonProps { handleIntegrate: () => void; } const IntegrateButton = ({ handleIntegrate }: IntegrateButtonProps) => { return ( { handleIntegrate(); }} > ) } interface SettingsButtonProps { handleSettings: () => void; } const SettingsButton = ({ handleSettings }: SettingsButtonProps) => { return ( { handleSettings(); }} > ) } interface OptionsButtonProps { handleRetrain: () => void; handleEdit: () => void; handleDuplicate: () => void; handleDelete: () => void; robotType: string; } const OptionsButton = ({ handleRetrain, handleEdit, handleDuplicate, handleDelete, robotType }: OptionsButtonProps) => { const [anchorEl, setAnchorEl] = React.useState(null); const handleClick = (event: React.MouseEvent) => { setAnchorEl(event.currentTarget); }; const handleClose = () => { setAnchorEl(null); }; const { t } = useTranslation(); return ( <> {robotType !== 'scrape' && ( { handleRetrain(); handleClose(); }}> Retrain )} { handleEdit(); handleClose(); }}> Edit {robotType === 'extract' && ( { handleDuplicate(); handleClose(); }}> Duplicate )} { handleDelete(); handleClose(); }}> Delete ); }; const MemoizedTableCell = memo(TableCell); // Memoized action buttons const MemoizedInterpretButton = memo(InterpretButton); const MemoizedScheduleButton = memo(ScheduleButton); const MemoizedIntegrateButton = memo(IntegrateButton); const MemoizedSettingsButton = memo(SettingsButton); const MemoizedOptionsButton = memo(OptionsButton); const modalStyle = { top: '50%', left: '50%', transform: 'translate(-50%, -50%)', width: '30%', backgroundColor: 'background.paper', p: 4, height: 'fit-content', display: 'block', padding: '20px', }; 
================================================ FILE: src/components/robot/ToggleButton.tsx ================================================ import React, { FC } from "react"; import styled from "styled-components"; interface ToggleButtonProps { isChecked?: boolean; onChange: () => void; }; export const ToggleButton: FC = ({ isChecked = false, onChange }) => ( ); const CheckBoxWrapper = styled.div` position: relative; `; const CheckBoxLabel = styled.label` position: absolute; top: 0; left: 0; width: 42px; height: 26px; border-radius: 15px; background: #bebebe; cursor: pointer; &::after { content: ""; display: block; border-radius: 50%; width: 18px; height: 18px; margin: 3px; background: #ffffff; box-shadow: 1px 3px 3px 1px rgba(0, 0, 0, 0.2); transition: 0.2s; } `; const CheckBox = styled.input` opacity: 0; z-index: 1; border-radius: 15px; width: 42px; height: 26px; &:checked + ${CheckBoxLabel} { background: #2196F3; &::after { content: ""; display: block; border-radius: 50%; width: 18px; height: 18px; margin-left: 21px; transition: 0.2s; } } `; ================================================ FILE: src/components/robot/pages/RobotConfigPage.tsx ================================================ import React from 'react'; import { Box, Typography, Button, IconButton, Divider, useTheme } from '@mui/material'; import { ArrowBack } from '@mui/icons-material'; import { useNavigate, useLocation } from 'react-router-dom'; import { useTranslation } from 'react-i18next'; interface RobotConfigPageProps { title: string; children: React.ReactNode; onSave?: () => void; onCancel?: () => void; saveButtonText?: string; cancelButtonText?: string; showSaveButton?: boolean; showCancelButton?: boolean; isLoading?: boolean; icon?: React.ReactNode; onBackToSelection?: () => void; backToSelectionText?: string; onArrowBack?: () => void; // Optional prop for custom back action } export const RobotConfigPage: React.FC = ({ title, children, onSave, onCancel, saveButtonText, cancelButtonText, 
showSaveButton = true, showCancelButton = true, isLoading = false, icon, onBackToSelection, backToSelectionText, onArrowBack, }) => { const navigate = useNavigate(); const location = useLocation(); const theme = useTheme(); const { t } = useTranslation(); const handleBack = () => { if (onCancel) { onCancel(); } else { // Try to determine the correct path based on current URL const currentPath = location.pathname; const basePath = currentPath.includes('/prebuilt-robots') ? '/prebuilt-robots' : '/robots'; navigate(basePath); } }; return ( {icon && ( {icon} )} {title} {children} {(showSaveButton || showCancelButton || onBackToSelection) && ( {onBackToSelection && ( )} {/* {showCancelButton && ( )} */} {showSaveButton && onSave && ( )} )} ); } ================================================ FILE: src/components/robot/pages/RobotCreate.tsx ================================================ import React, { useState } from 'react'; import { useNavigate } from 'react-router-dom'; import { useTranslation } from 'react-i18next'; import { Box, Typography, TextField, Button, Checkbox, IconButton, Card, CircularProgress, Container, CardContent, Tabs, Tab, FormControl, Select, MenuItem, InputLabel, Collapse, FormControlLabel } from '@mui/material'; import { ArrowBack, AutoAwesome, HighlightAlt } from '@mui/icons-material'; import { useGlobalInfoStore, useCacheInvalidation } from '../../../context/globalInfo'; import { canCreateBrowserInState, getActiveBrowserId, stopRecording } from '../../../api/recording'; import { createScrapeRobot, createLLMRobot, createAndRunRecording, createCrawlRobot, createSearchRobot } from "../../../api/storage"; import { AuthContext } from '../../../context/auth'; import { GenericModal } from '../../ui/GenericModal'; interface TabPanelProps { children?: React.ReactNode; index: number; value: number; } function TabPanel(props: TabPanelProps) { const { children, value, index, ...other } = props; return ( ); } const RobotCreate: React.FC = () => { const { 
t } = useTranslation(); const navigate = useNavigate(); const { setBrowserId, setRecordingUrl, notify, setRecordingId, setRerenderRobots } = useGlobalInfoStore(); const [tabValue, setTabValue] = useState(0); const [url, setUrl] = useState(''); const [scrapeRobotName, setScrapeRobotName] = useState(''); const [extractRobotName, setExtractRobotName] = useState(''); const [needsLogin, setNeedsLogin] = useState(false); const [isLoading, setIsLoading] = useState(false); const [isWarningModalOpen, setWarningModalOpen] = useState(false); const [activeBrowserId, setActiveBrowserId] = useState(''); const [outputFormats, setOutputFormats] = useState([]); const [generationMode, setGenerationMode] = useState<'agent' | 'recorder' | null>('recorder'); const [aiPrompt, setAiPrompt] = useState(''); const [llmProvider, setLlmProvider] = useState<'anthropic' | 'openai' | 'ollama'>('ollama'); const [llmModel, setLlmModel] = useState('default'); const [llmApiKey, setLlmApiKey] = useState(''); const [llmBaseUrl, setLlmBaseUrl] = useState(''); const [aiRobotName, setAiRobotName] = useState(''); const [crawlRobotName, setCrawlRobotName] = useState(''); const [crawlUrl, setCrawlUrl] = useState(''); const [crawlMode, setCrawlMode] = useState<'domain' | 'subdomain' | 'path'>('domain'); const [crawlLimit, setCrawlLimit] = useState(50); const [crawlMaxDepth, setCrawlMaxDepth] = useState(3); const [crawlIncludePaths, setCrawlIncludePaths] = useState(''); const [crawlExcludePaths, setCrawlExcludePaths] = useState(''); const [crawlUseSitemap, setCrawlUseSitemap] = useState(true); const [crawlFollowLinks, setCrawlFollowLinks] = useState(true); const [crawlRespectRobots, setCrawlRespectRobots] = useState(true); const [showCrawlAdvanced, setShowCrawlAdvanced] = useState(false); const [searchRobotName, setSearchRobotName] = useState(''); const [searchQuery, setSearchQuery] = useState(''); const [searchLimit, setSearchLimit] = useState(10); const [searchProvider] = 
useState<'duckduckgo'>('duckduckgo'); const [searchMode, setSearchMode] = useState<'discover' | 'scrape'>('discover'); const [searchTimeRange, setSearchTimeRange] = useState<'day' | 'week' | 'month' | 'year' | ''>(''); const { state } = React.useContext(AuthContext); const { user } = state; const { addOptimisticRobot, removeOptimisticRobot, invalidateRecordings, invalidateRuns, addOptimisticRun } = useCacheInvalidation(); const handleTabChange = (event: React.SyntheticEvent, newValue: number) => { setTabValue(newValue); }; const handleStartRecording = async () => { if (!url.trim()) { notify('error', 'Please enter a valid URL'); return; } setIsLoading(true); try { const canCreateRecording = await canCreateBrowserInState("recording"); if (!canCreateRecording) { const activeBrowser = await getActiveBrowserId(); if (activeBrowser) { setActiveBrowserId(activeBrowser); setWarningModalOpen(true); } else { notify('warning', t('recordingtable.notifications.browser_limit_warning')); } setIsLoading(false); return; } setBrowserId('new-recording'); setRecordingUrl(url); window.sessionStorage.setItem('browserId', 'new-recording'); window.sessionStorage.setItem('recordingUrl', url); window.sessionStorage.setItem('initialUrl', url); window.sessionStorage.setItem('needsLogin', needsLogin.toString()); const sessionId = Date.now().toString(); window.sessionStorage.setItem('recordingSessionId', sessionId); window.open(`/recording-setup?session=${sessionId}`, '_blank'); window.sessionStorage.setItem('nextTabIsRecording', 'true'); // Reset loading state immediately after opening new tab setIsLoading(false); navigate('/robots'); } catch (error) { console.error('Error starting recording:', error); notify('error', 'Failed to start recording. 
Please try again.'); setIsLoading(false); } };

  /**
   * Stops the browser session stored in `activeBrowserId` (if any), closes the
   * warning modal, and then starts a new recording for the URL currently in
   * the form by seeding sessionStorage and opening the recording-setup tab.
   */
  const handleDiscardAndCreate = async () => {
    if (activeBrowserId) {
      await stopRecording(activeBrowserId);
      notify('warning', t('browser_recording.notifications.terminated'));
    }

    setWarningModalOpen(false);
    setIsLoading(false);

    // Continue with the original Recording logic
    setBrowserId('new-recording');
    setRecordingUrl(url);

    window.sessionStorage.setItem('browserId', 'new-recording');
    window.sessionStorage.setItem('recordingUrl', url);
    window.sessionStorage.setItem('initialUrl', url);
    window.sessionStorage.setItem('needsLogin', needsLogin.toString());

    const sessionId = Date.now().toString();
    window.sessionStorage.setItem('recordingSessionId', sessionId);

    window.open(`/recording-setup?session=${sessionId}`, '_blank');
    window.sessionStorage.setItem('nextTabIsRecording', 'true');

    navigate('/robots');
  };

  /**
   * Splits a comma-separated path list into trimmed entries, dropping empty
   * ones so that input such as "a,,b" or "a," never yields an empty pattern.
   */
  const parsePathList = (raw: string): string[] =>
    raw
      .split(',')
      .map((entry) => entry.trim())
      .filter((entry) => entry.length > 0);

  /**
   * Validates the crawl form and creates a crawl robot from its current state.
   * On success the recordings cache is invalidated and the user is sent to the
   * robots list; on any failure (falsy result or thrown error) an error
   * notification is shown. The loading flag is always cleared.
   */
  const handleCreateCrawlRobot = async () => {
    if (!crawlUrl.trim()) {
      notify('error', 'Please enter a valid URL');
      return;
    }
    if (!crawlRobotName.trim()) {
      notify('error', 'Please enter a robot name');
      return;
    }

    setIsLoading(true);
    try {
      const result = await createCrawlRobot(
        crawlUrl,
        crawlRobotName,
        {
          mode: crawlMode,
          limit: crawlLimit,
          maxDepth: crawlMaxDepth,
          includePaths: parsePathList(crawlIncludePaths),
          excludePaths: parsePathList(crawlExcludePaths),
          useSitemap: crawlUseSitemap,
          followLinks: crawlFollowLinks,
          respectRobots: crawlRespectRobots
        }
      );

      if (result) {
        invalidateRecordings();
        notify('success', `${crawlRobotName} created successfully!`);
        navigate('/robots');
      } else {
        notify('error', 'Failed to create crawl robot');
      }
    } catch (error) {
      // A rejected request previously escaped as an unhandled rejection.
      console.error('Error creating crawl robot:', error);
      notify('error', 'Failed to create crawl robot');
    } finally {
      // Runs on every path so the form can never stay stuck in "loading".
      setIsLoading(false);
    }
  };

  /**
   * Validates the search form and creates a search robot from its current
   * state. Success and failure are reported the same way as for crawl robots,
   * and the loading flag is always cleared.
   */
  const handleCreateSearchRobot = async () => {
    if (!searchQuery.trim()) {
      notify('error', 'Please enter a search query');
      return;
    }
    if (!searchRobotName.trim()) {
      notify('error', 'Please enter a robot name');
      return;
    }

    setIsLoading(true);
    try {
      const result = await createSearchRobot(
        searchRobotName,
        {
          query: searchQuery,
          limit: searchLimit,
          provider: searchProvider,
          filters: {
            // An empty selection means "no time filter"; `||` narrows '' away.
            timeRange: searchTimeRange || undefined
          },
          mode: searchMode
        }
      );

      if (result) {
        invalidateRecordings();
        notify('success', `${searchRobotName} created successfully!`);
        navigate('/robots');
      } else {
        notify('error', 'Failed to create search robot');
      }
    } catch (error) {
      console.error('Error creating search robot:', error);
      notify('error', 'Failed to create search robot');
    } finally {
      setIsLoading(false);
    }
  };

  return ( navigate('/robots')} sx={{ ml: -1, mr: 1, color: theme => theme.palette.text.primary, backgroundColor: 'transparent !important', '&:hover': { backgroundColor: 'transparent !important', }, '&:active': { backgroundColor: 'transparent !important', }, '&:focus': { backgroundColor: 'transparent !important', }, '&:focus-visible': { backgroundColor: 'transparent !important', }, }} disableRipple aria-label="Go back" > Create New Robot Maxun Logo Extract structured data from websites using AI or record your own extraction workflow. Choose How to Build setGenerationMode('recorder')} sx={{ flex: 1, cursor: 'pointer', border: '2px solid', borderColor: generationMode === 'recorder' ? '#ff00c3' : 'divider', transition: 'all 0.2s', '&:hover': { borderColor: '#ff00c3', } }} > Recorder Mode Record your actions into a workflow. 
setGenerationMode('agent')} sx={{ flex: 1, cursor: 'pointer', border: '2px solid', borderColor: generationMode === 'agent' ? '#ff00c3' : 'divider', transition: 'all 0.2s', '&:hover': { borderColor: '#ff00c3', }, position: 'relative' }} > Beta AI Mode Describe the task. It builds it for you. {generationMode === 'agent' && ( setExtractRobotName(e.target.value)} label="Name" /> setAiPrompt(e.target.value)} label="Extraction Prompt" /> setUrl(e.target.value)} label="Website URL (Optional)" /> LLM Provider Model {/* API Key for non-Ollama providers */} {llmProvider !== 'ollama' && ( setLlmApiKey(e.target.value)} label="API Key (Optional if set in .env)" /> )} {llmProvider === 'ollama' && ( setLlmBaseUrl(e.target.value)} label="Ollama Base URL (Optional)" /> )} )} {generationMode === 'recorder' && ( <> setUrl(e.target.value)} label="Website URL" /> )} Maxun Logo Turn websites into LLM-ready Markdown, clean HTML, or screenshots for AI apps. setScrapeRobotName(e.target.value)} sx={{ mb: 2 }} label="Name" /> setUrl(e.target.value)} label="Website URL" sx={{ mb: 2 }} /> Output Formats * Maxun Logo Crawl entire websites and gather data from multiple pages automatically. 
setCrawlRobotName(e.target.value)} sx={{ mb: 2 }} /> setCrawlUrl(e.target.value)} sx={{ mb: 2 }} /> setCrawlLimit(parseInt(e.target.value) || 10)} sx={{ mb: 2 }} /> Crawl Scope setCrawlMaxDepth(parseInt(e.target.value) || 3)} sx={{ mb: 2 }} helperText="How many links deep to follow (default: 3)" FormHelperTextProps={{ sx: { ml: 0 } }} /> setCrawlIncludePaths(e.target.value)} sx={{ mb: 2 }} helperText="Only crawl URLs matching these paths (comma-separated)" FormHelperTextProps={{ sx: { ml: 0 } }} /> setCrawlExcludePaths(e.target.value)} sx={{ mb: 2 }} helperText="Skip URLs matching these paths (comma-separated)" FormHelperTextProps={{ sx: { ml: 0 } }} /> setCrawlUseSitemap(e.target.checked)} /> } label="Use sitemap.xml for URL discovery" /> setCrawlFollowLinks(e.target.checked)} /> } label="Follow links on pages" /> setCrawlRespectRobots(e.target.checked)} /> } label="Respect robots.txt" /> Maxun Logo Search the web and gather data from relevant results. setSearchRobotName(e.target.value)} sx={{ mb: 2 }} /> setSearchQuery(e.target.value)} sx={{ mb: 2 }} /> setSearchLimit(parseInt(e.target.value) || 10)} sx={{ mb: 2 }} /> Mode Time Range { setWarningModalOpen(false); setIsLoading(false); }} modalStyle={modalStyle}>
{t('recordingtable.warning_modal.title')} {t('recordingtable.warning_modal.message')}
); }; export default RobotCreate; const modalStyle = { top: '50%', left: '50%', transform: 'translate(-50%, -50%)', width: '30%', backgroundColor: 'background.paper', p: 4, height: 'fit-content', display: 'block', padding: '20px', }; ================================================ FILE: src/components/robot/pages/RobotDuplicatePage.tsx ================================================ import React, { useState, useEffect } from "react"; import { TextField, Box } from "@mui/material"; import { useGlobalInfoStore } from "../../../context/globalInfo"; import { duplicateRecording, getStoredRecording } from "../../../api/storage"; import { useTranslation, Trans } from "react-i18next"; import { RobotConfigPage } from "./RobotConfigPage"; import { useNavigate, useLocation } from "react-router-dom"; interface RobotDuplicatePageProps { handleStart: (settings: any) => void; } export const RobotDuplicatePage = ({ handleStart }: RobotDuplicatePageProps) => { const { t } = useTranslation(); const navigate = useNavigate(); const location = useLocation(); const [targetUrl, setTargetUrl] = useState(""); const [robot, setRobot] = useState(null); const [isLoading, setIsLoading] = useState(false); const { recordingId, notify, setRerenderRobots } = useGlobalInfoStore(); const robotIdFromUrl = location.pathname.split('/').filter(Boolean)[1] ?? 
null; const effectiveId = recordingId || robotIdFromUrl; useEffect(() => { getRobot(); }, []); useEffect(() => { if (robot) { let url = robot.recording_meta?.url; if (!url && robot.recording?.workflow?.length) { const lastPair = robot.recording.workflow[robot.recording.workflow.length - 1]; url = lastPair?.what?.find((action: any) => action.action === "goto")?.args?.[0]; } if (url) setTargetUrl(url); } }, [robot]); const getRobot = async () => { if (!effectiveId) { notify("error", t("robot_duplication.notifications.robot_not_found")); return; } const data = await getStoredRecording(effectiveId); if (!data) { notify("error", t("robot_duplication.notifications.robot_not_found")); return; } setRobot(data); }; const handleSave = async () => { if (!robot || !targetUrl) { notify("error", t("robot_duplication.notifications.url_required")); return; } setIsLoading(true); try { const result = await duplicateRecording(robot.recording_meta.id, targetUrl); if (result) { setRerenderRobots(true); notify("success", t("robot_duplication.notifications.duplicate_success")); handleStart(robot); navigate("/robots"); } else { notify("error", t("robot_duplication.notifications.duplicate_error")); } } catch (error) { notify("error", t("robot_duplication.notifications.unknown_error")); console.error("Error duplicating robot:", error); } finally { setIsLoading(false); } }; return ( <> {robot && ( <> {t("robot_duplication.descriptions.purpose")}
, ]} />
{t("robot_duplication.descriptions.warning")} setTargetUrl(e.target.value)} style={{ marginBottom: "20px", marginTop: "30px" }} /> )}
); }; ================================================ FILE: src/components/robot/pages/RobotEditPage.tsx ================================================ import { useState, useEffect } from "react"; import { useTranslation } from "react-i18next"; import { TextField, Typography, Box, Button, IconButton, InputAdornment, FormControl, InputLabel, Select, MenuItem, FormControlLabel, Checkbox, Collapse } from "@mui/material"; import { Visibility, VisibilityOff } from "@mui/icons-material"; import { useGlobalInfoStore } from "../../../context/globalInfo"; import { getStoredRecording, updateRecording } from "../../../api/storage"; import { WhereWhatPair } from "maxun-core"; import { RobotConfigPage } from "./RobotConfigPage"; import { useNavigate, useLocation } from "react-router-dom"; interface RobotMeta { name: string; id: string; prebuiltId?: string; createdAt: string; pairs: number; updatedAt: string; params: any[]; type?: 'extract' | 'scrape' | 'crawl' | 'search'; url?: string; formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[]; isLLM?: boolean; } interface RobotWorkflow { workflow: WhereWhatPair[]; } interface ScheduleConfig { runEvery: number; runEveryUnit: "MINUTES" | "HOURS" | "DAYS" | "WEEKS" | "MONTHS"; startFrom: | "SUNDAY" | "MONDAY" | "TUESDAY" | "WEDNESDAY" | "THURSDAY" | "FRIDAY" | "SATURDAY"; atTimeStart?: string; atTimeEnd?: string; timezone: string; lastRunAt?: Date; nextRunAt?: Date; cronExpression?: string; } export interface RobotSettings { id: string; userId?: number; recording_meta: RobotMeta; recording: RobotWorkflow; google_sheet_email?: string | null; google_sheet_name?: string | null; google_sheet_id?: string | null; google_access_token?: string | null; google_refresh_token?: string | null; schedule?: ScheduleConfig | null; } interface RobotSettingsProps { handleStart: (settings: RobotSettings) => void; } interface CredentialInfo { value: string; type: string; } interface Credentials { [key: string]: 
CredentialInfo; } interface CredentialVisibility { [key: string]: boolean; } interface GroupedCredentials { passwords: string[]; emails: string[]; usernames: string[]; others: string[]; } interface ScrapeListLimit { pairIndex: number; actionIndex: number; argIndex: number; currentLimit: number; } interface CrawlConfig { mode?: string; limit?: number; maxDepth?: number; useSitemap?: boolean; followLinks?: boolean; excludePaths?: string[]; includePaths?: string[]; respectRobots?: boolean; } interface SearchConfig { mode?: 'discover' | 'scrape'; limit?: number; query?: string; filters?: Record; provider?: string; } export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => { const { t } = useTranslation(); const navigate = useNavigate(); const location = useLocation(); const [credentials, setCredentials] = useState({}); const { recordingId, notify, setRerenderRobots } = useGlobalInfoStore(); const [robot, setRobot] = useState(null); const [credentialGroups, setCredentialGroups] = useState({ passwords: [], emails: [], usernames: [], others: [], }); const [showPasswords, setShowPasswords] = useState({}); const [scrapeListLimits, setScrapeListLimits] = useState( [] ); const [isLoading, setIsLoading] = useState(false); const [crawlConfig, setCrawlConfig] = useState({}); const [searchConfig, setSearchConfig] = useState({}); const [showCrawlAdvanced, setShowCrawlAdvanced] = useState(false); const isEmailPattern = (value: string): boolean => { return value.includes("@"); }; const isUsernameSelector = (selector: string): boolean => { return ( selector.toLowerCase().includes("username") || selector.toLowerCase().includes("user") || selector.toLowerCase().includes("email") ); }; const determineCredentialType = ( selector: string, info: CredentialInfo ): "password" | "email" | "username" | "other" => { if ( info.type === "password" || selector.toLowerCase().includes("password") ) { return "password"; } if ( isEmailPattern(info.value) || 
selector.toLowerCase().includes("email") ) { return "email"; } if (isUsernameSelector(selector)) { return "username"; } return "other"; }; useEffect(() => { getRobot(); }, []); useEffect(() => { if (robot?.recording?.workflow) { const extractedCredentials = extractInitialCredentials( robot.recording.workflow ); setCredentials(extractedCredentials); setCredentialGroups(groupCredentialsByType(extractedCredentials)); findScrapeListLimits(robot.recording.workflow); extractCrawlConfig(robot.recording.workflow); extractSearchConfig(robot.recording.workflow); } }, [robot]); const findScrapeListLimits = (workflow: WhereWhatPair[]) => { const limits: ScrapeListLimit[] = []; workflow.forEach((pair, pairIndex) => { if (!pair.what) return; pair.what.forEach((action, actionIndex) => { if ( action.action === "scrapeList" && action.args && action.args.length > 0 ) { // Check if first argument has a limit property const arg = action.args[0]; if (arg && typeof arg === "object" && "limit" in arg) { limits.push({ pairIndex, actionIndex, argIndex: 0, currentLimit: arg.limit, }); } } }); }); setScrapeListLimits(limits); }; const extractCrawlConfig = (workflow: WhereWhatPair[]) => { workflow.forEach((pair) => { if (!pair.what) return; pair.what.forEach((action: any) => { if (action.action === "crawl" && action.args && action.args.length > 0) { const config = action.args[0]; if (config && typeof config === "object") { setCrawlConfig(config as CrawlConfig); } } }); }); }; const extractSearchConfig = (workflow: WhereWhatPair[]) => { workflow.forEach((pair) => { if (!pair.what) return; pair.what.forEach((action: any) => { if (action.action === "search" && action.args && action.args.length > 0) { const config = action.args[0]; if (config && typeof config === "object") { setSearchConfig(config as SearchConfig); } } }); }); }; function extractInitialCredentials(workflow: any[]): Credentials { const credentials: Credentials = {}; const isPrintableCharacter = (char: string): boolean => { return 
char.length === 1 && !!char.match(/^[\x20-\x7E]$/); }; workflow.forEach((step) => { if (!step.what) return; let currentSelector = ""; let currentValue = ""; let currentType = ""; let i = 0; while (i < step.what.length) { const action = step.what[i]; if (!action.action || !action.args?.[0]) { i++; continue; } const selector = action.args[0]; // Handle full word type actions first if ( action.action === "type" && action.args?.length >= 2 && typeof action.args[1] === "string" && action.args[1].length > 1 ) { if (!credentials[selector]) { credentials[selector] = { value: action.args[1], type: action.args[2] || "text", }; } i++; continue; } // Handle character-by-character sequences (both type and press) if ( (action.action === "type" || action.action === "press") && action.args?.length >= 2 && typeof action.args[1] === "string" ) { if (selector !== currentSelector) { if (currentSelector && currentValue) { credentials[currentSelector] = { value: currentValue, type: currentType || "text", }; } currentSelector = selector; currentValue = credentials[selector]?.value || ""; currentType = action.args[2] || credentials[selector]?.type || "text"; } const character = action.args[1]; if (isPrintableCharacter(character)) { currentValue += character; } else if (character === "Backspace") { currentValue = currentValue.slice(0, -1); } if (!currentType && action.args[2]?.toLowerCase() === "password") { currentType = "password"; } let j = i + 1; while (j < step.what.length) { const nextAction = step.what[j]; if ( !nextAction.action || !nextAction.args?.[0] || nextAction.args[0] !== selector || (nextAction.action !== "type" && nextAction.action !== "press") ) { break; } if (nextAction.args[1] === "Backspace") { currentValue = currentValue.slice(0, -1); } else if (isPrintableCharacter(nextAction.args[1])) { currentValue += nextAction.args[1]; } j++; } credentials[currentSelector] = { value: currentValue, type: currentType, }; i = j; } else { i++; } } if (currentSelector && currentValue) 
{ credentials[currentSelector] = { value: currentValue, type: currentType || "text", }; } }); return credentials; } const groupCredentialsByType = ( credentials: Credentials ): GroupedCredentials => { return Object.entries(credentials).reduce( (acc: GroupedCredentials, [selector, info]) => { const credentialType = determineCredentialType(selector, info); switch (credentialType) { case "password": acc.passwords.push(selector); break; case "email": acc.emails.push(selector); break; case "username": acc.usernames.push(selector); break; default: acc.others.push(selector); } return acc; }, { passwords: [], emails: [], usernames: [], others: [] } ); }; const getRobot = async () => { if (recordingId) { try { const robot = await getStoredRecording(recordingId); setRobot(robot); } catch (error) { notify("error", t("robot_edit.notifications.update_failed")); } } else { notify("error", t("robot_edit.notifications.update_failed")); } }; const handleClickShowPassword = (selector: string) => { setShowPasswords((prev) => ({ ...prev, [selector]: !prev[selector], })); }; const handleRobotNameChange = (newName: string) => { setRobot((prev) => prev ? 
{ ...prev, recording_meta: { ...prev.recording_meta, name: newName } } : prev ); };

  /**
   * Stores an edited value for the credential keyed by `selector`, keeping the
   * credential's other fields as they were.
   */
  const handleCredentialChange = (selector: string, value: string) => {
    setCredentials((prev) => ({
      ...prev,
      [selector]: {
        ...prev[selector],
        value,
      },
    }));
  };

  /**
   * Sets a new `limit` on the argument object at
   * workflow[pairIndex].what[actionIndex].args[argIndex] and mirrors the value
   * into the matching `scrapeListLimits` entry.
   *
   * The robot state is rebuilt along the touched path instead of being mutated
   * in place, and the second state update is issued outside the `setRobot`
   * updater so that the updater stays free of side effects.
   */
  const handleLimitChange = (
    pairIndex: number,
    actionIndex: number,
    argIndex: number,
    newLimit: number
  ) => {
    setRobot((prev) => {
      if (!prev) return prev;

      const targetAction = prev.recording.workflow[pairIndex]?.what?.[actionIndex];
      // Nothing to update when the addressed argument slot does not exist.
      if (!targetAction?.args || targetAction.args.length <= argIndex) return prev;

      const updatedWorkflow = prev.recording.workflow.map((pair, pIdx) => {
        if (pIdx !== pairIndex || !pair.what) return pair;
        return {
          ...pair,
          what: pair.what.map((action, aIdx) => {
            if (aIdx !== actionIndex || !action.args) return action;
            return {
              ...action,
              args: action.args.map((arg, idx) =>
                // Only plain config objects can carry a `limit` property.
                idx === argIndex && arg && typeof arg === "object" && !Array.isArray(arg)
                  ? { ...arg, limit: newLimit }
                  : arg
              ),
            };
          }),
        };
      });

      return {
        ...prev,
        recording: { ...prev.recording, workflow: updatedWorkflow },
      };
    });

    // Entries in scrapeListLimits are keyed by the same three indices, so a
    // matching entry always refers to the argument updated above.
    setScrapeListLimits((prevLimits) =>
      prevLimits.map((item) =>
        item.pairIndex === pairIndex &&
        item.actionIndex === actionIndex &&
        item.argIndex === argIndex
          ? { ...item, currentLimit: newLimit }
          : item
      )
    );
  };

  /**
   * Renames the action at workflow[pairIndex].what[actionIndex] by setting its
   * `name` field. The pair and its `what` array are copied rather than written
   * through, so the previous state object is left untouched.
   */
  const handleActionNameChange = (
    pairIndex: number,
    actionIndex: number,
    newName: string
  ) => {
    setRobot((prev) => {
      if (!prev) return prev;

      const targetPair = prev.recording.workflow[pairIndex];
      if (!targetPair?.what || targetPair.what.length <= actionIndex) return prev;

      const updatedWorkflow = prev.recording.workflow.map((pair, pIdx) => {
        if (pIdx !== pairIndex || !pair.what) return pair;
        return {
          ...pair,
          what: pair.what.map((action, aIdx) =>
            aIdx === actionIndex ? { ...action, name: newName } : action
          ),
        };
      });

      return {
        ...prev,
        recording: { ...prev.recording, workflow: updatedWorkflow },
      };
    });
  };

  /** Stores the edited target URL in the robot's `recording_meta.url`. */
  const handleTargetUrlChange = (newUrl: string) => {
    setRobot((prev) => {
      if (!prev) return prev;
      return {
        ...prev,
        recording_meta: { ...prev.recording_meta, url: newUrl },
      };
    });
  };

  const renderAllCredentialFields = () => { return ( <> {renderCredentialFields( credentialGroups.usernames, t("Username") )} 
{renderCredentialFields(credentialGroups.emails, t("Email"))} {renderCredentialFields( credentialGroups.passwords, t("Password") )} {renderCredentialFields(credentialGroups.others, t("Other"))} ); }; const renderScrapeListLimitFields = () => { if (scrapeListLimits.length === 0) return null; return ( <> {t("List Limits")} {scrapeListLimits.map((limitInfo, index) => { const scrapeListAction = robot?.recording?.workflow?.[limitInfo.pairIndex]?.what?.[limitInfo.actionIndex]; const actionName = scrapeListAction?.name || `List Limit ${index + 1}`; return ( { const value = parseInt(e.target.value, 10); if (value >= 1) { handleLimitChange( limitInfo.pairIndex, limitInfo.actionIndex, limitInfo.argIndex, value ); } }} inputProps={{ min: 1 }} style={{ marginBottom: "20px" }} /> ); })} ); }; const renderActionNameFields = () => { if (!robot || !robot.recording || !robot.recording.workflow) return null; const editableActions = new Set(['screenshot', 'scrapeList', 'scrapeSchema']); const textInputs: JSX.Element[] = []; const screenshotInputs: JSX.Element[] = []; const listInputs: JSX.Element[] = []; let screenshotCount = 0; let listCount = 0; robot.recording.workflow.forEach((pair, pairIndex) => { if (!pair.what) return; pair.what.forEach((action, actionIndex) => { if (!editableActions.has(String(action.action))) return; let currentName = action.name || ''; if (!currentName) { switch (action.action) { case 'scrapeSchema': currentName = 'Texts'; break; case 'screenshot': screenshotCount++; currentName = `Screenshot ${screenshotCount}`; break; case 'scrapeList': listCount++; currentName = `List ${listCount}`; break; } } else { switch (action.action) { case 'screenshot': screenshotCount++; break; case 'scrapeList': listCount++; break; } } const textField = ( handleActionNameChange(pairIndex, actionIndex, e.target.value)} style={{ marginBottom: '12px' }} fullWidth /> ); switch (action.action) { case 'scrapeSchema': { const existingName = currentName || "Texts"; if 
(!textInputs.length) { textInputs.push( { const newName = e.target.value; setRobot((prev) => { if (!prev?.recording?.workflow) return prev; const updated = { ...prev }; updated.recording = { ...prev.recording }; updated.recording.workflow = prev.recording.workflow.map((p) => ({ ...p, what: p.what?.map((a) => { if (a.action === "scrapeSchema") { const updatedAction = { ...a }; updatedAction.name = newName; return updatedAction; } return a; }), })); return updated; }); }} style={{ marginBottom: "12px" }} fullWidth /> ); } break; } case 'screenshot': screenshotInputs.push(textField); break; case 'scrapeList': listInputs.push(textField); break; } }); }); const hasAnyInputs = textInputs.length > 0 || screenshotInputs.length > 0 || listInputs.length > 0; if (!hasAnyInputs) return null; return ( <> {t('Actions')} {textInputs.length > 0 && ( <> Texts {textInputs} )} {screenshotInputs.length > 0 && ( <> 0 ? '16px' : '0' }}> Screenshots {screenshotInputs} )} {listInputs.length > 0 && ( <> 0 || screenshotInputs.length > 0) ? '16px' : '0' }}> Lists {listInputs} )} ); }; const renderCredentialFields = ( selectors: string[], headerText: string, ) => { if (selectors.length === 0) return null; return ( <> {selectors.map((selector, index) => { const isVisible = showPasswords[selector]; return ( handleCredentialChange(selector, e.target.value)} fullWidth style={{ marginBottom: "20px" }} InputProps={{ endAdornment: ( handleClickShowPassword(selector)} edge="end" disabled={!credentials[selector]?.value} > {isVisible ? 
: } ), }} /> ); })} ); };

  /**
   * Returns the URL this robot targets: `recording_meta.url` when it is set,
   * otherwise the first argument of the first "goto" action in the last
   * workflow pair. Yields undefined when neither is available.
   *
   * Every step is optional-chained so a robot without a recording, with an
   * empty workflow, or with a pair lacking `what` cannot throw here.
   */
  const getTargetUrl = (): string | undefined => {
    const metaUrl = robot?.recording_meta?.url;
    if (metaUrl) return metaUrl;

    const workflow = robot?.recording?.workflow;
    // For an empty workflow the index is -1 and the lookup is simply undefined.
    const lastPair = workflow?.[workflow.length - 1];
    return lastPair?.what?.find((action) => action.action === "goto")?.args?.[0];
  };

  const renderCrawlConfigFields = () => { if (robot?.recording_meta.type !== 'crawl') return null; return ( <> { const value = parseInt(e.target.value, 10); if (value >= 1) { setCrawlConfig((prev) => ({ ...prev, limit: value })); } }} inputProps={{ min: 1 }} style={{ marginBottom: "20px" }} /> Crawl Scope { const value = parseInt(e.target.value, 10); if (value >= 1) { setCrawlConfig((prev) => ({ ...prev, maxDepth: value })); } }} inputProps={{ min: 1 }} sx={{ mb: 2 }} helperText="How many links deep to follow (default: 3)" /> { const paths = e.target.value ? e.target.value.split(',').map(p => p.trim()) : []; setCrawlConfig((prev) => ({ ...prev, includePaths: paths })); }} sx={{ mb: 2 }} helperText="Only crawl URLs matching these paths (comma-separated)" /> { const paths = e.target.value ? 
e.target.value.split(',').map(p => p.trim()) : []; setCrawlConfig((prev) => ({ ...prev, excludePaths: paths })); }} sx={{ mb: 2 }} helperText="Skip URLs matching these paths (comma-separated)" /> setCrawlConfig((prev) => ({ ...prev, useSitemap: e.target.checked }))} /> } label="Use sitemap.xml for URL discovery" /> setCrawlConfig((prev) => ({ ...prev, followLinks: e.target.checked }))} /> } label="Follow links on pages" /> setCrawlConfig((prev) => ({ ...prev, respectRobots: e.target.checked }))} /> } label="Respect robots.txt" /> ); }; const renderSearchConfigFields = () => { if (robot?.recording_meta.type !== 'search') return null; return ( <> { setSearchConfig((prev) => ({ ...prev, query: e.target.value })); }} sx={{ mb: 2 }} /> { const value = parseInt(e.target.value, 10); if (value >= 1) { setSearchConfig((prev) => ({ ...prev, limit: value })); } }} inputProps={{ min: 1 }} sx={{ mb: 2 }} /> Mode Time Range ); }; const handleSave = async () => { if (!robot) return; setIsLoading(true); try { const credentialsForPayload = Object.entries(credentials).reduce( (acc, [selector, info]) => { const enforceType = info.type === "password" ? 
"password" : "text"; acc[selector] = { value: info.value, type: enforceType, }; return acc; }, {} as Record ); const targetUrl = getTargetUrl(); let updatedWorkflow = robot.recording.workflow; if (robot.recording_meta.type === 'crawl') { updatedWorkflow = updatedWorkflow.map((pair: any) => { if (!pair.what) return pair; return { ...pair, what: pair.what.map((action: any) => { if (action.action === 'crawl') { return { ...action, args: [{ ...crawlConfig }] }; } return action; }) }; }); } if (robot.recording_meta.type === 'search') { updatedWorkflow = updatedWorkflow.map((pair: any) => { if (!pair.what) return pair; return { ...pair, what: pair.what.map((action: any) => { if (action.action === 'search') { return { ...action, args: [{ ...searchConfig, provider: 'duckduckgo' }] }; } return action; }) }; }); } const payload: any = { name: robot.recording_meta.name, limits: scrapeListLimits.map((limit) => ({ pairIndex: limit.pairIndex, actionIndex: limit.actionIndex, argIndex: limit.argIndex, limit: limit.currentLimit, })), credentials: credentialsForPayload, targetUrl: targetUrl, workflow: updatedWorkflow, }; const success = await updateRecording(robot.recording_meta.id, payload); if (success) { setRerenderRobots(true); notify("success", t("robot_edit.notifications.update_success")); handleStart(robot); const basePath = "/robots"; navigate(basePath); } else { notify("error", t("robot_edit.notifications.update_failed")); } } catch (error) { notify("error", t("robot_edit.notifications.update_error")); console.error("Error updating robot:", error); } finally { setIsLoading(false); } }; const handleCancel = () => { const basePath = "/robots"; navigate(basePath); }; return ( <> {robot && ( <> handleRobotNameChange(e.target.value)} style={{ marginBottom: "20px" }} /> {robot.recording_meta.type !== 'search' && ( handleTargetUrlChange(e.target.value)} style={{ marginBottom: "20px" }} /> )} {renderCrawlConfigFields()} {renderSearchConfigFields()} {renderScrapeListLimitFields()} 
{renderActionNameFields()} {renderAllCredentialFields()} )} ); }; ================================================ FILE: src/components/robot/pages/RobotIntegrationPage.tsx ================================================ import React, { useState, useEffect } from "react"; import { MenuItem, Typography, CircularProgress, Alert, AlertTitle, Button, TextField, IconButton, Box, Chip, Card, CardContent, CardActions, Switch, FormControlLabel, Table, TableBody, TableCell, TableContainer, TableHead, TableRow, Paper, } from "@mui/material"; import { Add as AddIcon, Delete as DeleteIcon, Edit as EditIcon, Science as ScienceIcon, } from "@mui/icons-material"; import axios from "axios"; import { useGlobalInfoStore } from "../../../context/globalInfo"; import { getStoredRecording } from "../../../api/storage"; import { apiUrl } from "../../../apiConfig.js"; import { v4 as uuid } from "uuid"; import { useTranslation } from "react-i18next"; import { useNavigate, useLocation } from "react-router-dom"; import { addWebhook, updateWebhook, removeWebhook, getWebhooks, testWebhook, WebhookConfig, } from "../../../api/webhook"; import { RobotConfigPage } from "./RobotConfigPage"; interface IntegrationProps { handleStart: (data: IntegrationSettings) => void; robotPath?: string; preSelectedIntegrationType?: "googleSheets" | "airtable" | "webhook" | null; } export interface IntegrationSettings { spreadsheetId?: string; spreadsheetName?: string; airtableBaseId?: string; airtableBaseName?: string; airtableTableName?: string; airtableTableId?: string; webhooks?: WebhookConfig[]; data: string; integrationType: "googleSheets" | "airtable" | "webhook"; } export const RobotIntegrationPage = ({ handleStart, robotPath = "robots", preSelectedIntegrationType = null, }: IntegrationProps) => { const { t } = useTranslation(); const navigate = useNavigate(); const location = useLocation(); const pathSegments = location.pathname.split('/'); const robotsIndex = pathSegments.findIndex(segment => segment 
=== 'robots' || segment === 'prebuilt-robots'); const integrateIndex = pathSegments.findIndex(segment => segment === 'integrate'); const robotIdFromUrl = robotsIndex !== -1 && robotsIndex + 1 < pathSegments.length ? pathSegments[robotsIndex + 1] : null; const integrationType = integrateIndex !== -1 && integrateIndex + 1 < pathSegments.length ? pathSegments[integrateIndex + 1] as "googleSheets" | "airtable" | "webhook" : preSelectedIntegrationType || null; const [settings, setSettings] = useState({ spreadsheetId: "", spreadsheetName: "", airtableBaseId: "", airtableBaseName: "", airtableTableName: "", airtableTableId: "", webhooks: [], data: "", integrationType: integrationType || "airtable", }); const [spreadsheets, setSpreadsheets] = useState<{ id: string; name: string }[]>([]); const [airtableBases, setAirtableBases] = useState<{ id: string; name: string }[]>([]); const [airtableTables, setAirtableTables] = useState<{ id: string; name: string }[]>([]); const [loading, setLoading] = useState(false); const [error, setError] = useState(null); const [showWebhookForm, setShowWebhookForm] = useState(false); const [editingWebhook, setEditingWebhook] = useState(null); const [newWebhook, setNewWebhook] = useState({ id: "", url: "", events: ["run_completed"], active: true, }); const [urlError, setUrlError] = useState(null); const { recordingId: recordingIdFromStore, notify, setRerenderRobots, setRecordingId } = useGlobalInfoStore(); const recordingId = robotIdFromUrl || recordingIdFromStore; useEffect(() => { if (robotIdFromUrl && robotIdFromUrl !== recordingIdFromStore) { setRecordingId(robotIdFromUrl); } }, [robotIdFromUrl, recordingIdFromStore, setRecordingId]); const [recording, setRecording] = useState(null); const [selectedIntegrationType, setSelectedIntegrationType] = useState< "googleSheets" | "airtable" | "webhook" | null >(integrationType); const isScrapeRobot = recording?.recording_meta?.type === "scrape"; const authenticateWithGoogle = () => { if (!recordingId) 
/**
 * Loads the webhooks configured for the current robot and stores them in
 * `settings.webhooks`.
 *
 * Does nothing when no recording id is available. Failures are logged to the
 * console and otherwise ignored, so the previously loaded list stays in place.
 */
const fetchWebhooks = async () => {
  // Guard BEFORE raising the loading flag. Previously the early return sat
  // after setLoading(true), so a missing recordingId left `loading` stuck at
  // true and every `disabled={loading}` webhook action stayed disabled.
  if (!recordingId) return;

  setLoading(true);
  try {
    const response = await getWebhooks(recordingId);
    if (response.ok && response.webhooks) {
      setSettings((prev) => ({ ...prev, webhooks: response.webhooks }));
    }
  } catch (error: unknown) {
    console.error("Error fetching webhooks:", error);
  } finally {
    // Single exit point: the flag is cleared on success, failure and any
    // future early return added inside the try block.
    setLoading(false);
  }
};
notify("error", "Please select at least one event"); return; } if (!recordingId) return; try { setLoading(true); const webhookWithId = { ...newWebhook, id: uuid() }; const response = await addWebhook(webhookWithId, recordingId); if (response.ok) { setSettings((prev) => ({ ...prev, webhooks: [...(prev.webhooks || []), webhookWithId] })); setNewWebhook({ id: "", url: "", events: ["run_completed"], active: true }); setShowWebhookForm(false); notify("success", "Webhook added successfully"); } else { notify("error", response.message || "Failed to add webhook"); } setLoading(false); } catch (error: any) { setLoading(false); notify("error", "Failed to add webhook"); console.error("Error adding webhook:", error); } }; const updateWebhookSetting = async () => { if (!validateWebhookData(newWebhook.url, newWebhook.events, editingWebhook || undefined)) return; if (!recordingId || !editingWebhook) return; try { setLoading(true); const response = await updateWebhook({ ...newWebhook, id: editingWebhook }, recordingId); if (response.ok) { setSettings((prev) => ({ ...prev, webhooks: (prev.webhooks || []).map((webhook) => webhook.id === editingWebhook ? 
/**
 * Sends a test delivery for the webhook with the given id.
 *
 * Silently returns when there is no current recording or the webhook is not in
 * `settings.webhooks`. The outcome is reported through `notify`; the loading
 * flag is raised for the duration of the request.
 */
const testWebhookSetting = async (webhookId: string) => {
  if (!recordingId) return;

  const target = settings.webhooks?.find((candidate) => candidate.id === webhookId);
  if (!target) return;

  try {
    setLoading(true);
    const result = await testWebhook(target, recordingId);
    if (!result.ok) {
      // Prefer the server-provided message, fall back to a generic one.
      notify("error", result.message || "Failed to test webhook");
    } else {
      notify("success", "Test webhook sent successfully");
    }
    setLoading(false);
  } catch (error: any) {
    setLoading(false);
    notify("error", "Failed to test webhook");
    console.error("Error testing webhook:", error);
  }
};
(recordingId) { try { const recordingData = await getStoredRecording(recordingId); setRecording(recordingData); } catch (error) { console.error("Failed to fetch recording:", error); } } }; fetchRecording(); if (selectedIntegrationType === "webhook") { fetchWebhooks(); } }, [recordingId, selectedIntegrationType]); const handleCancel = () => { const basePath = robotPath === "prebuilt-robots" ? "/prebuilt-robots" : "/robots"; navigate(basePath); }; const fetchSpreadsheetFiles = async () => { try { setLoading(true); const response = await axios.get(`${apiUrl}/auth/gsheets/files?robotId=${recordingId}`, { withCredentials: true }); setSpreadsheets(response.data); setLoading(false); } catch (error: any) { setLoading(false); console.error("Error fetching spreadsheet files:", error); notify("error", t("integration_settings.google.errors.fetch_error", { message: error.response?.data?.message || error.message, })); } }; const handleSpreadsheetSelect = (e: React.ChangeEvent) => { const selectedSheet = spreadsheets.find((sheet) => sheet.id === e.target.value); if (selectedSheet) { setSettings({ ...settings, spreadsheetId: selectedSheet.id, spreadsheetName: selectedSheet.name }); } }; const updateGoogleSheetId = async () => { try { setLoading(true); await axios.post(`${apiUrl}/auth/gsheets/update`, { spreadsheetId: settings.spreadsheetId, spreadsheetName: settings.spreadsheetName, robotId: recordingId, }, { withCredentials: true }); if (recordingId) { const updatedRecording = await getStoredRecording(recordingId); setRecording(updatedRecording); } setRerenderRobots(true); notify("success", t("integration_settings.google.notifications.sheet_selected")); setLoading(false); } catch (error: any) { setLoading(false); console.error("Error updating Google Sheet ID:", error); notify("error", t("integration_settings.google.errors.update_error", { message: error.response?.data?.message || error.message, })); } }; const removeGoogleSheetsIntegration = async () => { try { setLoading(true); 
await axios.post(`${apiUrl}/auth/gsheets/remove`, { robotId: recordingId }, { withCredentials: true }); setSpreadsheets([]); setSettings({ ...settings, spreadsheetId: "", spreadsheetName: "" }); if (recordingId) { const updatedRecording = await getStoredRecording(recordingId); setRecording(updatedRecording); } setRerenderRobots(true); notify("success", t("integration_settings.google.notifications.integration_removed")); setLoading(false); } catch (error: any) { setLoading(false); console.error("Error removing Google Sheets integration:", error); notify("error", t("integration_settings.google.errors.remove_error", { message: error.response?.data?.message || error.message, })); } }; const fetchAirtableBases = async () => { try { setLoading(true); const response = await axios.get(`${apiUrl}/auth/airtable/bases?robotId=${recordingId}`, { withCredentials: true }); setAirtableBases(response.data); setLoading(false); } catch (error: any) { setLoading(false); console.error("Error fetching Airtable bases:", error); notify("error", t("integration_settings.airtable.errors.fetch_error", { message: error.response?.data?.message || error.message, })); } }; const fetchAirtableTables = async (baseId: string, recordingId: string) => { try { setLoading(true); const response = await axios.get(`${apiUrl}/auth/airtable/tables?robotId=${recordingId}&baseId=${baseId}`, { withCredentials: true }); setAirtableTables(response.data); setLoading(false); } catch (error: any) { setLoading(false); console.error("Error fetching Airtable tables:", error); notify("error", t("integration_settings.airtable.errors.fetch_tables_error", { message: error.response?.data?.message || error.message, })); } }; const handleAirtableBaseSelect = async (e: React.ChangeEvent) => { const selectedBase = airtableBases.find((base) => base.id === e.target.value); if (selectedBase) { setSettings((prevSettings) => ({ ...prevSettings, airtableBaseId: selectedBase.id, airtableBaseName: selectedBase.name })); if 
(recordingId) await fetchAirtableTables(selectedBase.id, recordingId); } }; const handleAirtabletableSelect = (e: React.ChangeEvent) => { const selectedTable = airtableTables.find((table) => table.id === e.target.value); if (selectedTable) { setSettings((prevSettings) => ({ ...prevSettings, airtableTableId: e.target.value, airtableTableName: selectedTable?.name || "" })); } }; const updateAirtableBase = async () => { try { setLoading(true); await axios.post(`${apiUrl}/auth/airtable/update`, { baseId: settings.airtableBaseId, baseName: settings.airtableBaseName, robotId: recordingId, tableName: settings.airtableTableName, tableId: settings.airtableTableId, }, { withCredentials: true }); if (recordingId) { const updatedRecording = await getStoredRecording(recordingId); setRecording(updatedRecording); } setRerenderRobots(true); notify("success", t("integration_settings.airtable.notifications.base_selected")); setLoading(false); } catch (error: any) { setLoading(false); console.error("Error updating Airtable base:", error); notify("error", t("integration_settings.airtable.errors.update_error", { message: error.response?.data?.message || error.message, })); } }; const removeAirtableIntegration = async () => { try { setLoading(true); await axios.post(`${apiUrl}/auth/airtable/remove`, { robotId: recordingId }, { withCredentials: true }); setAirtableBases([]); setAirtableTables([]); setSettings({ ...settings, airtableBaseId: "", airtableBaseName: "", airtableTableName: "", airtableTableId: "" }); if (recordingId) { const updatedRecording = await getStoredRecording(recordingId); setRecording(updatedRecording); } setRerenderRobots(true); notify("success", t("integration_settings.airtable.notifications.integration_removed")); setLoading(false); } catch (error: any) { setLoading(false); console.error("Error removing Airtable integration:", error); notify("error", t("integration_settings.airtable.errors.remove_error", { message: error.response?.data?.message || error.message, 
})); } }; const renderGoogleSheetsIntegration = () => ( <> {t("integration_settings.google.title")} {recording?.google_sheet_id ? ( <> {t("integration_settings.google.alerts.success.title")} {t("integration_settings.google.alerts.success.content", { sheetName: recording.google_sheet_name })} {t("integration_settings.google.alerts.success.here")} ) : ( <> {!recording?.google_sheet_email ? ( <>

{t("integration_settings.google.descriptions.sync_info")}

) : ( <> {t("integration_settings.google.descriptions.authenticated_as", { email: recording.google_sheet_email })} {loading ? ( ) : error ? ( {error} ) : spreadsheets.length === 0 ? ( ) : ( <> {spreadsheets.map((sheet) => ({sheet.name}))} )} )} )} ); const renderAirtableIntegration = () => ( <> {t("integration_settings.airtable.title")} {recording?.airtable_base_id ? ( <> {t("integration_settings.airtable.alerts.success.title")} {t("integration_settings.airtable.alerts.success.content", { baseName: recording.airtable_base_name, tableName: recording.airtable_table_name })} {t("integration_settings.airtable.alerts.success.here")} ) : ( <> {!recording?.airtable_access_token ? ( <>

{t("integration_settings.airtable.descriptions.sync_info")}

/**
 * Returns the page heading for the currently selected integration type, or the
 * generic "Integrations" heading when nothing (or an unrecognised value) is
 * selected.
 */
const getIntegrationTitle = () => {
  if (selectedIntegrationType === "googleSheets") return "Google Sheets Integration";
  if (selectedIntegrationType === "airtable") return "Airtable Integration";
  if (selectedIntegrationType === "webhook") return "Webhook Integration";
  return "Integrations";
};
/**
 * Formats a webhook's "last called" timestamp as a short relative label.
 *
 * @param lastCalledAt - Timestamp string accepted by the `Date` constructor;
 *   `null`, `undefined` or an empty string mean the webhook was never called.
 * @param now - Reference instant the age is measured against. Defaults to the
 *   current time; exposed so the output is deterministic in tests.
 * @returns "Not called yet" for a missing timestamp, "Unknown" for a timestamp
 *   that cannot be parsed, a relative label ("Just now", "5 minutes ago",
 *   "1 hour ago", "3 days ago") for anything younger than seven days, and an
 *   absolute en-US date/time otherwise.
 */
const formatLastCalled = (lastCalledAt?: string | null, now: Date = new Date()) => {
  if (!lastCalledAt) return "Not called yet";

  const calledAt = new Date(lastCalledAt);
  const calledAtMs = calledAt.getTime();
  // An unparseable string yields NaN here; every comparison below would then
  // be false and the function used to fall through to "Invalid Date".
  if (Number.isNaN(calledAtMs)) return "Unknown";

  const MINUTE_MS = 1000 * 60;
  const HOUR_MS = MINUTE_MS * 60;
  const DAY_MS = HOUR_MS * 24;

  const elapsedMs = now.getTime() - calledAtMs;
  const minutes = Math.floor(elapsedMs / MINUTE_MS);
  const hours = Math.floor(elapsedMs / HOUR_MS);
  const days = Math.floor(elapsedMs / DAY_MS);

  const ago = (count: number, unit: string) =>
    `${count} ${unit}${count === 1 ? "" : "s"} ago`;

  // Timestamps slightly in the future (negative elapsed time) also land here.
  if (minutes < 1) return "Just now";
  if (minutes < 60) return ago(minutes, "minute");
  if (hours < 24) return ago(hours, "hour");
  if (days < 7) return ago(days, "day");

  return calledAt.toLocaleDateString("en-US", {
    year: "numeric",
    month: "short",
    day: "numeric",
    hour: "2-digit",
    minute: "2-digit",
  });
};
{!isScrapeRobot && ( )} {!isScrapeRobot && ( )}
); } return (
{(selectedIntegrationType === "googleSheets" || integrationType === "googleSheets") && ( <>{renderGoogleSheetsIntegration()} )} {(selectedIntegrationType === "airtable" || integrationType === "airtable") && ( <>{renderAirtableIntegration()} )} {(selectedIntegrationType === "webhook" || integrationType === "webhook") && ( <> Integrate using Webhooks {settings.webhooks && settings.webhooks.length > 0 && ( Webhook URL Call when Last called Status Actions {settings.webhooks.map((webhook) => ( {webhook.url} {webhook.events.map((event) => ())} {formatLastCalled(webhook.lastCalledAt)} toggleWebhookStatusSetting(webhook.id)} size="small" /> testWebhookSetting(webhook.id)} disabled={loading || !webhook.active} title="Test"> editWebhookSetting(webhook)} disabled={loading} title="Edit"> deleteWebhookSetting(webhook.id)} disabled={loading} title="Delete"> ))}
)} {!showWebhookForm && ( { setNewWebhook({ ...newWebhook, url: e.target.value }); if (urlError) setUrlError(null); }} error={!!urlError} helperText={urlError} required aria-describedby="webhook-url-help" /> setNewWebhook({ ...newWebhook, events: [e.target.value] })} sx={{ minWidth: "200px" }} required> Run finished Run failed Refer to the API documentation for examples and details. )} {showWebhookForm && ( {editingWebhook ? "Edit Webhook" : "Add New Webhook"} { setNewWebhook({ ...newWebhook, url: e.target.value }); if (urlError) setUrlError(null); }} sx={{ marginBottom: "15px" }} placeholder="https://your-api.com/webhook/endpoint" required error={!!urlError} helperText={urlError} /> setNewWebhook({ ...newWebhook, events: typeof e.target.value === "string" ? [e.target.value] : e.target.value })} SelectProps={{ multiple: true, renderValue: (selected) => ({(selected as string[]).map((value) => ())}), }} sx={{ marginBottom: "20px" }} required> Run finished Run failed setNewWebhook({ ...newWebhook, active: e.target.checked })} />} label="Active" sx={{ marginBottom: "10px" }} /> )} )}
); }; ================================================ FILE: src/components/robot/pages/RobotSettingsPage.tsx ================================================ import { useState, useEffect } from "react"; import { useTranslation } from "react-i18next"; import { TextField, Box } from "@mui/material"; import { useGlobalInfoStore } from "../../../context/globalInfo"; import { getStoredRecording } from "../../../api/storage"; import { WhereWhatPair } from "maxun-core"; import { getUserById } from "../../../api/auth"; import { RobotConfigPage } from "./RobotConfigPage"; import { useNavigate, useLocation } from "react-router-dom"; interface RobotMeta { name: string; id: string; createdAt: string; pairs: number; updatedAt: string; params: any[]; type?: 'extract' | 'scrape' | 'crawl' | 'search'; url?: string; formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[]; isLLM?: boolean; } interface RobotWorkflow { workflow: WhereWhatPair[]; } interface ScheduleConfig { runEvery: number; runEveryUnit: "MINUTES" | "HOURS" | "DAYS" | "WEEKS" | "MONTHS"; startFrom: | "SUNDAY" | "MONDAY" | "TUESDAY" | "WEDNESDAY" | "THURSDAY" | "FRIDAY" | "SATURDAY"; atTimeStart?: string; atTimeEnd?: string; timezone: string; lastRunAt?: Date; nextRunAt?: Date; cronExpression?: string; } export interface RobotSettings { id: string; userId?: number; recording_meta: RobotMeta; recording: RobotWorkflow; google_sheet_email?: string | null; google_sheet_name?: string | null; google_sheet_id?: string | null; google_access_token?: string | null; google_refresh_token?: string | null; schedule?: ScheduleConfig | null; } interface RobotSettingsProps { handleStart: (settings: RobotSettings) => void; } export const RobotSettingsPage = ({ handleStart }: RobotSettingsProps) => { const { t } = useTranslation(); const navigate = useNavigate(); const location = useLocation(); const [userEmail, setUserEmail] = useState(null); const [robot, setRobot] = useState(null); const { recordingId, 
/**
 * Resolves the URL the loaded robot targets.
 *
 * Uses `recording_meta.url` when it is set; otherwise looks at the last pair
 * of the workflow and returns the first argument of its first `goto` action.
 * Yields `undefined` when no robot is loaded or no such action exists.
 */
const getTargetUrl = () => {
  const explicitUrl = robot?.recording_meta.url;
  if (explicitUrl) return explicitUrl;

  const pairs = robot?.recording.workflow;
  const finalPair = pairs?.[pairs.length - 1];
  const gotoStep = finalPair?.what.find((step) => step.action === "goto");
  return gotoStep?.args?.[0];
};
/**
 * Deletes the schedule of the current robot and resets the form so a new
 * schedule can be configured.
 *
 * Logs an error and returns when no recording id is available. Success and
 * failure are both reported through `notify`; the loading flag is always
 * cleared afterwards.
 */
const deleteRobotSchedule = async () => {
  if (!recordingId) {
    console.error("No recording id provided");
    return;
  }

  setIsLoading(true);
  try {
    await deleteSchedule(recordingId);
    setSchedule(null);
    notify("success", t("Schedule deleted successfully"));
    // Reset to the same defaults the component starts with. The reset used to
    // set dayOfMonth to "", which left the monthly "on day" field blank (below
    // its min of 1) as soon as the user picked MONTHS again.
    setSettings({
      runEvery: 1,
      runEveryUnit: "HOURS",
      startFrom: "MONDAY",
      dayOfMonth: "1",
      atTimeStart: "00:00",
      atTimeEnd: "01:00",
      timezone: "UTC",
    });
  } catch (error) {
    notify("error", "Failed to delete schedule");
  } finally {
    setIsLoading(false);
  }
};
/**
 * Returns the translated ordinal suffix ("st", "nd", "rd", "th") for a
 * day-of-month string, or an empty string when no day is given.
 */
const getDayOrdinal = (day: string | undefined) => {
  if (!day) return "";

  const fallbackKey = "schedule_settings.labels.on_day.th";

  // 11, 12 and 13 take "th" even though they end in 1, 2 and 3.
  const isTeen = day.endsWith("11") || day.endsWith("12") || day.endsWith("13");
  if (isTeen) return t(fallbackKey);

  const keyByLastDigit = new Map<string, string>([
    ["1", "schedule_settings.labels.on_day.st"],
    ["2", "schedule_settings.labels.on_day.nd"],
    ["3", "schedule_settings.labels.on_day.rd"],
  ]);

  return t(keyByLastDigit.get(day.slice(-1)) ?? fallbackKey);
};
t("schedule_settings.start_from") : t("schedule_settings.start_from")} : {schedule.startFrom.charAt(0).toUpperCase() + schedule.startFrom.slice(1).toLowerCase()} {schedule.runEveryUnit === "MONTHS" && ( {t("schedule_settings.on_day")}: {schedule.dayOfMonth} {getDayOrdinal(schedule.dayOfMonth)} of the month )} {t("schedule_settings.at_around")}: {schedule.atTimeStart},{" "} {schedule.timezone} {t("schedule_settings.timezone")} ) : ( <> {t("schedule_settings.labels.run_once_every")} handleChange("runEvery", parseInt(e.target.value)) } sx={textStyle} inputProps={{ min: 1 }} /> handleChange("runEveryUnit", e.target.value) } sx={dropDownStyle} > {units.map((unit) => ( {" "} {unit.charAt(0).toUpperCase() + unit.slice(1).toLowerCase()} ))} {["MONTHS", "WEEKS"].includes(settings.runEveryUnit) ? t("schedule_settings.labels.start_from_label") : t("schedule_settings.labels.start_from_label")} handleChange("startFrom", e.target.value) } sx={dropDownStyle} > {days.map((day) => ( {day.charAt(0).toUpperCase() + day.slice(1).toLowerCase()} ))} {settings.runEveryUnit === "MONTHS" && ( {t("schedule_settings.labels.on_day_of_month")} handleChange("dayOfMonth", e.target.value) } sx={textStyle} inputProps={{ min: 1, max: 31 }} /> )} {["MINUTES", "HOURS"].includes(settings.runEveryUnit) ? 
/** One shared socket per browser id, reused by every row showing that run. */
const socketCache = new Map<string, Socket>();

/** Progress listeners registered per browser id. */
const progressCallbacks = new Map<string, Set<(data: any) => void>>();

/**
 * Returns the cached socket for `browserId`, creating and caching it on first
 * use. A newly created socket forwards every `workflowProgress` event to all
 * callbacks currently registered for that browser id.
 */
function getOrCreateSocket(browserId: string): Socket {
  // Single lookup instead of has() + get()! so no non-null assertion is needed.
  const cached = socketCache.get(browserId);
  if (cached) return cached;

  const socket = io(`${apiUrl}/${browserId}`, {
    transports: ["websocket"],
    // NOTE(review): this reads like the Node TLS option that disables
    // certificate verification — confirm it is intended for this client.
    rejectUnauthorized: false
  });

  socket.on('workflowProgress', (data: any) => {
    // Looked up on every event so callbacks added after creation are included.
    progressCallbacks.get(browserId)?.forEach(cb => cb(data));
  });

  socketCache.set(browserId, socket);
  return socket;
}

/**
 * Disconnects and forgets the socket for `browserId` once no progress
 * callbacks remain registered for it. Does nothing while callbacks exist.
 */
function cleanupSocketIfUnused(browserId: string) {
  const callbacks = progressCallbacks.get(browserId);
  if (callbacks && callbacks.size > 0) return;

  // Drop the (empty) callback set even when no socket is cached, so an
  // orphaned entry cannot linger in the map.
  progressCallbacks.delete(browserId);

  const socket = socketCache.get(browserId);
  if (socket) {
    socket.disconnect();
    socketCache.delete(browserId);
  }
}
'API' : 'Unknown'; const logEndRef = useRef(null); const [workflowProgress, setWorkflowProgress] = useState<{ current: number; total: number; percentage: number; } | null>(null); // Subscribe to progress updates using module-level socket cache useEffect(() => { if (!row.browserId) return; // Get or create socket (from module cache) getOrCreateSocket(row.browserId); // Register callback if (!progressCallbacks.has(row.browserId)) { progressCallbacks.set(row.browserId, new Set()); } const callback = (data: any) => { setWorkflowProgress(data); }; progressCallbacks.get(row.browserId)!.add(callback); // Cleanup: remove callback and cleanup socket if no callbacks remain return () => { const callbacks = progressCallbacks.get(row.browserId); if (callbacks) { callbacks.delete(callback); // Cleanup socket if this was the last callback cleanupSocketIfUnused(row.browserId); } }; }, [row.browserId]); // Clear progress UI when run completes and trigger socket cleanup useEffect(() => { if (row.status !== 'running' && row.status !== 'queued') { setWorkflowProgress(null); // Attempt to cleanup socket when run completes // (will only cleanup if no other callbacks exist) if (row.browserId) { cleanupSocketIfUnused(row.browserId); } } }, [row.status, row.browserId]); const handleAbort = () => { abortRunHandler(row.runId, row.name, row.browserId); } const handleRowExpand = () => { const newOpen = !isOpen; onToggleExpanded(newOpen); }; useEffect(() => { const fetchUserEmail = async () => { if (row.runByUserId) { const userData = await getUserById(row.runByUserId); if (userData && userData.user) { setUserEmail(userData.user.email); } } }; fetchUserEmail(); }, [row.runByUserId]); const handleConfirmDelete = async () => { try { const res = await deleteRunFromStorage(`${row.runId}`); if (res) { handleDelete(); } } finally { setDeleteOpen(false); } }; return ( *': { borderBottom: 'unset' } }} hover role="checkbox" tabIndex={-1} key={row.id}> {isOpen ? 
: } {columns.map((column) => { // @ts-ignore const value: any = row[column.id]; if (value !== undefined) { return ( {value} ); } else { switch (column.id) { case 'runStatus': return ( {row.status === 'success' && } {row.status === 'running' && } {row.status === 'scheduled' && } {row.status === 'queued' && } {row.status === 'failed' && } {row.status === 'aborted' && } ) case 'delete': return ( setDeleteOpen(true)}> ); case 'settings': return ( setOpenSettingsModal(true)}> setOpenSettingsModal(false)} modalStyle={modalStyle} > <> {t('runs_table.run_settings_modal.title')} {t('runs_table.run_settings_modal.labels.run_type')}: ) default: return null; } } })} setDeleteOpen(false)} modalStyle={{ ...modalStyle, padding: 0, backgroundColor: 'transparent', width: 'auto', maxWidth: '520px' }}> {t('runs_table.delete_confirm.title', { name: row.name, defaultValue: 'Delete run "{{name}}"?' })} {t('runs_table.delete_confirm.message', { name: row.name, defaultValue: 'Are you sure you want to delete the run "{{name}}"?' 
})} ); } /* Exported modal style object: centred via top/left offsets plus a translate transform, 30% wide, height fits its content. Both p: 4 and an explicit 20px padding are set. */ export const modalStyle = { top: '45%', left: '50%', transform: 'translate(-50%, -50%)', width: '30%', backgroundColor: 'background.paper', p: 4, height: 'fit-content', display: 'block', padding: '20px', }; ================================================ FILE: src/components/run/InterpretationButtons.tsx ================================================ import { Box, Button, Stack, Typography, CircularProgress } from "@mui/material"; import React, { useCallback, useEffect, useState } from "react"; import { useSocketStore } from "../../context/socket"; import { useGlobalInfoStore } from "../../context/globalInfo"; import { GenericModal } from "../ui/GenericModal"; import { WhereWhatPair } from "maxun-core"; import HelpIcon from '@mui/icons-material/Help'; import { useTranslation } from "react-i18next"; /* Props: enableStepping is called with the paused flag (false on finished, true on breakpointHit); onPreviewComplete is an optional callback invoked from handlePlay. */ interface InterpretationButtonsProps { enableStepping: (isPaused: boolean) => void; onPreviewComplete?: () => void; } /* Shape of the running/paused flags kept in component state. */ interface InterpretationInfo { running: boolean; isPaused: boolean; } /* Initial value of the info state: neither running nor paused. */ const interpretationInfo: InterpretationInfo = { running: false, isPaused: false, }; /** Interpretation controls. Subscribes to the socket events finished, breakpointHit and decision, keeps the running/paused flags in state, and holds the state of a decision modal whose answer is emitted back over the socket. */ export const InterpretationButtons = ({ enableStepping, onPreviewComplete }: InterpretationButtonsProps) => { const { t } = useTranslation(); const [info, setInfo] = useState(interpretationInfo); const [decisionModal, setDecisionModal] = useState<{ pair: WhereWhatPair | null, actionType: string, selector: string, tagName: string, innerText: string, action: string, open: boolean }>({ pair: null, actionType: '', selector: '', action: '', tagName: '', innerText: '', open: false }); const { socket } = useSocketStore(); const { notify } = useGlobalInfoStore(); /* Handler for the finished socket event: clears isPaused (other info fields are kept) and disables stepping. */ const finishedHandler = useCallback(() => { setInfo({ ...info, isPaused: false }); enableStepping(false); }, [info, enableStepping]); /* Handler for the breakpointHit socket event: marks the run as paused, shows the restart_required warning and enables stepping. NOTE(review): notify is called here but is not listed in the dependency array that follows - confirm it is stable. */ const breakpointHitHandler = useCallback(() => { setInfo({ running: false, isPaused: true }); notify('warning', t('interpretation_buttons.messages.restart_required')); enableStepping(true); }, 
[enableStepping, t]); /* Handler for the decision socket event: copies pair, actionType and the lastData fields into decisionModal and opens the modal. */ const decisionHandler = useCallback( ({ pair, actionType, lastData }: { pair: WhereWhatPair | null, actionType: string, lastData: { selector: string, action: string, tagName: string, innerText: string } }) => { const { selector, action, tagName, innerText } = lastData; setDecisionModal((prevState) => ({ pair, actionType, selector, action, tagName, innerText, open: true, })); }, []); /* Emits the decision (together with the pending pair and actionType) on the socket, then resets decisionModal to its closed, empty state. */ const handleDecision = (decision: boolean) => { const { pair, actionType } = decisionModal; socket?.emit('decision', { pair, actionType, decision }); setDecisionModal({ pair: null, actionType: '', selector: '', action: '', tagName: '', innerText: '', open: false }); }; /* Returns the modal description for customAction decisions (previous action and element text); returns null for every other actionType. */ const handleDescription = () => { if (decisionModal.actionType === 'customAction') { return ( <> {t('interpretation_buttons.modal.use_previous')} {t('interpretation_buttons.modal.previous_action')} {decisionModal.action}, {t('interpretation_buttons.modal.element_text')} {decisionModal.innerText} ); } return null; }; /* Registers the three socket listeners when a socket exists and removes them on cleanup. NOTE(review): decisionHandler is not in the dependency array; it is created with an empty dependency list above. */ useEffect(() => { if (socket) { socket.on('finished', finishedHandler); socket.on('breakpointHit', breakpointHitHandler); socket.on('decision', decisionHandler); } return () => { socket?.off('finished', finishedHandler); socket?.off('breakpointHit', breakpointHitHandler); socket?.off('decision', decisionHandler); }; }, [socket, finishedHandler, breakpointHitHandler]); /* Calls onPreviewComplete when provided and shows the run_finished info notification; the earlier run logic is kept only as commented-out code. */ const handlePlay = async () => { onPreviewComplete?.(); notify('info', t('interpretation_buttons.messages.run_finished')); // Legacy code for running the interpretation // if (!info.running) { // setInfo({ ...info, running: true }); // // const finished = await interpretCurrentRecording(); // setInfo({ ...info, running: false }); // if (finished) { // } else { // notify('error', t('interpretation_buttons.messages.run_failed')); // } // } }; // pause and stop logic (do not delete - we will bring this back!) 
/* const handlePause = async () => { if (info.running) { socket?.emit("pause"); setInfo({ running: false, isPaused: true }); notify('warning', 'Please restart the interpretation after updating the recording'); enableStepping(true); } }; const handleStop = async () => { setInfo({ running: false, isPaused: false }); enableStepping(false); await stopCurrentInterpretation(); }; */ return ( { }} isOpen={decisionModal.open} canBeClosed={false} modalStyle={{ position: 'absolute', top: '50%', left: '50%', transform: 'translate(-50%, -50%)', width: 500, background: 'white', border: '2px solid #000', boxShadow: '24', height: 'fit-content', display: 'block', overflow: 'scroll', padding: '5px 25px 10px 25px', }} >
{handleDescription()}
); }; ================================================ FILE: src/components/run/InterpretationLog.tsx ================================================ import * as React from 'react'; import SwipeableDrawer from '@mui/material/SwipeableDrawer'; import Typography from '@mui/material/Typography'; import { Button, Grid, Box, TextField, IconButton, Tooltip } from '@mui/material'; import { useCallback, useEffect, useRef, useState } from "react"; import { useBrowserDimensionsStore } from "../../context/browserDimensions"; import Table from '@mui/material/Table'; import TableBody from '@mui/material/TableBody'; import TableCell from '@mui/material/TableCell'; import TableContainer from '@mui/material/TableContainer'; import TableHead from '@mui/material/TableHead'; import TableRow from '@mui/material/TableRow'; import Paper from '@mui/material/Paper'; import StorageIcon from '@mui/icons-material/Storage'; import ArrowUpwardIcon from '@mui/icons-material/ArrowUpward'; import CloseIcon from '@mui/icons-material/Close'; import CheckIcon from '@mui/icons-material/Check'; import { SidePanelHeader } from '../recorder/SidePanelHeader'; import { useGlobalInfoStore } from '../../context/globalInfo'; import { useThemeMode } from '../../context/theme-provider'; import { useTranslation } from 'react-i18next'; import { useBrowserSteps } from '../../context/browserSteps'; import { useActionContext } from '../../context/browserActions'; import { useSocketStore } from '../../context/socket'; /* Props: the open flag of the panel and the setter used to change it; both are owned by the parent. */ interface InterpretationLogProps { isOpen: boolean; setIsOpen: (isOpen: boolean) => void; } /** Output preview panel. Mirrors the list, text and screenshot steps from the browser-steps store into local state, shows them under up to three tabs, lets the user rename or discard them, and opens itself when new captured data arrives. */ export const InterpretationLog: React.FC = ({ isOpen, setIsOpen }) => { const { t } = useTranslation(); const [captureListData, setCaptureListData] = useState([]); const [captureTextData, setCaptureTextData] = useState([]); const [screenshotData, setScreenshotData] = useState([]); const [activeTab, setActiveTab] = useState(0); const [activeListTab, setActiveListTab] = useState(0); const [activeScreenshotTab, 
setActiveScreenshotTab] = useState(0); /* editingField and editingValue track an in-progress rename of a single field inside a list step. */ const [editingField, setEditingField] = useState<{listId: number, fieldKey: string} | null>(null); const [editingValue, setEditingValue] = useState(''); const [editingTextGroupName, setEditingTextGroupName] = useState(false); const [editingTextGroupNameValue, setEditingTextGroupNameValue] = useState('Text Data'); /* editing tracks an in-progress rename of a whole list, text or screenshot step. */ const [editing, setEditing] = useState<{ stepId: number | null; type: 'list' | 'text' | 'screenshot' | null; value: string; }>({ stepId: null, type: null, value: '' }); const logEndRef = useRef(null); /* Bookkeeping refs used by the effects below so that each automatic focus or rename prompt happens only once. */ const autoFocusedListIds = useRef>(new Set()); const previousDataLengths = useRef>(new Map()); const hasAutoFocusedTextTab = useRef(false); const previousGetText = useRef(false); const autoFocusedScreenshotIndices = useRef>(new Set()); const { browserSteps, updateListTextFieldLabel, removeListTextField, updateListStepName, updateScreenshotStepName, updateBrowserTextStepLabel, deleteBrowserStep, deleteStepsByActionId, emitForStepId } = useBrowserSteps(); const { captureStage, getText } = useActionContext(); const { socket } = useSocketStore(); const { browserWidth, outputPreviewHeight, outputPreviewWidth } = useBrowserDimensionsStore(); const { currentWorkflowActionsState, shouldResetInterpretationLog, currentTextGroupName, setCurrentTextGroupName, notify } = useGlobalInfoStore(); const [showPreviewData, setShowPreviewData] = useState(false); const userClosedDrawer = useRef(false); const lastListDataLength = useRef(0); const lastTextDataLength = useRef(0); const lastScreenshotDataLength = useRef(0); /* Returns an event handler that sets the open state to newOpen. Keydown events for Tab and Shift are ignored, and closing while open is recorded in userClosedDrawer. */ const toggleDrawer = (newOpen: boolean) => (event: React.KeyboardEvent | React.MouseEvent) => { if ( event.type === 'keydown' && ((event as React.KeyboardEvent).key === 'Tab' || (event as React.KeyboardEvent).key === 'Shift') ) { return; } if (!newOpen && isOpen) { userClosedDrawer.current = true; } setIsOpen(newOpen); }; /* Starts renaming one field of a list step, seeding the input with the current label. */ const handleStartEdit = (listId: number, fieldKey: string, currentLabel: string) => { setEditingField({ 
listId, fieldKey }); setEditingValue(currentLabel); }; /* Saves the list field label being edited and schedules an emit for the owning action. A blank value is ignored and leaves the editor state unchanged. */ const handleSaveEdit = () => { if (editingField && editingValue.trim()) { const listStep = browserSteps.find(step => step.id === editingField.listId); const actionId = listStep?.actionId; updateListTextFieldLabel(editingField.listId, editingField.fieldKey, editingValue.trim()); // Emit updated action to backend after state update completes if (actionId) { setTimeout(() => emitForStepId(actionId), 0); } setEditingField(null); setEditingValue(''); } }; /* Discards the in-progress list field rename. */ const handleCancelEdit = () => { setEditingField(null); setEditingValue(''); }; /* Removes one field from a list step and schedules an emit for the owning action. */ const handleDeleteField = (listId: number, fieldKey: string) => { const listStep = browserSteps.find(step => step.id === listId); const actionId = listStep?.actionId; removeListTextField(listId, fieldKey); // Emit updated action to backend after state update completes if (actionId) { setTimeout(() => emitForStepId(actionId), 0); } }; /* Opens the text group name editor seeded with the current group name. */ const handleStartEditTextGroupName = () => { setEditingTextGroupName(true); setEditingTextGroupNameValue(currentTextGroupName); }; /* Stores the trimmed group name (falling back to Text Data when blank), closes the editor and schedules an emit for the first text step that has an actionId. */ const handleSaveTextGroupName = () => { const trimmedName = editingTextGroupNameValue.trim(); const finalName = trimmedName || 'Text Data'; setCurrentTextGroupName(finalName); setEditingTextGroupName(false); // Emit after React updates global state setTimeout(() => { const activeTextStep = captureTextData.find(step => step.actionId); if (activeTextStep?.actionId) emitForStepId(activeTextStep.actionId); }, 0); }; /* Deletes a single text step and schedules an emit for its action. */ const handleDeleteTextStep = (textId: number) => { const textStep = browserSteps.find(step => step.id === textId); const actionId = textStep?.actionId; deleteBrowserStep(textId); // Emit updated action to backend after deletion if (actionId) { // Small delay to ensure state update completes setTimeout(() => emitForStepId(actionId), 0); } }; /* Discards a whole list action: removes its steps from the store, emits removeAction on the socket, adjusts the active list tab and shows a notification. Does nothing when actionId is missing. */ const handleRemoveListAction = (listId: number, actionId: string | undefined) => { if (!actionId) return; const listIndex = captureListData.findIndex(list => list.id === listId); 
const listItem = captureListData[listIndex]; const listName = listItem?.name || `List Data ${listIndex + 1}`; const isActiveList = listIndex === activeListTab; deleteStepsByActionId(actionId); if (socket) { socket.emit('removeAction', { actionId }); } /* Keep the active list tab valid: step back when the removed tab was both active and last, shift left when it sat before the active tab. */ if (isActiveList && captureListData.length > 1) { if (listIndex === captureListData.length - 1) { setActiveListTab(listIndex - 1); } } else if (listIndex < activeListTab) { setActiveListTab(activeListTab - 1); } notify('error', `List "${listName}" discarded`); }; /* Discards a screenshot action; mirrors handleRemoveListAction but indexes only screenshot steps that carry screenshotData. */ const handleRemoveScreenshotAction = (screenshotId: number, actionId: string | undefined) => { if (!actionId) return; const screenshotSteps = browserSteps.filter(step => step.type === 'screenshot' && step.screenshotData); const screenshotIndex = screenshotSteps.findIndex(step => step.id === screenshotId); const screenshotStep = screenshotSteps[screenshotIndex]; const screenshotName = screenshotStep?.name || `Screenshot ${screenshotIndex + 1}`; const isActiveScreenshot = screenshotIndex === activeScreenshotTab; deleteStepsByActionId(actionId); if (socket) { socket.emit('removeAction', { actionId }); } if (isActiveScreenshot && screenshotData.length > 1) { if (screenshotIndex === screenshotData.length - 1) { setActiveScreenshotTab(screenshotIndex - 1); } } else if (screenshotIndex < activeScreenshotTab) { setActiveScreenshotTab(activeScreenshotTab - 1); } notify('error', `Screenshot "${screenshotName}" discarded`); }; /* Discards every text action: collects the distinct actionIds of the text steps, removes each from the store and via the socket, then notifies once. */ const handleRemoveAllTextActions = () => { const uniqueActionIds = new Set(); captureTextData.forEach(textStep => { if (textStep.actionId) { uniqueActionIds.add(textStep.actionId); } }); uniqueActionIds.forEach(actionId => { deleteStepsByActionId(actionId); if (socket) { socket.emit('removeAction', { actionId }); } }); notify('error', `Text data "${currentTextGroupName}" discarded`); }; /* Returns true, after showing an error notification, when another list or screenshot step already uses the trimmed name. Steps of type text are never checked and always yield false. */ const checkForDuplicateName = (stepId: number, type: 'list' | 'text' | 'screenshot', newName: string): boolean => { const trimmedName = newName.trim(); if (type 
=== 'list') { const listSteps = browserSteps.filter(step => step.type === 'list' && step.id !== stepId); const duplicate = listSteps.find(step => step.name === trimmedName); if (duplicate) { notify('error', `A list with the name "${trimmedName}" already exists. Please choose a different name.`); return true; } } else if (type === 'screenshot') { const screenshotSteps = browserSteps.filter(step => step.type === 'screenshot' && step.id !== stepId); const duplicate = screenshotSteps.find(step => step.name === trimmedName); if (duplicate) { notify('error', `A screenshot with the name "${trimmedName}" already exists. Please choose a different name.`); return true; } } return false; } /* Starts renaming a step of the given type, seeding the editor with currentValue. */ const startEdit = (stepId: number, type: 'list' | 'text' | 'screenshot', currentValue: string) => { setEditing({ stepId, type, value: currentValue }); }; /* Commits the step rename. A blank value only closes the editor; a duplicate name leaves the editor open; otherwise the store updater matching the type runs and an emit is scheduled for the action of that step. */ const saveEdit = () => { const { stepId, type, value } = editing; if (stepId == null || !type) return; const finalValue = value.trim(); if (!finalValue) { setEditing({ stepId: null, type: null, value: '' }); return; } if (checkForDuplicateName(stepId, type, finalValue)) { return; } if (type === 'list') { updateListStepName(stepId, finalValue); } else if (type === 'text') { updateBrowserTextStepLabel(stepId, finalValue); } else if (type === 'screenshot') { updateScreenshotStepName(stepId, finalValue); } const step = browserSteps.find(s => s.id === stepId); if (step?.actionId) setTimeout(() => emitForStepId(step.actionId!), 0); setEditing({ stepId: null, type: null, value: '' }); }; /* Closes the step rename editor without saving. */ const cancelEdit = () => { setEditing({ stepId: null, type: null, value: '' }); }; /* Item counts seen by the previous updateActiveTab call; used to detect which category gained data. */ const previousTabsCount = useRef({ lists: 0, texts: 0, screenshots: 0 }); /* Switches the main tab to the category that gained data since the last call, checking lists first, then texts, then screenshots; for screenshots the newest one is also selected. Uses getAvailableTabs, which is declared further down. */ const updateActiveTab = useCallback(() => { const availableTabs = getAvailableTabs(); const hasNewListData = captureListData.length > previousTabsCount.current.lists; const hasNewTextData = captureTextData.length > previousTabsCount.current.texts; const hasNewScreenshotData = screenshotData.length > 
previousTabsCount.current.screenshots; previousTabsCount.current = { lists: captureListData.length, texts: captureTextData.length, screenshots: screenshotData.length }; if (hasNewListData && availableTabs.findIndex(tab => tab.id === 'captureList') !== -1) { setActiveTab(availableTabs.findIndex(tab => tab.id === 'captureList')); } else if (hasNewTextData && availableTabs.findIndex(tab => tab.id === 'captureText') !== -1) { setActiveTab(availableTabs.findIndex(tab => tab.id === 'captureText')); } else if (hasNewScreenshotData && availableTabs.findIndex(tab => tab.id === 'captureScreenshot') !== -1) { setActiveTab(availableTabs.findIndex(tab => tab.id === 'captureScreenshot')); // Set the active screenshot tab to the latest screenshot setActiveScreenshotTab(screenshotData.length - 1); } }, [captureListData.length, captureTextData.length, screenshotData.length]); /* Derives the text, list and screenshot arrays from browserSteps, toggles showPreviewData, refreshes the active tab, and opens the text group name editor once (after 300 ms) when getText goes from true to false while text steps exist. */ useEffect(() => { const textSteps = browserSteps.filter(step => step.type === 'text'); setCaptureTextData(textSteps); if (!getText && previousGetText.current && textSteps.length > 0) { if (!hasAutoFocusedTextTab.current) { hasAutoFocusedTextTab.current = true; setTimeout(() => { handleStartEditTextGroupName(); }, 300); } } previousGetText.current = getText; const listSteps = browserSteps.filter(step => step.type === 'list'); setCaptureListData(listSteps); const screenshotSteps = browserSteps.filter(step => step.type === 'screenshot' ) as Array<{ type: 'screenshot'; id: number; name?: string; fullPage: boolean; actionId?: string; screenshotData?: string }>; const screenshotsWithData = screenshotSteps.filter(step => step.screenshotData); const screenshots = screenshotsWithData.map(step => step.screenshotData!); setScreenshotData(screenshots); if (textSteps.length > 0 || listSteps.length > 0 || screenshots.length > 0) { setShowPreviewData(true); } else { setShowPreviewData(false); } updateActiveTab(); }, [browserSteps, updateActiveTab, getText]); /* Clears the local data, resets the main tab to 0 and resets the bookkeeping refs when shouldResetInterpretationLog is set. activeListTab and activeScreenshotTab are not reset here. */ useEffect(() => { if (shouldResetInterpretationLog) { 
setCaptureListData([]); setCaptureTextData([]); setScreenshotData([]); setActiveTab(0); setShowPreviewData(false); autoFocusedListIds.current.clear(); previousDataLengths.current.clear(); autoFocusedScreenshotIndices.current.clear(); userClosedDrawer.current = false; lastListDataLength.current = 0; lastTextDataLength.current = 0; lastScreenshotDataLength.current = 0; previousTabsCount.current = { lists: 0, texts: 0, screenshots: 0 }; hasAutoFocusedTextTab.current = false; previousGetText.current = false; } }, [shouldResetInterpretationLog]); /* Builds the main tab list in fixed order (Lists, Texts, Screenshots), including only the categories that currently have data. */ const getAvailableTabs = useCallback(() => { const tabs = []; if (captureListData.length > 0) { tabs.push({ id: 'captureList', label: 'Lists' }); } if (captureTextData.length > 0) { tabs.push({ id: 'captureText', label: 'Texts' }); } if (screenshotData.length > 0) { tabs.push({ id: 'captureScreenshot', label: 'Screenshots' }); } return tabs; }, [captureListData.length, captureTextData.length, screenshotData.length, showPreviewData]); const availableTabs = getAvailableTabs(); /* Falls back to the first tab when the active index no longer points at an available tab. */ useEffect(() => { if (activeTab >= availableTabs.length && availableTabs.length > 0) { setActiveTab(0); } }, [activeTab, availableTabs.length]); const { hasScrapeListAction, hasScreenshotAction, hasScrapeSchemaAction } = currentWorkflowActionsState; /* Opens the panel when any category gained items since the previous run of this effect, then focuses the tab matching the most recent capture step. The last*DataLength refs hold the previously seen counts. */ useEffect(() => { let shouldOpenDrawer = false; if (hasScrapeListAction && captureListData.length > 0 && captureListData[0]?.data?.length > 0) { setShowPreviewData(true); if (captureListData.length > lastListDataLength.current) { userClosedDrawer.current = false; shouldOpenDrawer = true; } lastListDataLength.current = captureListData.length; } else if (hasScrapeListAction && captureListData.length === 0) { lastListDataLength.current = 0; } if (hasScrapeSchemaAction && captureTextData.length > 0 && !getText) { setShowPreviewData(true); if (captureTextData.length > lastTextDataLength.current) { userClosedDrawer.current = false; shouldOpenDrawer = true; } lastTextDataLength.current = captureTextData.length; } else 
if (hasScrapeSchemaAction && captureTextData.length === 0) { lastTextDataLength.current = 0; } if (hasScreenshotAction && screenshotData.length > 0) { setShowPreviewData(true); if (screenshotData.length > lastScreenshotDataLength.current) { userClosedDrawer.current = false; shouldOpenDrawer = true; } lastScreenshotDataLength.current = screenshotData.length; } else if (hasScreenshotAction && screenshotData.length === 0) { lastScreenshotDataLength.current = 0; } /* Type of the last list, text or screenshot step in browserSteps, or null when there is none. */ const getLatestCaptureType = () => { for (let i = browserSteps.length - 1; i >= 0; i--) { const type = browserSteps[i].type; if (type === "list" || type === "text" || type === "screenshot") { return type; } } return null; }; /* Tab selection is deferred by 100 ms; for a new screenshot the rename editor is opened a further 300 ms later, once per screenshot index. */ if (shouldOpenDrawer) { setIsOpen(true); const latestType = getLatestCaptureType(); setTimeout(() => { if (latestType === "text") { const idx = getAvailableTabs().findIndex(t => t.id === "captureText"); if (idx !== -1) setActiveTab(idx); } else if (latestType === "list") { const idx = getAvailableTabs().findIndex(t => t.id === "captureList"); if (idx !== -1) setActiveTab(idx); } else if (latestType === "screenshot") { const screenshotTabIndex = getAvailableTabs().findIndex(tab => tab.id === "captureScreenshot"); if (screenshotTabIndex !== -1) { setActiveTab(screenshotTabIndex); const latestIndex = screenshotData.length - 1; setActiveScreenshotTab(latestIndex); if (!autoFocusedScreenshotIndices.current.has(latestIndex)) { autoFocusedScreenshotIndices.current.add(latestIndex); setTimeout(() => { const screenshotSteps = browserSteps.filter(step => step.type === "screenshot"); const latestScreenshotStep = screenshotSteps[latestIndex]; if (latestScreenshotStep) { const screenshotName = latestScreenshotStep.name || `Screenshot ${latestIndex + 1}`; startEdit(latestScreenshotStep.id, 'screenshot', screenshotName); } }, 300); } } } }, 100); } }, [hasScrapeListAction, hasScrapeSchemaAction, hasScreenshotAction, captureListData, captureTextData, screenshotData, setIsOpen, getText]); /* While the panel is open and captureStage is initial: the first time the newest list has data (and no list rename is in progress) its tab is selected and the rename editor opens after 300 ms, once per list id. */ useEffect(() => 
{ if (captureListData.length > 0 && isOpen && captureStage === 'initial') { const latestListIndex = captureListData.length - 1; const latestList = captureListData[latestListIndex]; if (latestList && latestList.data && latestList.data.length > 0 && editing.type !== 'list') { const previousLength = previousDataLengths.current.get(latestList.id) || 0; const currentLength = latestList.data.length; if (previousLength === 0 && currentLength > 0) { if (!autoFocusedListIds.current.has(latestList.id)) { autoFocusedListIds.current.add(latestList.id); setActiveListTab(latestListIndex); setTimeout(() => { startEdit(latestList.id, 'list', latestList.name || `List Data ${latestListIndex + 1}`); }, 300); } } previousDataLengths.current.set(latestList.id, currentLength); } } }, [captureListData.length, isOpen, captureStage]); /* Selects the newest screenshot whenever the screenshot count or the open flag changes while the panel is open. */ useEffect(() => { if (screenshotData.length > 0 && isOpen) { const latestScreenshotIndex = screenshotData.length - 1; setActiveScreenshotTab(latestScreenshotIndex); } }, [screenshotData.length, isOpen]); const { darkMode } = useThemeMode(); const shouldShowTabs = availableTabs.length > 1; /* Id of the only available tab, or null when there are zero or several tabs. */ const getSingleContentType = () => { if (availableTabs.length === 1) { return availableTabs[0].id; } return null; }; const singleContentType = getSingleContentType(); return (
{t('interpretation_log.titles.output_preview')} {!(hasScrapeListAction || hasScrapeSchemaAction || hasScreenshotAction) && !showPreviewData && availableTabs.length === 0 && ( {t('interpretation_log.messages.no_selection')} )} {showPreviewData && availableTabs.length > 0 && ( <> {shouldShowTabs && ( {availableTabs.map((tab, index) => ( setActiveTab(index)} sx={{ px: 4, py: 2, cursor: 'pointer', // borderBottom: activeTab === index ? '2px solid' : 'none', borderColor: activeTab === index ? (darkMode ? '#ff00c3' : '#ff00c3') : 'transparent', backgroundColor: activeTab === index ? (darkMode ? '#121111ff' : '#e9ecef') : 'transparent', color: darkMode ? 'white' : 'black', fontWeight: activeTab === index ? 500 : 400, textAlign: 'center', position: 'relative', '&:hover': { backgroundColor: activeTab !== index ? (darkMode ? '#121111ff' : '#e2e6ea') : undefined } }} > {tab.label} ))} )} {(activeTab === availableTabs.findIndex(tab => tab.id === 'captureList') || singleContentType === 'captureList') && captureListData.length > 0 && ( {/* List Tabs */} {captureListData.map((listItem, index) => { const isEditing = editing.stepId === listItem.id && editing.type === 'list'; const isActive = activeListTab === index; return ( { if (!isEditing) { setActiveListTab(index); } }} onDoubleClick={() => { startEdit(listItem.id, 'list', listItem.name || `List Data ${index + 1}`) }} sx={{ px: 3, py: 1.25, cursor: isEditing ? 'text' : 'pointer', borderRadius: '8px 8px 0 0', backgroundColor: darkMode ? '#131313ff' : '#ffffff', color: isActive ? darkMode ? '#ffffff' : '#000000' : darkMode ? '#b0b0b0' : '#555555', fontWeight: isActive ? 600 : 400, fontSize: '0.875rem', border: '1px solid', borderColor: darkMode ? '#2a2a2a' : '#d0d0d0', borderBottom: isActive ? darkMode ? '2px solid #1c1c1c' : '2px solid #ffffff' : '2px solid transparent', transition: 'all 0.2s ease', position: 'relative', '&:hover': { backgroundColor: isActive ? undefined : darkMode ? 
'#161616' : '#e9ecef', }, '&:hover .delete-icon': { opacity: 1 }, }} > {isEditing ? ( setEditing({ ...editing, value: e.target.value })} onBlur={saveEdit} onKeyDown={(e) => { if (e.key === 'Enter') saveEdit(); if (e.key === 'Escape') cancelEdit(); }} autoFocus size="small" variant="standard" sx={{ minWidth: '120px', '& .MuiInputBase-input': { color: darkMode ? '#fff' : '#000', fontSize: 'inherit', fontWeight: 'inherit', padding: 0, }, '& .MuiInput-underline:before': { display: 'none' }, '& .MuiInput-underline:after': { display: 'none' }, '& .MuiInput-underline:hover:before': { display: 'none' }, }} /> ) : ( <> {listItem.name || `List Data ${index + 1}`} { e.stopPropagation(); handleRemoveListAction(listItem.id, listItem.actionId); }} sx={{ position: 'absolute', right: 4, top: '50%', transform: 'translateY(-50%)', opacity: 0, transition: 'opacity 0.2s', color: darkMode ? '#999' : '#666', padding: '2px', '&:hover': { color: '#f44336', backgroundColor: darkMode ? 'rgba(244, 67, 54, 0.1)' : 'rgba(244, 67, 54, 0.05)' } }} > )} ); })} {/* Table Below Tabs */} {Object.entries(captureListData[activeListTab]?.fields || {}).map(([fieldKey, field]: [string, any]) => { const isEditing = editingField?.listId === captureListData[activeListTab]?.id && editingField?.fieldKey === fieldKey; const isFirstField = Object.keys(captureListData[activeListTab]?.fields || {}).indexOf(fieldKey) === 0; return ( {isEditing ? ( setEditingValue(e.target.value)} onBlur={handleSaveEdit} onKeyDown={(e) => { if (e.key === 'Enter') handleSaveEdit(); if (e.key === 'Escape') handleCancelEdit(); }} autoFocus size="small" sx={{ flex: 1, minWidth: '150px', '& .MuiInputBase-root': { backgroundColor: darkMode ? 
'#2a2929' : '#fff' } }} /> ) : ( handleStartEdit(captureListData[activeListTab]?.id, fieldKey, field.label)} > {field.label} handleDeleteField(captureListData[activeListTab]?.id, fieldKey)} sx={{ position: 'absolute', right: 4, top: '50%', transform: 'translateY(-50%)', opacity: 0, transition: 'opacity 0.2s', color: darkMode ? '#999' : '#666', padding: '4px', '&:hover': { color: '#f44336', backgroundColor: darkMode ? 'rgba(244, 67, 54, 0.1)' : 'rgba(244, 67, 54, 0.05)' } }} > )} ); })} {(captureListData[activeListTab]?.data || []) .slice(0, Math.min(captureListData[activeListTab]?.limit || 10, 5)) .map((row: any, rowIndex: any) => ( {Object.values(captureListData[activeListTab]?.fields || {}).map((field: any, colIndex) => ( {typeof row[field.label] === 'object' ? JSON.stringify(row[field.label]) : String(row[field.label] || '')} ))} )) }
)} {(activeTab === availableTabs.findIndex(tab => tab.id === 'captureScreenshot') || singleContentType === 'captureScreenshot') && screenshotData.length > 0 && ( {/* Screenshot Tabs */} {(() => { const screenshotSteps = browserSteps.filter(step => step.type === 'screenshot' && step.screenshotData) as Array<{ id: number; name?: string; type: 'screenshot'; fullPage: boolean; actionId?: string; screenshotData?: string }>; return screenshotData.map((screenshot, index) => { const screenshotStep = screenshotSteps[index]; if (!screenshotStep) return null; const isActive = activeScreenshotTab === index; const isEditing = editing.stepId === screenshotStep.id && editing.type === 'screenshot'; const screenshotName = screenshotStep.name || `Screenshot ${index + 1}`; return ( { if (!isEditing) { setActiveScreenshotTab(index); } }} onDoubleClick={() => startEdit(screenshotStep.id, 'screenshot', screenshotName)} sx={{ px: 3, py: 1.25, cursor: isEditing ? 'text' : 'pointer', borderRadius: '8px 8px 0 0', backgroundColor: darkMode ? '#131313ff' : '#ffffff', color: isActive ? darkMode ? '#ffffff' : '#000000' : darkMode ? '#b0b0b0' : '#555555', fontWeight: isActive ? 600 : 400, fontSize: '0.875rem', border: '1px solid', borderColor: darkMode ? '#2a2a2a' : '#d0d0d0', borderBottom: isActive ? darkMode ? '2px solid #1c1c1c' : '2px solid #ffffff' : '2px solid transparent', transition: 'all 0.2s ease', position: 'relative', '&:hover': { backgroundColor: isActive ? undefined : darkMode ? '#161616' : '#e9ecef', }, '&:hover .delete-icon': { opacity: 1 }, }} > {isEditing ? ( setEditing({ ...editing, value: e.target.value })} onBlur={saveEdit} onKeyDown={(e) => { if (e.key === 'Enter') saveEdit(); if (e.key === 'Escape') cancelEdit(); }} autoFocus size="small" variant="standard" sx={{ minWidth: '120px', '& .MuiInputBase-input': { color: darkMode ? 
'#fff' : '#000', fontSize: 'inherit', fontWeight: 'inherit', padding: 0, }, '& .MuiInput-underline:before': { display: 'none' }, '& .MuiInput-underline:after': { display: 'none' }, '& .MuiInput-underline:hover:before': { display: 'none' }, }} /> ) : ( <> {screenshotName} { e.stopPropagation(); handleRemoveScreenshotAction(screenshotStep.id, screenshotStep.actionId); }} sx={{ position: 'absolute', right: 4, top: '50%', transform: 'translateY(-50%)', opacity: 0, transition: 'opacity 0.2s', color: darkMode ? '#999' : '#666', padding: '2px', '&:hover': { color: '#f44336', backgroundColor: darkMode ? 'rgba(244, 67, 54, 0.1)' : 'rgba(244, 67, 54, 0.05)' } }} > )} ); }); })()} {/* Screenshot Image */} {`Screenshot )} {(activeTab === availableTabs.findIndex(tab => tab.id === 'captureText') || singleContentType === 'captureText') && captureTextData.length > 0 && ( {editingTextGroupName ? ( setEditingTextGroupNameValue(e.target.value)} onBlur={handleSaveTextGroupName} onKeyDown={(e) => { if (e.key === 'Enter') handleSaveTextGroupName(); if (e.key === 'Escape') { setEditingTextGroupName(false); setEditingTextGroupNameValue(currentTextGroupName); } }} autoFocus size="small" variant="standard" sx={{ minWidth: '120px', '& .MuiInputBase-input': { color: darkMode ? '#fff' : '#000', fontSize: 'inherit', fontWeight: 'inherit', padding: 0, }, '& .MuiInput-underline:before': { display: 'none' }, '& .MuiInput-underline:after': { display: 'none' }, '& .MuiInput-underline:hover:before': { display: 'none' }, }} /> ) : ( <> {currentTextGroupName} { e.stopPropagation(); handleRemoveAllTextActions(); }} sx={{ position: 'absolute', right: 4, top: '50%', transform: 'translateY(-50%)', opacity: 0, transition: 'opacity 0.2s', color: darkMode ? '#999' : '#666', padding: '2px', '&:hover': { color: '#f44336', backgroundColor: darkMode ? 
'rgba(244, 67, 54, 0.1)' : 'rgba(244, 67, 54, 0.05)' } }} > )} Label Value {captureTextData.map((textStep: any, index) => { const isEditing = editing.stepId === textStep.id && editing.type === 'text'; return ( {isEditing ? ( setEditing({ ...editing, value: e.target.value })} onBlur={saveEdit} onKeyDown={(e) => { if (e.key === 'Enter') saveEdit(); if (e.key === 'Escape') cancelEdit(); }} autoFocus size="small" sx={{ flex: 1, minWidth: '150px', '& .MuiInputBase-root': { backgroundColor: darkMode ? '#2a2929' : '#fff' } }} /> ) : ( startEdit(textStep.id, 'text', textStep.label)} > {textStep.label} handleDeleteTextStep(textStep.id)} sx={{ position: 'absolute', right: 4, top: '50%', transform: 'translateY(-50%)', opacity: 0, transition: 'opacity 0.2s', color: darkMode ? '#999' : '#666', padding: '4px', '&:hover': { color: '#f44336', backgroundColor: darkMode ? 'rgba(244, 67, 54, 0.1)' : 'rgba(244, 67, 54, 0.05)' } }} > )} {typeof textStep.data === 'object' ? JSON.stringify(textStep.data) : String(textStep.data || '')} ); })}
)}
)}
); }; ================================================ FILE: src/components/run/RunContent.tsx ================================================ import { Box, Typography, Paper, Button, CircularProgress, Accordion, AccordionSummary, AccordionDetails, Link } from "@mui/material"; import * as React from "react"; import { Data } from "./RunsTable"; import { TabPanel, TabContext } from "@mui/lab"; import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; import { useEffect, useState } from "react"; import JSZip from "jszip"; import Table from '@mui/material/Table'; import TableBody from '@mui/material/TableBody'; import TableCell from '@mui/material/TableCell'; import TableContainer from '@mui/material/TableContainer'; import TableHead from '@mui/material/TableHead'; import TableRow from '@mui/material/TableRow'; import { useTranslation } from "react-i18next"; import { useThemeMode } from "../../context/theme-provider"; interface RunContentProps { row: Data, currentLog: string, interpretationInProgress: boolean, logEndRef: React.RefObject, abortRunHandler: () => void, workflowProgress: { current: number; total: number; percentage: number; } | null, } export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRef, abortRunHandler, workflowProgress }: RunContentProps) => { const { t } = useTranslation(); const { darkMode } = useThemeMode(); const [tab, setTab] = React.useState('output'); const [markdownContent, setMarkdownContent] = useState(''); const [htmlContent, setHtmlContent] = useState(''); const [schemaData, setSchemaData] = useState([]); const [schemaColumns, setSchemaColumns] = useState([]); const [schemaKeys, setSchemaKeys] = useState([]); const [schemaDataByKey, setSchemaDataByKey] = useState>({}); const [schemaColumnsByKey, setSchemaColumnsByKey] = useState>({}); const [isSchemaTabular, setIsSchemaTabular] = useState(false); const [listData, setListData] = useState([]); const [listColumns, setListColumns] = useState([]); const [listKeys, 
setListKeys] = useState([]);
  const [currentListIndex, setCurrentListIndex] = useState(0);
  const [crawlData, setCrawlData] = useState([]);
  const [crawlColumns, setCrawlColumns] = useState([]);
  const [crawlKeys, setCrawlKeys] = useState([]);
  const [currentCrawlIndex, setCurrentCrawlIndex] = useState(0);
  const [searchData, setSearchData] = useState([]);
  const [searchMode, setSearchMode] = useState<'discover' | 'scrape'>('discover');
  const [currentSearchIndex, setCurrentSearchIndex] = useState(0);
  const [screenshotKeys, setScreenshotKeys] = useState([]);
  const [screenshotKeyMap, setScreenshotKeyMap] = useState>({});
  const [currentScreenshotIndex, setCurrentScreenshotIndex] = useState(0);
  const [currentSearchScreenshotTab, setCurrentSearchScreenshotTab] = useState(0);
  const [currentSchemaIndex, setCurrentSchemaIndex] = useState(0);
  const [legacyData, setLegacyData] = useState([]);
  const [legacyColumns, setLegacyColumns] = useState([]);
  const [isLegacyData, setIsLegacyData] = useState(false);

  // NOTE(review): this writes the current tab value back into itself whenever
  // interpretationInProgress changes, so it has no visible effect.
  useEffect(() => {
    setTab(tab);
  }, [interpretationInProgress]);

  /**
   * Maps a completion percentage to a short status message.
   * Exactly 0 means "not started"; a value that is below none of the
   * thresholds (100 or more, or NaN) yields the final message.
   */
  const getProgressMessage = (percentage: number): string => {
    if (percentage === 0) {
      return 'Initializing workflow...';
    }
    // Exclusive upper bounds, checked in ascending order.
    const stages: Array<[number, string]> = [
      [25, 'Starting execution...'],
      [50, 'Processing actions...'],
      [75, 'Extracting data...'],
      [100, 'Finalizing results...'],
    ];
    const stage = stages.find(([upperBound]) => percentage < upperBound);
    return stage ? stage[1] : 'Completing...';
  };

  // Keep the markdown / HTML previews in sync with the run output: clear both,
  // then take the content of the first entry of each array when it has any.
  useEffect(() => {
    setMarkdownContent('');
    setHtmlContent('');

    const output = row.serializableOutput;

    const firstMarkdown = Array.isArray(output?.markdown) ? output.markdown[0] : undefined;
    if (firstMarkdown?.content) {
      setMarkdownContent(firstMarkdown.content);
    }

    const firstHtml = Array.isArray(output?.html) ? output.html[0] : undefined;
    if (firstHtml?.content) {
      setHtmlContent(firstHtml.content);
    }
  }, [row.serializableOutput]);

  useEffect(() => {
    if (row.status === 'running' || row.status === 
'queued' || row.status === 'scheduled') {
      // The run has not finished yet: clear every piece of derived output state and stop.
      setSchemaData([]);
      setSchemaColumns([]);
      setSchemaKeys([]);
      setSchemaDataByKey({});
      setSchemaColumnsByKey({});
      setListData([]);
      setListColumns([]);
      setListKeys([]);
      setCrawlData([]);
      setCrawlColumns([]);
      setCrawlKeys([]);
      setSearchData([]);
      setLegacyData([]);
      setLegacyColumns([]);
      setIsLegacyData(false);
      setIsSchemaTabular(false);
      return;
    }

    if (!row.serializableOutput) return;

    // An array-valued scrapeSchema or scrapeList is treated as the legacy output shape.
    const hasLegacySchema = row.serializableOutput.scrapeSchema && Array.isArray(row.serializableOutput.scrapeSchema);
    const hasLegacyList = row.serializableOutput.scrapeList && Array.isArray(row.serializableOutput.scrapeList);
    // Output that has none of the four known top-level keys but is not empty is also routed to the legacy path.
    const hasOldFormat = !row.serializableOutput.scrapeSchema && !row.serializableOutput.scrapeList && !row.serializableOutput.crawl && !row.serializableOutput.search && Object.keys(row.serializableOutput).length > 0;

    if (hasLegacySchema || hasLegacyList || hasOldFormat) {
      processLegacyData(row.serializableOutput);
      // NOTE(review): the legacy branch also sets the flag to false; nothing in this effect ever sets it to true.
      setIsLegacyData(false);
      return;
    }

    setIsLegacyData(false);

    // Current format: each section is processed independently when present.
    if (row.serializableOutput.scrapeSchema && Object.keys(row.serializableOutput.scrapeSchema).length > 0) {
      processSchemaData(row.serializableOutput.scrapeSchema);
    }

    if (row.serializableOutput.scrapeList) {
      processScrapeList(row.serializableOutput.scrapeList);
    }

    if (row.serializableOutput.crawl) {
      processCrawl(row.serializableOutput.crawl);
    }

    if (row.serializableOutput.search) {
      processSearch(row.serializableOutput.search);
    }
  }, [row.serializableOutput, row.status]);

  // Derives the screenshot tab labels from the keys of row.binaryOutput, together
  // with a map from each label back to the raw key it was derived from.
  useEffect(() => {
    if (row.status === 'running' || row.status === 'queued' || row.status === 'scheduled') {
      setScreenshotKeys([]);
      setScreenshotKeyMap({});
      setCurrentScreenshotIndex(0);
      return;
    }

    if (row.binaryOutput && Object.keys(row.binaryOutput).length > 0) {
      const rawKeys = Object.keys(row.binaryOutput);

      // Legacy runs: every key has the form item-<digits>-<digits>; those are simply numbered.
      const isLegacyPattern = rawKeys.every(key => /^item-\d+-\d+$/.test(key));

      let normalizedScreenshotKeys: string[];

      if (isLegacyPattern) {
        normalizedScreenshotKeys = rawKeys.map((_, index) => `Screenshot 
${index + 1}`);
      } else {
        // Two well-known keys get fixed labels, any other key mentioning "screenshot"
        // (or an empty key) is numbered by position, everything else is shown as-is.
        normalizedScreenshotKeys = rawKeys.map((key, index) => {
          if (key === 'screenshot-visible') {
            return 'Screenshot (Visible)';
          } else if (key === 'screenshot-fullpage') {
            return 'Screenshot (Full Page)';
          } else if (!key || key.toLowerCase().includes("screenshot")) {
            return `Screenshot ${index + 1}`;
          }
          return key;
        });
      }

      // Labels and raw keys are index-aligned, so the label at position i maps to rawKeys[i].
      const keyMap: Record = {};
      normalizedScreenshotKeys.forEach((displayName, index) => {
        keyMap[displayName] = rawKeys[index];
      });

      setScreenshotKeys(normalizedScreenshotKeys);
      setScreenshotKeyMap(keyMap);
      setCurrentScreenshotIndex(0);
    } else {
      setScreenshotKeys([]);
      setScreenshotKeyMap({});
      setCurrentScreenshotIndex(0);
    }
  }, [row.binaryOutput, row.status]);

  /**
   * Converts legacy run output into the keyed shapes used by the current
   * processors and forwards the result to processSchemaData / processScrapeList.
   *
   * Only array-valued entries of legacyOutput are considered:
   * - an array whose first non-null element is itself an array is treated as a
   *   collection of lists; each non-empty sub-array becomes "List N";
   * - any other array is treated as schema rows and becomes "Text N".
   * Rows are kept only when they are objects with at least one value that is
   * neither undefined nor the empty string.
   */
  const processLegacyData = (legacyOutput: Record) => {
    const convertedSchema: Record = {};
    const convertedList: Record = {};

    const keys = Object.keys(legacyOutput);

    keys.forEach((key) => {
      const data = legacyOutput[key];

      if (Array.isArray(data)) {
        const firstNonNullElement = data.find(item => item !== null && item !== undefined);
        const isNestedArray = firstNonNullElement && Array.isArray(firstNonNullElement);

        if (isNestedArray) {
          data.forEach((subArray, index) => {
            if (subArray !== null && subArray !== undefined && Array.isArray(subArray) && subArray.length > 0) {
              const filteredData = subArray.filter(row => row && typeof row === 'object' && Object.values(row).some(value => value !== undefined && value !== "") );

              if (filteredData.length > 0) {
                // Numbered by how many lists have been collected so far, not by position in the source array.
                const autoName = `List ${Object.keys(convertedList).length + 1}`;
                convertedList[autoName] = filteredData;
              }
            }
          });
        } else {
          // Flat array: nested arrays are excluded from the rows here.
          const filteredData = data.filter(row => row && typeof row === 'object' && !Array.isArray(row) && Object.values(row).some(value => value !== undefined && value !== "") );

          if (filteredData.length > 0) {
            const schemaCount = Object.keys(convertedSchema).length;
            const autoName = `Text ${schemaCount + 1}`;
            convertedSchema[autoName] = filteredData;
          }
        }
      }
    });

    // A single schema entry is relabelled from "Text 1" to "Texts".
    if (Object.keys(convertedSchema).length === 1) {
      const singleKey = 
Object.keys(convertedSchema)[0];
      const singleData = convertedSchema[singleKey];
      delete convertedSchema[singleKey];
      convertedSchema["Texts"] = singleData;
    }

    // Hand the converted shapes to the regular processors; empty results are not forwarded.
    if (Object.keys(convertedSchema).length > 0) {
      processSchemaData(convertedSchema);
    }

    if (Object.keys(convertedList).length > 0) {
      processScrapeList(convertedList);
    }
  };

  /**
   * Loads schema ("captured text") output into component state.
   *
   * Accepts either an array of rows or an object whose values are arrays of rows.
   * Rows are kept only when they are objects with at least one value that is
   * neither undefined nor the empty string. For the object form, both a
   * flattened view (all rows, union of columns) and per-key views are stored.
   */
  const processSchemaData = (schemaOutput: any) => {
    const keys = Object.keys(schemaOutput);
    // Empty keys and auto-generated "scrapeSchema..." keys are replaced by display labels.
    const normalizedKeys = keys.map((key, index) => {
      if (!key || key.toLowerCase().includes("scrapeschema")) {
        return keys.length === 1 ? "Texts" : `Text ${index + 1}`;
      }
      return key;
    });

    // NOTE(review): this runs before the array check below, so for array input
    // the stored keys are the array indices.
    setSchemaKeys(normalizedKeys);

    const dataByKey: Record = {};
    const columnsByKey: Record = {};

    // Array form: a single table; the per-key maps are left untouched.
    if (Array.isArray(schemaOutput)) {
      const filteredData = schemaOutput.filter(row => row && typeof row === 'object' && Object.values(row).some(value => value !== undefined && value !== "") );

      if (filteredData.length > 0) {
        const allColumns = new Set();
        filteredData.forEach(item => {
          Object.keys(item).forEach(key => allColumns.add(key));
        });

        setSchemaData(filteredData);
        setSchemaColumns(Array.from(allColumns));
        setIsSchemaTabular(filteredData.length > 1);
        return;
      }
      // An array with no usable rows falls through to the keyed handling below.
    }

    let allData: any[] = [];
    let hasMultipleEntries = false;

    // Object form: collect rows and columns per original key, and the flattened list of all rows.
    keys.forEach(key => {
      const data = schemaOutput[key];
      if (Array.isArray(data)) {
        const filteredData = data.filter(row => row && typeof row === 'object' && Object.values(row).some(value => value !== undefined && value !== "") );

        dataByKey[key] = filteredData;

        const columnsForKey = new Set();
        filteredData.forEach(item => {
          Object.keys(item).forEach(col => columnsForKey.add(col));
        });
        columnsByKey[key] = Array.from(columnsForKey);

        allData = [...allData, ...filteredData];
        if (filteredData.length > 1) hasMultipleEntries = true;
      }
    });

    // Re-key the per-key maps by display label (labels and keys are index-aligned).
    // NOTE(review): keys whose value was not an array have no entry above, so
    // their label maps to undefined here.
    const remappedDataByKey: Record = {};
    const remappedColumnsByKey: Record = {};

    normalizedKeys.forEach((newKey, idx) => {
      const oldKey = keys[idx];
      remappedDataByKey[newKey] = dataByKey[oldKey];
      remappedColumnsByKey[newKey] = 
columnsByKey[oldKey];
    });

    setSchemaDataByKey(remappedDataByKey);
    setSchemaColumnsByKey(remappedColumnsByKey);

    // Flattened view across all keys; left unchanged when no rows survived filtering.
    if (allData.length > 0) {
      const allColumns = new Set();
      allData.forEach(item => {
        Object.keys(item).forEach(key => allColumns.add(key));
      });

      setSchemaData(allData);
      setSchemaColumns(Array.from(allColumns));
      setIsSchemaTabular(hasMultipleEntries || allData.length > 1);
    }
  };

  /**
   * Loads list output into component state as parallel arrays: one table of
   * rows, one column list and one display label per captured list.
   *
   * Entries that are not non-empty arrays, or that have no usable rows after
   * filtering, are dropped entirely (so they get no label either). The
   * selected list index is reset to 0.
   */
  const processScrapeList = (scrapeListData: any) => {
    const tablesList: any[][] = [];
    const columnsList: string[][] = [];
    const keys: string[] = [];

    // NOTE(review): typeof null is 'object', so a null argument would throw at
    // Object.keys; the callers visible in this file only pass truthy values.
    if (typeof scrapeListData === 'object') {
      Object.keys(scrapeListData).forEach(key => {
        const tableData = scrapeListData[key];

        if (Array.isArray(tableData) && tableData.length > 0) {
          const filteredData = tableData.filter(row => row && typeof row === 'object' && Object.values(row).some(value => value !== undefined && value !== "") );

          if (filteredData.length > 0) {
            tablesList.push(filteredData);
            keys.push(key);

            // Columns are the union of the keys of every kept row, in first-seen order.
            const tableColumns = new Set();
            filteredData.forEach(item => {
              Object.keys(item).forEach(key => tableColumns.add(key));
            });
            columnsList.push(Array.from(tableColumns));
          }
        }
      });
    }

    setListData(tablesList);
    setListColumns(columnsList);

    // Empty keys and auto-generated "scrapeList..." keys are shown as "List N",
    // numbered by position among the kept lists.
    const normalizedListKeys = keys.map((key, index) => {
      if (!key || key.toLowerCase().includes("scrapelist")) {
        return `List ${index + 1}`;
      }
      return key;
    });

    setListKeys(normalizedListKeys);
    setCurrentListIndex(0);
  };

  /**
   * Loads crawl output into component state. Follows the same steps as
   * processScrapeList, writing to the crawl state and labelling
   * empty or "crawl..." keys as "Crawl N".
   */
  const processCrawl = (crawlDataInput: any) => {
    const tablesList: any[][] = [];
    const columnsList: string[][] = [];
    const keys: string[] = [];

    if (typeof crawlDataInput === 'object') {
      Object.keys(crawlDataInput).forEach(key => {
        const tableData = crawlDataInput[key];

        if (Array.isArray(tableData) && tableData.length > 0) {
          const filteredData = tableData.filter(row => row && typeof row === 'object' && Object.values(row).some(value => value !== undefined && value !== "") );

          if (filteredData.length > 0) {
            tablesList.push(filteredData);
            keys.push(key);

            const tableColumns = new Set();
            
filteredData.forEach(item => {
              Object.keys(item).forEach(key => tableColumns.add(key));
            });
            columnsList.push(Array.from(tableColumns));
          }
        }
      });
    }

    setCrawlData(tablesList);
    setCrawlColumns(columnsList);

    // Empty keys and auto-generated "crawl..." keys are shown as "Crawl N".
    const normalizedCrawlKeys = keys.map((key, index) => {
      if (!key || key.toLowerCase().includes("crawl")) {
        return `Crawl ${index + 1}`;
      }
      return key;
    });

    setCrawlKeys(normalizedCrawlKeys);
    setCurrentCrawlIndex(0);
  };

  /**
   * Loads search output into component state. Only the first key of the input
   * is read. In 'scrape' mode the results are stored untouched; otherwise each
   * result is reduced to title / url / description / position, with '-' (or
   * the 1-based index for position) standing in for missing values.
   */
  const processSearch = (searchDataInput: any) => {
    if (typeof searchDataInput === 'object') {
      const keys = Object.keys(searchDataInput);

      if (keys.length > 0) {
        const searchKey = keys[0];
        const searchInfo = searchDataInput[searchKey];

        if (searchInfo && searchInfo.results && Array.isArray(searchInfo.results)) {
          const mode = searchInfo.mode || 'discover';
          setSearchMode(mode);

          if (mode === 'scrape') {
            setSearchData(searchInfo.results);
          } else {
            const normalizedResults = searchInfo.results.map((result: any, index: number) => ({
              title: result.title || '-',
              url: result.url || '-',
              description: result.description || '-',
              position: result.position || index + 1,
            }));
            setSearchData(normalizedResults);
          }

          setCurrentSearchIndex(0);
        }
      }
    }
  };

  /**
   * Serialises rows to CSV text.
   *
   * A single schema row that is not tabular is written as Label/Value pairs,
   * one line per column. Everything else is written as a header line followed
   * by one line per row.
   *
   * Every field, headers and labels included, is wrapped in double quotes with
   * embedded quotes doubled. Only null and undefined become an empty field, so
   * 0 and false are preserved. Objects and arrays are written as JSON, matching
   * how the on-screen table shows them.
   *
   * @param data rows to export
   * @param columns column names, in output order
   * @param isSchemaData whether the rows come from the schema (captured text) result
   * @param isTabular whether the schema result should be laid out as a table
   * @returns the CSV document, lines joined with "\n"
   */
  const convertToCSV = (data: any[], columns: string[], isSchemaData: boolean = false, isTabular: boolean = false): string => {
    const toCell = (value: unknown): string => {
      let text: string;
      if (value === null || value === undefined) {
        text = '';
      } else if (typeof value === 'object') {
        text = JSON.stringify(value);
      } else {
        text = String(value);
      }
      return `"${text.replace(/"/g, '""')}"`;
    };

    if (isSchemaData && !isTabular && data.length === 1) {
      const record = data[0];
      const rows = columns.map(column => `${toCell(column)},${toCell(record?.[column])}`);
      return ['Label,Value', ...rows].join('\n');
    }

    const header = columns.map(col => toCell(col)).join(',');
    const rows = data.map(row => columns.map(col => toCell(row?.[col])).join(','));
    return [header, ...rows].join('\n');
  };

  /** Builds the CSV for the given rows and offers it to the browser as a download named filename. */
  const downloadCSV = (data: any[], columns: string[], filename: string, isSchemaData: boolean = false, isTabular: boolean = false) => {
    const csvContent = convertToCSV(data, columns, isSchemaData, isTabular);
    const blob = new 
Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
    saveBlobAs(blob, filename);
  };

  /**
   * Starts a browser download of url under the given file name by clicking a
   * temporary anchor element, which is removed from the document again
   * immediately afterwards.
   */
  const clickDownloadLink = (url: string, filename: string) => {
    const link = document.createElement("a");
    link.href = url;
    link.setAttribute("download", filename);
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
  };

  /**
   * Offers a Blob to the browser as a download. The object URL is released
   * shortly after the click so the download has a chance to start first.
   */
  const saveBlobAs = (blob: Blob, filename: string) => {
    const url = URL.createObjectURL(blob);
    clickDownloadLink(url, filename);
    setTimeout(() => {
      URL.revokeObjectURL(url);
    }, 100);
  };

  /** Downloads data as pretty-printed JSON (2-space indent). */
  const downloadJSON = (data: any[], filename: string) => {
    const jsonContent = JSON.stringify(data, null, 2);
    saveBlobAs(new Blob([jsonContent], { type: 'application/json;charset=utf-8;' }), filename);
  };

  /** Downloads content as a Markdown file. */
  const downloadMarkdown = (content: string, filename: string) => {
    saveBlobAs(new Blob([content], { type: 'text/markdown;charset=utf-8;' }), filename);
  };

  /** Downloads content as a plain-text file. */
  const downloadText = (content: string, filename: string) => {
    saveBlobAs(new Blob([content], { type: 'text/plain;charset=utf-8;' }), filename);
  };

  /** Downloads content as an HTML file; same steps as saveBlobAs, with the release timer written inline. */
  const downloadHTML = (content: string, filename: string) => {
    const blob = new Blob([content], { type: 'text/html;charset=utf-8;' });
    const url = URL.createObjectURL(blob);
    clickDownloadLink(url, filename);
    setTimeout(() => 
{ URL.revokeObjectURL(url); }, 100);
  };

  /**
   * Packs every crawled page into a zip archive and offers it as a download.
   *
   * Each page gets its own folder, named after its sanitised URL or
   * "page_N" when it has no usable URL. A numeric suffix is added when two
   * pages would otherwise share a folder, so no page overwrites another.
   * A folder contains metadata.json (the whole item) plus, when present,
   * content.txt, content.html, content.md, links.txt and the screenshots
   * that are stored inline in row.binaryOutput.
   *
   * Null or undefined entries in crawlDataArray are skipped.
   *
   * @param crawlDataArray crawled page records
   * @param zipFilename file name offered to the browser for the archive
   */
  const downloadAllCrawlsAsZip = async (crawlDataArray: any[], zipFilename: string) => {
    const zip = new JSZip();
    const usedFolderNames = new Set<string>();

    // Structured values are written as pretty-printed JSON, everything else as text.
    const asText = (value: any): string =>
      typeof value === 'object' ? JSON.stringify(value, null, 2) : String(value);

    for (let index = 0; index < crawlDataArray.length; index++) {
      const item = crawlDataArray[index];
      if (item === null || item === undefined) continue;

      // Only a string can be turned into a folder name; anything else falls back to page_N.
      const candidateUrl = item.metadata?.url || item.url || '';
      const pageUrl = typeof candidateUrl === 'string' ? candidateUrl : '';
      const baseName = pageUrl
        ? pageUrl.replace(/^https?:\/\//, '').replace(/\//g, '_').replace(/[^a-zA-Z0-9_.-]/g, '_')
        : `page_${index + 1}`;

      // Different URLs can sanitise to the same name; asking the archive for an
      // existing folder again would make the later page overwrite the earlier one.
      let folderName = baseName;
      for (let suffix = 2; usedFolderNames.has(folderName); suffix++) {
        folderName = `${baseName}_${suffix}`;
      }
      usedFolderNames.add(folderName);

      const pageFolder = zip.folder(folderName);
      if (!pageFolder) continue;

      pageFolder.file('metadata.json', JSON.stringify(item, null, 2));

      if (item.text) {
        pageFolder.file('content.txt', asText(item.text));
      }

      if (item.html) {
        pageFolder.file('content.html', asText(item.html));
      }

      if (item.markdown) {
        pageFolder.file('content.md', asText(item.markdown));
      }

      if (item.links && Array.isArray(item.links)) {
        const uniqueLinks = Array.from(new Set(item.links));
        pageFolder.file('links.txt', uniqueLinks.join('\n'));
      }

      const screenshots = [
        { id: item.screenshotVisible, name: 'screenshot_visible.png' },
        { id: item.screenshotFullpage, name: 'screenshot_full_page.png' }
      ];

      for (const screenshot of screenshots) {
        if (screenshot.id && row.binaryOutput && row.binaryOutput[screenshot.id]) {
          const binaryData = row.binaryOutput[screenshot.id].data;
          // Only inline string data can be embedded; remote (http...) references are left out.
          if (typeof binaryData === 'string' && binaryData && !binaryData.startsWith('http')) {
            const base64Data = binaryData.replace(/^data:image\/\w+;base64,/, "");
            pageFolder.file(screenshot.name, base64Data, { base64: true });
          }
        }
      }
    }

    const blob = await zip.generateAsync({ type: 'blob' });
    const url = URL.createObjectURL(blob);
    const link = document.createElement("a");
    link.href = url;
    link.setAttribute("download", zipFilename); 
document.body.appendChild(link); link.click(); document.body.removeChild(link); setTimeout(() => { URL.revokeObjectURL(url); }, 100); }; const renderDataTable = ( data: any[], columns: string[], title: string, csvFilename: string, jsonFilename: string, isSchemaData: boolean = false ) => { if (data.length === 0) return null; const shouldShowAsKeyValue = isSchemaData && !isSchemaTabular && data.length === 1; if (!title || title.trim() === '') { return ( <> {shouldShowAsKeyValue ? ( <> theme.palette.mode === 'dark' ? '#11111' : '#f8f9fa' }} > Label theme.palette.mode === 'dark' ? '#11111' : '#f8f9fa' }} > Value ) : ( columns.map((column) => ( theme.palette.mode === 'dark' ? '#11111' : '#f8f9fa' }} > {column} )) )} {shouldShowAsKeyValue ? ( columns.map((column) => ( {column} {data[0][column] === undefined || data[0][column] === "" ? "-" : (typeof data[0][column] === 'object' ? JSON.stringify(data[0][column]) : String(data[0][column]))} )) ) : ( data.map((row, index) => ( {columns.map((column) => ( {row[column] === undefined || row[column] === "" ? "-" : (typeof row[column] === 'object' ? JSON.stringify(row[column]) : String(row[column]))} ))} )) )}
); } return ( } aria-controls={`${title.toLowerCase()}-content`} id={`${title.toLowerCase()}-header`} > {title} {shouldShowAsKeyValue ? ( <> theme.palette.mode === 'dark' ? '#11111' : '#f8f9fa' }} > Label theme.palette.mode === 'dark' ? '#11111' : '#f8f9fa' }} > Value ) : ( columns.map((column) => ( theme.palette.mode === 'dark' ? '#11111' : '#f8f9fa' }} > {column} )) )} {shouldShowAsKeyValue ? ( columns.map((column) => ( {column} {data[0][column] === undefined || data[0][column] === "" ? "-" : (typeof data[0][column] === 'object' ? JSON.stringify(data[0][column]) : String(data[0][column]))} )) ) : ( data.map((row, index) => ( {columns.map((column) => ( {row[column] === undefined || row[column] === "" ? "-" : (typeof row[column] === 'object' ? JSON.stringify(row[column]) : String(row[column]))} ))} )) )}
); }; const hasData = schemaData.length > 0 || listData.length > 0 || crawlData.length > 0 || searchData.length > 0 || legacyData.length > 0; const hasScreenshots = row.binaryOutput && Object.keys(row.binaryOutput).length > 0; const hasMarkdown = markdownContent.length > 0; const hasHTML = htmlContent.length > 0; return ( {hasMarkdown || hasHTML ? ( <> {hasMarkdown && ( }> Markdown {markdownContent} )} {hasHTML && ( }> HTML {htmlContent} )} {hasScreenshots && ( }> {t('run_content.captured_screenshot.title', 'Captured Screenshots')} {screenshotKeys.length > 1 && ( {screenshotKeys.map((key, idx) => ( setCurrentScreenshotIndex(idx)} sx={{ px: 3, py: 1, cursor: 'pointer', backgroundColor: currentScreenshotIndex === idx ? (theme) => theme.palette.mode === 'dark' ? '#121111ff' : '#e9ecef' : 'transparent', borderBottom: currentScreenshotIndex === idx ? '3px solid #FF00C3' : 'none', color: (theme) => theme.palette.mode === 'dark' ? '#fff' : '#000', }} > {key} ))} )} {screenshotKeys.length > 0 && ( {`Screenshot )} )} ) : ( <> {row.status === 'running' || row.status === 'queued' ? ( <> {workflowProgress ? ( <> {getProgressMessage(workflowProgress.percentage)} ) : ( <> {t('run_content.loading')} )} ) : (!hasData && !hasScreenshots ? {t('run_content.empty_output')} : null)} {hasData && ( {isLegacyData && ( renderDataTable( legacyData, legacyColumns, t('run_content.captured_data.title'), 'data.csv', 'data.json' ) )} {!isLegacyData && ( <> {schemaData.length > 0 && ( }> {t('run_content.captured_data.schema_title', 'Captured Texts')} {schemaKeys.length > 0 && ( {schemaKeys.map((key, idx) => ( setCurrentSchemaIndex(idx)} sx={{ px: 3, py: 1, cursor: 'pointer', backgroundColor: currentSchemaIndex === idx ? (theme) => theme.palette.mode === 'dark' ? '#121111ff' : '#e9ecef' : 'transparent', borderBottom: currentSchemaIndex === idx ? '3px solid #FF00C3' : 'none', color: (theme) => theme.palette.mode === 'dark' ? 
'#fff' : '#000', }} > {key} ))} )} {renderDataTable( schemaDataByKey[schemaKeys[currentSchemaIndex]] || schemaData, schemaColumnsByKey[schemaKeys[currentSchemaIndex]] || schemaColumns, '', `${schemaKeys[currentSchemaIndex] || 'schema_data'}.csv`, `${schemaKeys[currentSchemaIndex] || 'schema_data'}.json`, true )} )} {listData.length > 0 && ( }> {t('run_content.captured_data.list_title', 'Captured Lists')} {listKeys.map((key, idx) => ( setCurrentListIndex(idx)} sx={{ px: 3, py: 1, cursor: 'pointer', backgroundColor: currentListIndex === idx ? (theme) => theme.palette.mode === 'dark' ? '#121111ff' : '#e9ecef' : 'transparent', borderBottom: currentListIndex === idx ? '3px solid #FF00C3' : 'none', color: (theme) => theme.palette.mode === 'dark' ? '#fff' : '#000', }} > {key} ))} {(listColumns[currentListIndex] || []).map((column) => ( theme.palette.mode === 'dark' ? '#11111' : '#f8f9fa' }} > {column} ))} {(listData[currentListIndex] || []).map((rowItem, idx) => ( {(listColumns[currentListIndex] || []).map((column) => ( {rowItem[column] === undefined || rowItem[column] === '' ? '-' : typeof rowItem[column] === 'object' ? JSON.stringify(rowItem[column]) : String(rowItem[column])} ))} ))}
)} )} {crawlData.length > 0 && crawlData[0] && crawlData[0].length > 0 && ( }> Crawl Results {crawlData[0].map((item: any, idx: number) => { const url = item?.metadata?.url || item?.url || `URL ${idx + 1}`; return ( setCurrentCrawlIndex(idx)} sx={{ px: 2, py: 1, cursor: 'pointer', backgroundColor: currentCrawlIndex === idx ? darkMode ? '#121111ff' : '#e9ecef' : 'transparent', borderBottom: currentCrawlIndex === idx ? '3px solid #FF00C3' : 'none', color: darkMode ? '#fff' : '#000', whiteSpace: 'nowrap', fontSize: '0.875rem', flexShrink: 0, }} title={url} > Link {idx + 1} ); })} {crawlData[0][currentCrawlIndex] && ( <> }> Metadata {crawlData[0][currentCrawlIndex].metadata && Object.entries(crawlData[0][currentCrawlIndex].metadata).map(([key, value]: [string, any]) => ( {key} {value === undefined || value === '' ? '-' : typeof value === 'object' ? JSON.stringify(value) : String(value)} )) }
{crawlData[0][currentCrawlIndex].text && ( }> Text Content {typeof crawlData[0][currentCrawlIndex].text === 'object' ? JSON.stringify(crawlData[0][currentCrawlIndex].text, null, 2) : crawlData[0][currentCrawlIndex].text} )} {crawlData[0][currentCrawlIndex].html && ( }> HTML {typeof crawlData[0][currentCrawlIndex].html === 'object' ? JSON.stringify(crawlData[0][currentCrawlIndex].html, null, 2) : crawlData[0][currentCrawlIndex].html} )} {crawlData[0][currentCrawlIndex].markdown && ( }> Markdown {typeof crawlData[0][currentCrawlIndex].markdown === 'object' ? JSON.stringify(crawlData[0][currentCrawlIndex].markdown, null, 2) : crawlData[0][currentCrawlIndex].markdown} )} {(() => { const validLinks = crawlData[0][currentCrawlIndex].links?.filter((link: any) => typeof link === 'string' && link.trim() !== '' ) || []; return validLinks.length > 0 && ( }> Links ({validLinks.length}) {(Array.from(new Set(validLinks)) as string[]).map((link: string, idx: number) => ( {link} ))} ); })()} )}
)} {searchData.length > 0 && ( }> Search Results {searchMode === 'scrape' && searchData.length > 0 ? ( <> {searchData.map((item: any, idx: number) => { const url = item?.metadata?.url || item?.url || `Result ${idx + 1}`; return ( setCurrentSearchIndex(idx)} sx={{ px: 2, py: 1, cursor: 'pointer', backgroundColor: currentSearchIndex === idx ? darkMode ? '#121111ff' : '#e9ecef' : 'transparent', borderBottom: currentSearchIndex === idx ? '3px solid #FF00C3' : 'none', color: darkMode ? '#fff' : '#000', whiteSpace: 'nowrap', fontSize: '0.875rem', flexShrink: 0, }} title={url} > Link {idx + 1} ); })} {searchData[currentSearchIndex] && ( <> }> Metadata {searchData[currentSearchIndex].metadata && Object.entries(searchData[currentSearchIndex].metadata).map(([key, value]: [string, any]) => ( {key} {value === undefined || value === '' ? '-' : typeof value === 'object' ? JSON.stringify(value) : String(value)} )) }
{searchData[currentSearchIndex].text && ( }> Text Content {searchData[currentSearchIndex].text} )} {searchData[currentSearchIndex].html && ( }> HTML {typeof searchData[currentSearchIndex].html === 'object' ? JSON.stringify(searchData[currentSearchIndex].html, null, 2) : searchData[currentSearchIndex].html} )} {searchData[currentSearchIndex].markdown && ( }> Markdown {typeof searchData[currentSearchIndex].markdown === 'object' ? JSON.stringify(searchData[currentSearchIndex].markdown, null, 2) : searchData[currentSearchIndex].markdown} )} {(() => { const validLinks = searchData[currentSearchIndex].links?.filter((link: any) => typeof link === 'string' && link.trim() !== '' ) || []; return validLinks.length > 0 && ( }> Links ({validLinks.length}) {(Array.from(new Set(validLinks)) as string[]).map((link: string, idx: number) => ( {link} ))} ); })()} {(searchData[currentSearchIndex].screenshotVisible || searchData[currentSearchIndex].screenshotFullpage) && ( }> Screenshots {(() => { const tabs: { key: string; label: string; value: string }[] = []; if (searchData[currentSearchIndex].screenshotVisible) tabs.push({ key: 'visible', label: 'Screenshot (Visible)', value: searchData[currentSearchIndex].screenshotVisible }); if (searchData[currentSearchIndex].screenshotFullpage) tabs.push({ key: 'fullpage', label: 'Screenshot (Full Page)', value: searchData[currentSearchIndex].screenshotFullpage }); // Ensure activeTab is valid for current tabs array const activeTab = Math.min(currentSearchScreenshotTab, tabs.length - 1); const getImageSrc = (val: string) => { if (val.startsWith('http')) return val; if (row.binaryOutput && row.binaryOutput[val]) { const binaryData = row.binaryOutput[val].data || row.binaryOutput[val]; return typeof binaryData === 'string' && binaryData.startsWith('http') ? binaryData : typeof binaryData === 'string' && binaryData.startsWith('data:') ? 
binaryData : `data:image/png;base64,${binaryData}`; } return `data:image/png;base64,${val}`; }; return ( <> {tabs.length > 1 && ( {tabs.map((tab, idx) => ( setCurrentSearchScreenshotTab(idx)} sx={{ px: 3, py: 1, cursor: 'pointer', backgroundColor: activeTab === idx ? (darkMode ? '#121111ff' : '#e9ecef') : 'transparent', borderBottom: activeTab === idx ? '3px solid #FF00C3' : 'none', color: darkMode ? '#fff' : '#000', }} > {tab.label} ))} )} {tabs.length > 0 && ( <> = 0 ? activeTab : 0].value)} alt={tabs[activeTab >= 0 ? activeTab : 0].label} style={{ maxWidth: '100%', borderRadius: '4px', border: '1px solid rgba(255,255,255,0.1)' }} /> )} ); })()} )} )} ) : ( <> Title URL Description {searchData.map((result: any, idx: number) => ( {result.title || '-'} {result.url ? ( {result.url} ) : '-'} {result.description || '-'} ))}
)}
)}
)} {hasScreenshots && ( }> {t('run_content.captured_screenshot.title', 'Captured Screenshots')} {screenshotKeys.length > 0 && ( {screenshotKeys.map((key, idx) => ( setCurrentScreenshotIndex(idx)} sx={{ px: 3, py: 1, cursor: 'pointer', backgroundColor: currentScreenshotIndex === idx ? (theme) => theme.palette.mode === 'dark' ? '#121111ff' : '#e9ecef' : 'transparent', borderBottom: currentScreenshotIndex === idx ? '3px solid #FF00C3' : 'none', color: (theme) => theme.palette.mode === 'dark' ? '#fff' : '#000', }} > {key} ))} )} {screenshotKeys.length > 0 && ( {`Screenshot )} )} )}
); };
================================================ FILE: src/components/run/RunSettings.tsx ================================================
import React, { useState, useEffect, useRef } from "react";
import { GenericModal } from "../ui/GenericModal";
import { MenuItem, TextField, Typography, Switch, FormControlLabel } from "@mui/material";
import { Dropdown } from "../ui/DropdownMui";
import Button from "@mui/material/Button";
import { modalStyle } from "../run/ColapsibleRow";

/**
 * Props for RunSettingsModal.
 *
 * - isOpen: whether the caller considers the modal open.
 * - handleStart: called with the settings to start a run.
 * - params: names of the recording parameters, if any.
 * - handleClose, isTask: not read in the hook section of the component.
 */
interface RunSettingsProps {
  isOpen: boolean;
  handleStart: (settings: RunSettings) => void;
  handleClose: () => void;
  isTask: boolean;
  params?: string[];
}

/** Settings passed to handleStart when a run is started. */
export interface RunSettings {
  maxConcurrency: number;
  maxRepeats: number;
  debug: boolean;
  params?: any;
}

/**
 * Run-settings modal. While the developer settings are hidden (the initial
 * state), opening the modal starts the run with the current settings and
 * nothing is rendered.
 */
export const RunSettingsModal = ({ isOpen, handleStart, handleClose, isTask, params }: RunSettingsProps) => {
  // Defaults used when the run is started without any user input.
  const [settings, setSettings] = useState({ maxConcurrency: 1, maxRepeats: 1, debug: true, });

  const [showInterpreterSettings, setShowInterpreterSettings] = useState(false);
  // Guards against calling handleStart more than once per opening.
  const hasRun = useRef(false);

  useEffect(() => {
    // Closing the modal re-arms the guard for the next opening.
    if (!isOpen) {
      hasRun.current = false;
      return;
    }

    // Open with developer settings hidden: start once with the current settings.
    if (!showInterpreterSettings && !hasRun.current) {
      hasRun.current = true;
      handleStart(settings);
    }
  }, [isOpen, showInterpreterSettings, settings, handleStart]);

  // Nothing is rendered unless the developer settings are shown.
  if (!showInterpreterSettings) {
    return null;
  }

  return (
{isTask && ( Recording parameters: {params?.map((item, index) => ( setSettings({ ...settings, params: settings.params ? { ...settings.params, [item]: e.target.value } : { [item]: e.target.value }, }) } /> ))} )} setShowInterpreterSettings(!showInterpreterSettings) } /> } label="Developer Mode Settings" sx={{ margin: "20px 0px" }} /> {showInterpreterSettings && ( setSettings({ ...settings, maxConcurrency: parseInt(e.target.value, 10), }) } defaultValue={settings.maxConcurrency} /> setSettings({ ...settings, maxRepeats: parseInt(e.target.value, 10), }) } defaultValue={settings.maxRepeats} /> setSettings({ ...settings, debug: e.target.value === "true", }) } > true false )}
); }; ================================================ FILE: src/components/run/Runs.tsx ================================================ import React from 'react'; import { Grid } from "@mui/material"; import { RunsTable } from "./RunsTable"; interface RunsProps { currentInterpretationLog: string; abortRunHandler: (runId: string, robotName: string, browserId: string) => void; runId: string; runningRecordingName: string; } export const Runs = ( { currentInterpretationLog, abortRunHandler, runId, runningRecordingName }: RunsProps) => { return ( ); } ================================================ FILE: src/components/run/RunsTable.tsx ================================================ import * as React from 'react'; import { useCallback, useEffect, useMemo, useState, useRef } from "react"; import { useTranslation } from 'react-i18next'; import Paper from '@mui/material/Paper'; import Table from '@mui/material/Table'; import TableBody from '@mui/material/TableBody'; import TableCell from '@mui/material/TableCell'; import TableContainer from '@mui/material/TableContainer'; import TableHead from '@mui/material/TableHead'; import TablePagination from '@mui/material/TablePagination'; import TableRow from '@mui/material/TableRow'; import { Accordion, AccordionSummary, AccordionDetails, Typography, Box, TextField, Tooltip, CircularProgress } from '@mui/material'; import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; import SearchIcon from '@mui/icons-material/Search'; import { useLocation, useNavigate } from 'react-router-dom'; import { useGlobalInfoStore, useCachedRuns, useCacheInvalidation } from "../../context/globalInfo"; import { RunSettings } from "./RunSettings"; import { CollapsibleRow } from "./ColapsibleRow"; import { ArrowDownward, ArrowUpward, UnfoldMore } from '@mui/icons-material'; import { io, Socket } from 'socket.io-client'; import { apiUrl } from '../../apiConfig'; export const columns: readonly Column[] = [ { id: 'runStatus', label: 'Status', 
minWidth: 80 },
  { id: 'name', label: 'Name', minWidth: 80 },
  { id: 'startedAt', label: 'Started At', minWidth: 80 },
  { id: 'finishedAt', label: 'Finished At', minWidth: 80 },
  { id: 'settings', label: 'Settings', minWidth: 80 },
  { id: 'delete', label: 'Delete', minWidth: 80 },
];

/** Sort state of a column; 'none' means unsorted. */
type SortDirection = 'asc' | 'desc' | 'none';

/** Sort configuration per robot accordion, keyed by robotMetaId. */
interface AccordionSortConfig {
  [robotMetaId: string]: {
    field: keyof Data | null;
    direction: SortDirection;
  };
}

/** Description of one column of the runs table. */
interface Column {
  id: 'runStatus' | 'name' | 'startedAt' | 'finishedAt' | 'delete' | 'settings';
  label: string;
  minWidth?: number;
  align?: 'right';
  format?: (value: string) => string;
}

/** One run record as shown in the runs table. */
export interface Data {
  id: number;
  status: string;
  name: string;
  startedAt: string;
  finishedAt: string;
  runByUserId?: string;
  runByScheduleId?: string;
  browserId: string;
  runByAPI?: boolean;
  runBySDK?: boolean;
  log: string;
  runId: string;
  robotId: string;
  robotMetaId: string;
  interpreterSettings: RunSettings;
  // Untyped run results.
  serializableOutput: any;
  binaryOutput: any;
}

/** Props for RunsTable. */
interface RunsTableProps {
  currentInterpretationLog: string;
  abortRunHandler: (runId: string, robotName: string, browserId: string) => void;
  runId: string;
  runningRecordingName: string;
}

/** Pagination state per robot accordion, keyed by robotMetaId. */
interface PaginationState {
  [robotMetaId: string]: {
    page: number;
    rowsPerPage: number;
  };
}

export const RunsTable: React.FC = ({ currentInterpretationLog, abortRunHandler, runId, runningRecordingName }) => {
  const { t } = useTranslation();
  const navigate = useNavigate();
  const location = useLocation();

  // Reads the robot id and, optionally, the run id from a path of the form
  // /runs/<robotMetaId>[/run/<runId>]; each is null when not present.
  const getUrlParams = () => {
    const match = location.pathname.match(/\/runs\/([^\/]+)(?:\/run\/([^\/]+))?/);
    return {
      robotMetaId: match?.[1] || null,
      urlRunId: match?.[2] || null
    };
  };

  const { robotMetaId: urlRobotMetaId, urlRunId } = getUrlParams();

  // An accordion counts as expanded when its robot id is the one in the URL.
  const isAccordionExpanded = useCallback((currentRobotMetaId: string) => {
    return currentRobotMetaId === urlRobotMetaId;
  }, [urlRobotMetaId]);

  const [accordionPage, setAccordionPage] = useState(0);
  const [accordionsPerPage, setAccordionsPerPage] = useState(10);
  const 
[accordionSortConfigs, setAccordionSortConfigs] = useState({}); const handleSort = useCallback((columnId: keyof Data, robotMetaId: string) => { setAccordionSortConfigs(prevConfigs => { const currentConfig = prevConfigs[robotMetaId] || { field: null, direction: 'none' }; const newDirection: SortDirection = currentConfig.field !== columnId ? 'asc' : currentConfig.direction === 'none' ? 'asc' : currentConfig.direction === 'asc' ? 'desc' : 'none'; return { ...prevConfigs, [robotMetaId]: { field: newDirection === 'none' ? null : columnId, direction: newDirection, } }; }); }, []); const translatedColumns = useMemo(() => columns.map(column => ({ ...column, label: t(`runstable.${column.id}`, column.label) })), [t] ); const { notify, rerenderRuns, setRerenderRuns } = useGlobalInfoStore(); const { data: rows = [], isLoading: isFetching, error, refetch } = useCachedRuns(); const { invalidateRuns } = useCacheInvalidation(); const activeSocketsRef = useRef>(new Map()); const [searchTerm, setSearchTerm] = useState(''); const [paginationStates, setPaginationStates] = useState({}); const [expandedRows, setExpandedRows] = useState>(new Set()); const [expandedAccordions, setExpandedAccordions] = useState>(new Set()); const handleAccordionChange = useCallback((robotMetaId: string, isExpanded: boolean) => { setExpandedAccordions(prev => { const newSet = new Set(prev); if (isExpanded) { newSet.add(robotMetaId); } else { newSet.delete(robotMetaId); } return newSet; }); navigate(isExpanded ? `/runs/${robotMetaId}` : '/runs'); }, [navigate]); const handleRowExpand = useCallback((runId: string, robotMetaId: string, shouldExpand: boolean) => { setExpandedRows(prev => { const newSet = new Set(prev); if (shouldExpand) { newSet.add(runId); } else { newSet.delete(runId); } return newSet; }); // Update URL navigation navigate( shouldExpand ? 
`/runs/${robotMetaId}/run/${runId}`
        : `/runs/${robotMetaId}`
    );
  }, [navigate]);

  // Sync expandedRows and expandedAccordions with URL params
  useEffect(() => {
    if (urlRunId) {
      setExpandedRows(prev => {
        const newSet = new Set(prev);
        newSet.add(urlRunId);
        return newSet;
      });
    }

    if (urlRobotMetaId) {
      setExpandedAccordions(prev => {
        const newSet = new Set(prev);
        newSet.add(urlRobotMetaId);
        return newSet;
      });
    }
  }, [urlRunId, urlRobotMetaId]);

  // Auto-expand currently running robot (but allow manual collapse)
  useEffect(() => {
    if (runId && runningRecordingName) {
      const currentRunningRow = rows.find(row =>
        row.runId === runId && row.name === runningRecordingName
      );

      if (currentRunningRow) {
        setExpandedRows(prev => {
          const newSet = new Set(prev);
          newSet.add(currentRunningRow.runId);
          return newSet;
        });
      }
    }
  }, [runId, runningRecordingName, rows]);

  // Pagination over the list of accordions (one accordion per robot).
  const handleAccordionPageChange = useCallback((event: unknown, newPage: number) => {
    setAccordionPage(newPage);
  }, []);

  const handleAccordionsPerPageChange = useCallback((event: React.ChangeEvent<HTMLInputElement>) => {
    setAccordionsPerPage(+event.target.value);
    setAccordionPage(0);
  }, []);

  // Pagination inside a single accordion, keyed by robotMetaId.
  const handleChangePage = useCallback((robotMetaId: string, newPage: number) => {
    setPaginationStates(prev => ({
      ...prev,
      [robotMetaId]: {
        ...prev[robotMetaId],
        page: newPage
      }
    }));
  }, []);

  const handleChangeRowsPerPage = useCallback((robotMetaId: string, newRowsPerPage: number) => {
    setPaginationStates(prev => ({
      ...prev,
      [robotMetaId]: {
        page: 0, // Reset to first page when changing rows per page
        rowsPerPage: newRowsPerPage
      }
    }));
  }, []);

  /**
   * Returns the pagination state for one robot's accordion.
   * When no state exists yet, the default is returned immediately and also
   * stored asynchronously (setTimeout) so that state is not set during render.
   */
  const getPaginationState = useCallback((robotMetaId: string) => {
    const defaultState = { page: 0, rowsPerPage: 10 };

    if (!paginationStates[robotMetaId]) {
      setTimeout(() => {
        setPaginationStates(prev => ({
          ...prev,
          [robotMetaId]: defaultState
        }));
      }, 0);
      return defaultState;
    }
    return paginationStates[robotMetaId];
  }, [paginationStates]);

  // Pending search timer. It lives in a ref so that every keystroke cancels the
  // previous one; a timer local to the debounce wrapper would be recreated on
  // each call and nothing would ever be cancelled.
  const searchTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Drop a still-pending search update when the table unmounts.
  useEffect(() => () => {
    if (searchTimerRef.current !== null) {
      clearTimeout(searchTimerRef.current);
    }
  }, []);

  /**
   * Wraps `fn` so that it runs `delay` ms after the most recent call.
   * All wrappers share `searchTimerRef`, so a new call cancels the pending one
   * even if the wrapper itself was created afresh.
   */
  const debouncedSearch = useCallback((fn: Function, delay: number) => {
    return (...args: any[]) => {
      if (searchTimerRef.current !== null) {
        clearTimeout(searchTimerRef.current);
      }
      searchTimerRef.current = setTimeout(() => {
        searchTimerRef.current = null;
        fn(...args);
      }, delay);
    };
  }, []);

  // Applies the search term 300 ms after the last keystroke and resets every
  // pagination level back to its first page.
  const handleSearchChange = useCallback((event: React.ChangeEvent<HTMLInputElement>) => {
    const debouncedSetSearch = debouncedSearch((value: string) => {
      setSearchTerm(value);
      setAccordionPage(0);
      setPaginationStates(prev => {
        const reset = Object.keys(prev).reduce((acc, robotId) => ({
          ...acc,
          [robotId]: { ...prev[robotId], page: 0 }
        }), {});
        return reset;
      });
    }, 300);
    debouncedSetSearch(event.target.value);
  }, [debouncedSearch]);

  // Handle rerender requests using cache invalidation
  useEffect(() => {
    if (rerenderRuns) {
      // Invalidate cache to force refetch
      refetch();
      setRerenderRuns(false);
    }
  }, [rerenderRuns, refetch, setRerenderRuns]);

  // Keep one socket per running run (keyed by browserId) so that completion
  // events refresh the table and raise a notification.
  useEffect(() => {
    if (!rows || rows.length === 0) return;

    const activeRuns = rows.filter((row: Data) =>
      row.status === 'running' && row.browserId && row.browserId.trim() !== ''
    );

    activeRuns.forEach((run: Data) => {
      const { browserId, runId: currentRunId, name } = run;

      if (activeSocketsRef.current.has(browserId)) {
        return;
      }

      console.log(`[RunsTable] Connecting to browser socket: ${browserId} for run: ${currentRunId}`);

      try {
        const socket = io(`${apiUrl}/${browserId}`, {
          transports: ['websocket'],
          rejectUnauthorized: false
        });

        socket.on('connect', () => {
          console.log(`[RunsTable] Connected to browser ${browserId}`);
        });

        socket.on('debugMessage', (msg: string) => {
          console.log(`[RunsTable] Debug message for ${browserId}:`, msg);
          // Optionally update logs in real-time here
        });

        socket.on('run-completed', (data: any) => {
          console.log(`[RunsTable] Run completed for ${browserId}:`, data);

          // Invalidate cache to show updated run status
          invalidateRuns();
          setRerenderRuns(true);

          // Show notification
          if (data.status === 'success') {
            notify('success', t('main_page.notifications.interpretation_success', { name: data.robotName || name }));
          } else {
            notify('error', t('main_page.notifications.interpretation_failed', { name: data.robotName || name }));
          }

          socket.disconnect();
          activeSocketsRef.current.delete(browserId);
        });

        socket.on('urlChanged', (url: string) => {
          console.log(`[RunsTable] URL changed for ${browserId}:`, url);
        });

        socket.on('dom-snapshot-loading', () => {
          console.log(`[RunsTable] DOM snapshot loading for ${browserId}`);
        });

        socket.on('connect_error', (error: Error) => {
          console.error(`[RunsTable] Connection error for browser ${browserId}:`, error.message);
        });

        socket.on('disconnect', (reason: string) => {
          console.log(`[RunsTable] Disconnected from browser ${browserId}:`, reason);
          activeSocketsRef.current.delete(browserId);
        });

        activeSocketsRef.current.set(browserId, socket);
      } catch (error) {
        console.error(`[RunsTable] Error connecting to browser ${browserId}:`, error);
      }
    });

    // Disconnect from sockets for runs that are no longer active
    const activeBrowserIds = new Set(activeRuns.map((run: Data) => run.browserId));
    activeSocketsRef.current.forEach((socket, browserId) => {
      if (!activeBrowserIds.has(browserId)) {
        console.log(`[RunsTable] Disconnecting from inactive browser: ${browserId}`);
        socket.disconnect();
        activeSocketsRef.current.delete(browserId);
      }
    });

    // Cleanup: React runs this before every re-run of the effect (i.e. whenever
    // a dependency such as `rows` changes) as well as on unmount.
    // NOTE(review): as a result all sockets are torn down and re-created on each
    // refetch; confirm whether that reconnect churn is intended.
    return () => {
      console.log('[RunsTable] Cleaning up all socket connections');
      activeSocketsRef.current.forEach((socket) => {
        socket.disconnect();
      });
      activeSocketsRef.current.clear();
    };
  }, [rows, notify, t, invalidateRuns, setRerenderRuns]);

  const handleDelete = useCallback(() => {
    notify('success', t('runstable.notifications.delete_success'));
    refetch();
  }, [notify, t, refetch]);

  // Filter rows based on search term
  const filteredRows = useMemo(() => {
    const needle = searchTerm.toLowerCase();
    return rows.filter((row) => row.name.toLowerCase().includes(needle));
  }, [rows, searchTerm]);

  /**
   * Parses a run timestamp for sorting.
   * Strings containing AM/PM are handed to `Date` unchanged; otherwise the
   * first two slash-separated numbers are swapped before parsing.
   * Always returns a valid Date: missing, empty or unparseable input yields the
   * epoch, so sort comparators never receive NaN.
   */
  const parseDateString = (dateStr: string): Date => {
    const fallback = new Date(0);
    if (typeof dateStr !== 'string' || dateStr.trim() === '') {
      return fallback;
    }

    const parsed = dateStr.includes('PM') || dateStr.includes('AM')
      ? new Date(dateStr)
      : new Date(dateStr.replace(/(\d+)\/(\d+)\//, '$2/$1/'));

    // `new Date(...)` never throws on bad input; it produces an Invalid Date.
    return Number.isNaN(parsed.getTime()) ? fallback : parsed;
  };

  // Group the filtered runs by robot, newest run first inside each group, and
  // order the groups by their most recent run.
  const groupedRows = useMemo(() => {
    const groupedData = filteredRows.reduce((acc, row) => {
      if (!acc[row.robotMetaId]) {
        acc[row.robotMetaId] = [];
      }
      acc[row.robotMetaId].push(row);
      return acc;
    }, {} as Record<string, Data[]>);

    Object.keys(groupedData).forEach(robotId => {
      groupedData[robotId].sort((a: any, b: any) =>
        parseDateString(b.startedAt).getTime() - parseDateString(a.startedAt).getTime()
      );
    });

    const robotEntries = Object.entries(groupedData).map(([robotId, runs]) => ({
      robotId,
      runs: runs as Data[],
      latestRunDate: parseDateString((runs as Data[])[0].startedAt).getTime()
    }));

    robotEntries.sort((a, b) => b.latestRunDate - a.latestRunDate);

    // Group order is carried by the insertion order of the resulting object.
    return robotEntries.reduce((acc, { robotId, runs }) => {
      acc[robotId] = runs;
      return acc;
    }, {} as Record<string, Data[]>);
  }, [filteredRows]);

  const renderTableRows = useCallback((data: Data[], robotMetaId: string) => {
    const { page, rowsPerPage } = getPaginationState(robotMetaId);
    const start = page * rowsPerPage;
    const end = start + rowsPerPage;

    let sortedData = [...data];
    const sortConfig = accordionSortConfigs[robotMetaId];

    if (sortConfig?.field === 'startedAt' || sortConfig?.field === 'finishedAt') {
      if (sortConfig.direction !== 'none') {
        sortedData.sort((a, b) => {
          const dateA = parseDateString(a[sortConfig.field!]);
          const dateB = parseDateString(b[sortConfig.field!]);

          return sortConfig.direction === 'asc' ?
dateA.getTime() - dateB.getTime() : dateB.getTime() - dateA.getTime(); }); } } return sortedData .slice(start, end) .map((row) => ( handleRowExpand(row.runId, row.robotMetaId, shouldExpand)} currentLog={currentInterpretationLog} abortRunHandler={abortRunHandler} runningRecordingName={runningRecordingName} urlRunId={urlRunId} /> )); }, [paginationStates, runId, runningRecordingName, currentInterpretationLog, abortRunHandler, handleDelete, accordionSortConfigs]); const renderSortIcon = useCallback((column: Column, robotMetaId: string) => { const sortConfig = accordionSortConfigs[robotMetaId]; if (column.id !== 'startedAt' && column.id !== 'finishedAt') return null; if (sortConfig?.field !== column.id) { return ( ); } return sortConfig.direction === 'asc' ? : sortConfig.direction === 'desc' ? : ; }, [accordionSortConfigs]); return ( {t('runstable.runs', 'Runs')} }} sx={{ width: '250px' }} /> {isFetching ? ( ) : Object.keys(groupedRows).length === 0 ? ( {searchTerm ? t('runstable.placeholder.search') : t('runstable.placeholder.title')} {searchTerm ? t('recordingtable.search_criteria') : t('runstable.placeholder.body') } ) : ( <> {Object.entries(groupedRows) .slice( accordionPage * accordionsPerPage, accordionPage * accordionsPerPage + accordionsPerPage ) .map(([robotMetaId, data]) => ( handleAccordionChange(robotMetaId, isExpanded)} TransitionProps={{ unmountOnExit: true }} // Optimize accordion rendering > }> {data[data.length - 1].name} {translatedColumns.map((column) => ( { if (column.id === 'startedAt' || column.id === 'finishedAt') { handleSort(column.id, robotMetaId); } }} > {column.label} {renderSortIcon(column, robotMetaId)} ))} {renderTableRows(data, robotMetaId)}
handleChangePage(robotMetaId, newPage) } rowsPerPageOptions={[]} />
))}
)}
); }; ================================================ FILE: src/components/ui/AlertSnackbar.tsx ================================================ import * as React from 'react'; import Snackbar from '@mui/material/Snackbar'; import MuiAlert, { AlertProps } from '@mui/material/Alert'; import { useGlobalInfoStore } from "../../context/globalInfo"; const Alert = React.forwardRef(function Alert( props, ref, ) { return ; }); export interface AlertSnackbarProps { severity: 'error' | 'warning' | 'info' | 'success', message: string, isOpen: boolean, }; export const AlertSnackbar = ({ severity, message, isOpen }: AlertSnackbarProps) => { const [open, setOpen] = React.useState(isOpen); const { closeNotify } = useGlobalInfoStore(); const handleClose = (event?: React.SyntheticEvent | Event, reason?: string) => { if (reason === 'clickaway') { return; } closeNotify(); setOpen(false); }; return ( {message} ); } ================================================ FILE: src/components/ui/Box.tsx ================================================ import * as React from 'react'; import Box from '@mui/material/Box'; interface BoxProps { width: number | string, height: number | string, background: string, radius: string, children?: JSX.Element, }; export const SimpleBox = ({ width, height, background, radius, children }: BoxProps) => { return ( {children} ); } ================================================ FILE: src/components/ui/ConfirmationBox.tsx ================================================ import React from 'react'; import { Box, Button, Typography } from "@mui/material"; interface ConfirmationBoxProps { selector: string; onYes: () => void; onNo: () => void; } export const ConfirmationBox = ({ selector, onYes, onNo }: ConfirmationBoxProps) => { return ( Confirmation Do you want to interact with the element: {selector}? 
); }; ================================================ FILE: src/components/ui/DropdownMui.tsx ================================================ import React from 'react'; import { FormControl, InputLabel, Select } from "@mui/material"; import { SelectChangeEvent } from "@mui/material/Select/Select"; import { SxProps } from '@mui/system'; interface DropdownProps { id: string; label: string; value: string | undefined; handleSelect: (event: SelectChangeEvent) => void; children?: React.ReactNode; sx?: SxProps; }; export const Dropdown = ({ id, label, value, handleSelect, children, sx }: DropdownProps) => { return ( {label} ); }; ================================================ FILE: src/components/ui/Form.tsx ================================================ import styled from 'styled-components'; export const NavBarForm = styled.form` flex: 1px; margin-left: 5px; margin-right: 5px; position: relative; `; export const NavBarInput = styled.input` box-sizing: border-box; outline: none; width: 100%; height: 24px; border-radius: 12px; border: none; padding-left: 12px; padding-right: 40px; `; ================================================ FILE: src/components/ui/GenericModal.tsx ================================================ import React, { FC } from 'react'; import { Modal, IconButton, Box } from '@mui/material'; import { Clear } from "@mui/icons-material"; interface ModalProps { isOpen: boolean; onClose: () => void; children?: JSX.Element; modalStyle?: React.CSSProperties; canBeClosed?: boolean; } export const GenericModal: FC = ( { isOpen, onClose, children, modalStyle, canBeClosed = true }) => { return ( { }} > e.stopPropagation()}> {canBeClosed ? 
: null } {children} ); }; const defaultModalStyle = { position: 'absolute', top: '50%', left: '50%', transform: 'translate(-50%, -50%)', width: 1000, bgcolor: 'rgba(13, 13, 13, 1)', boxShadow: 24, p: 4, height: '50%', display: 'block', overflow: 'scroll', padding: '5px 25px 10px 25px', zIndex: 3147483647, borderRadius: 4, // Added borderRadius for rounded corners }; ================================================ FILE: src/components/ui/Loader.tsx ================================================ import styled from "styled-components"; import { Stack } from "@mui/material"; import { useThemeMode } from "../../context/theme-provider"; interface LoaderProps { text: string; } export const Loader: React.FC = ({ text }) => { const { darkMode } = useThemeMode(); return ( {text} ); }; interface StyledParagraphProps { darkMode: boolean; } const StyledParagraph = styled.p` font-size: large; font-family: inherit; color: ${({ darkMode }) => (darkMode ? 'white' : '#333')}; margin-top: 20px; `; const DotsContainer = styled.div` display: flex; justify-content: center; align-items: center; gap: 15px; /* Space between dots */ `; const Dot = styled.div` width: 15px; height: 15px; background-color: #ff00c3; border-radius: 50%; animation: intensePulse 1.2s infinite ease-in-out both, bounceAndPulse 1.5s infinite ease-in-out; &:nth-child(1) { animation-delay: -0.3s; } &:nth-child(2) { animation-delay: -0.2s; } &:nth-child(3) { animation-delay: -0.1s; } &:nth-child(4) { animation-delay: 0s; } @keyframes bounceAndPulse { 0%, 100% { transform: translateY(0) scale(1); } 50% { transform: translateY(-10px) scale(1.3); } } @keyframes intensePulse { 0%, 100% { box-shadow: 0 0 0 0 rgba(255, 0, 195, 0.7); } 50% { box-shadow: 0 0 15px 10px rgba(255, 0, 195, 0.3); } } `; ================================================ FILE: src/components/ui/buttons/AddButton.tsx ================================================ import { IconButton } from "@mui/material"; import { Add } from "@mui/icons-material"; 
import React, { FC } from "react";

interface AddButtonProps {
  handleClick: () => void;
  size?: "small" | "medium" | "large";
  title?: string;
  disabled?: boolean;
  hoverEffect?: boolean;
  style?: React.CSSProperties;
}

export const AddButton: FC = (
  { handleClick, size, title, disabled = false, hoverEffect = true, style }) => {
  return ( {title} );
};

================================================
FILE: src/components/ui/buttons/BreakpointButton.tsx
================================================
import { IconButton } from "@mui/material";
import { Circle } from "@mui/icons-material";

interface BreakpointButtonProps {
  handleClick: () => void;
  size?: "small" | "medium" | "large";
  changeColor?: boolean;
}

export const BreakpointButton = ({ handleClick, size, changeColor }: BreakpointButtonProps) => {
  return ( );
};

================================================
FILE: src/components/ui/buttons/Buttons.tsx
================================================
import styled from 'styled-components';

/**
 * Round 24px navigation-bar button.
 *
 * Props:
 * - `disabled`: switches the cursor from `pointer` to `default`.
 * - `mode`: colour scheme; `'dark'` renders a dark background with light text,
 *   `'light'` the inverse.
 *
 * Each interpolation function receives the whole props object, so `mode` must
 * be destructured from it. Testing the argument itself for truthiness always
 * succeeds and would pin the button to the dark colours.
 */
export const NavBarButton = styled.button<{ disabled: boolean, mode: 'light' | 'dark' }>`
  margin-left: 10px;
  margin-right: 5px;
  padding: 0;
  border: none;
  background-color: ${({ mode }) => mode === 'dark' ? '#333' : '#ffffff'};
  cursor: ${({ disabled }) => disabled ? 'default' : 'pointer'};
  width: 24px;
  height: 24px;
  border-radius: 12px;
  outline: none;
  color: ${({ mode }) => mode === 'dark' ? '#ffffff' : '#333333'};
`;

export const UrlFormButton = styled.button`
  position: absolute;
  top: 0;
  right: 10px;
  padding: 0;
  border: none;
  background-color: transparent;
  cursor: pointer;
  width: 24px;
  height: 24px;
  border-radius: 12px;
  outline: none;
  // color: #333;

  // &:hover {
  //   background-color: #ddd;
  // },

  // &:active {
  //   background-color: #d0d0d0;
  // },
`;

================================================
FILE: src/components/ui/buttons/ClearButton.tsx
================================================
import { IconButton } from "@mui/material";
import { Clear } from "@mui/icons-material";
import React, { FC } from "react";

interface ClearButtonProps {
  handleClick: () => void;
  size?: "small" | "medium" | "large";
}

export const ClearButton: FC = ({ handleClick, size }) => {
  return ( );
};

================================================
FILE: src/components/ui/buttons/EditButton.tsx
================================================
import { IconButton } from "@mui/material";
import { Edit } from "@mui/icons-material";
import React, { FC } from "react";

interface EditButtonProps {
  handleClick: () => void;
  size?: "small" | "medium" | "large";
}

export const EditButton: FC = ({ handleClick, size }) => {
  return ( );
};

================================================
FILE: src/components/ui/buttons/RemoveButton.tsx
================================================
import { IconButton } from "@mui/material";
import { Remove } from "@mui/icons-material";
import React, { FC } from "react";

interface RemoveButtonProps {
  handleClick: () => void;
  size?: "small" | "medium" | "large";
}

export const RemoveButton: FC = ({ handleClick, size }) => {
  return ( );
};

================================================
FILE: src/components/ui/texts.tsx
================================================
import styled from "styled-components";

export const WarningText = styled.p`
  border: 1px solid orange;
  display: flex;
  margin: 10px;
  flex-direction: column;
  font-size: small;
  background:
rgba(255,165,0,0.15); padding: 5px; font-family: "Roboto","Helvetica","Arial",sans-serif; font-weight: 400; line-height: 1.5; letter-spacing: 0.00938em; ` ================================================ FILE: src/constants/const.ts ================================================ export const VIEWPORT_W = 900; export const VIEWPORT_H = 400; // Default Playwright viewport dimensions export const BROWSER_DEFAULT_WIDTH = 1280; export const BROWSER_DEFAULT_HEIGHT = 720; export const ONE_PERCENT_OF_VIEWPORT_W = VIEWPORT_W / 100; export const ONE_PERCENT_OF_VIEWPORT_H = VIEWPORT_H / 100; export const validMomentTimezones: string[] = [ 'Africa/Abidjan', 'Africa/Accra', 'Africa/Addis_Ababa', 'Africa/Algiers', 'Africa/Asmara', 'Africa/Asmera', 'Africa/Bamako', 'Africa/Bangui', 'Africa/Banjul', 'Africa/Bissau', 'Africa/Blantyre', 'Africa/Brazzaville', 'Africa/Bujumbura', 'Africa/Cairo', 'Africa/Casablanca', 'Africa/Ceuta', 'Africa/Conakry', 'Africa/Dakar', 'Africa/Dar_es_Salaam', 'Africa/Djibouti', 'Africa/Douala', 'Africa/El_Aaiun', 'Africa/Freetown', 'Africa/Gaborone', 'Africa/Harare', 'Africa/Johannesburg', 'Africa/Juba', 'Africa/Kampala', 'Africa/Khartoum', 'Africa/Kigali', 'Africa/Kinshasa', 'Africa/Lagos', 'Africa/Libreville', 'Africa/Lome', 'Africa/Luanda', 'Africa/Lubumbashi', 'Africa/Lusaka', 'Africa/Malabo', 'Africa/Maputo', 'Africa/Maseru', 'Africa/Mbabane', 'Africa/Mogadishu', 'Africa/Monrovia', 'Africa/Nairobi', 'Africa/Ndjamena', 'Africa/Niamey', 'Africa/Nouakchott', 'Africa/Ouagadougou', 'Africa/Porto-Novo', 'Africa/Sao_Tome', 'Africa/Timbuktu', 'Africa/Tripoli', 'Africa/Tunis', 'Africa/Windhoek', 'America/Adak', 'America/Anchorage', 'America/Anguilla', 'America/Antigua', 'America/Araguaina', 'America/Argentina/Buenos_Aires', 'America/Argentina/Catamarca', 'America/Argentina/ComodRivadavia', 'America/Argentina/Cordoba', 'America/Argentina/Jujuy', 'America/Argentina/La_Rioja', 'America/Argentina/Mendoza', 'America/Argentina/Rio_Gallegos', 
'America/Argentina/Salta', 'America/Argentina/San_Juan', 'America/Argentina/San_Luis', 'America/Argentina/Tucuman', 'America/Argentina/Ushuaia', 'America/Aruba', 'America/Asuncion', 'America/Atikokan', 'America/Atka', 'America/Bahia', 'America/Bahia_Banderas', 'America/Barbados', 'America/Belem', 'America/Belize', 'America/Blanc-Sablon', 'America/Boa_Vista', 'America/Bogota', 'America/Boise', 'America/Buenos_Aires', 'America/Cambridge_Bay', 'America/Campo_Grande', 'America/Cancun', 'America/Caracas', 'America/Catamarca', 'America/Cayenne', 'America/Cayman', 'America/Chicago', 'America/Chihuahua', 'America/Coral_Harbour', 'America/Cordoba', 'America/Costa_Rica', 'America/Creston', 'America/Cuiaba', 'America/Curacao', 'America/Danmarkshavn', 'America/Dawson', 'America/Dawson_Creek', 'America/Denver', 'America/Detroit', 'America/Dominica', 'America/Edmonton', 'America/Eirunepe', 'America/El_Salvador', 'America/Ensenada', 'America/Fort_Nelson', 'America/Fort_Wayne', 'America/Fortaleza', 'America/Glace_Bay', 'America/Godthab', 'America/Goose_Bay', 'America/Grand_Turk', 'America/Grenada', 'America/Guadeloupe', 'America/Guatemala', 'America/Guayaquil', 'America/Guyana', 'America/Halifax', 'America/Havana', 'America/Hermosillo', 'America/Indiana/Indianapolis', 'America/Indiana/Knox', 'America/Indiana/Marengo', 'America/Indiana/Petersburg', 'America/Indiana/Tell_City', 'America/Indiana/Vevay', 'America/Indiana/Vincennes', 'America/Indiana/Winamac', 'America/Indianapolis', 'America/Inuvik', 'America/Iqaluit', 'America/Jamaica', 'America/Jujuy', 'America/Juneau', 'America/Kentucky/Louisville', 'America/Kentucky/Monticello', 'America/Knox_IN', 'America/Kralendijk', 'America/La_Paz', 'America/Lima', 'America/Los_Angeles', 'America/Louisville', 'America/Lower_Princes', 'America/Maceio', 'America/Managua', 'America/Manaus', 'America/Marigot', 'America/Martinique', 'America/Matamoros', 'America/Mazatlan', 'America/Mendoza', 'America/Menominee', 'America/Merida', 
'America/Metlakatla', 'America/Mexico_City', 'America/Miquelon', 'America/Moncton', 'America/Monterrey', 'America/Montevideo', 'America/Montreal', 'America/Montserrat', 'America/Nassau', 'America/New_York', 'America/Nipigon', 'America/Nome', 'America/Noronha', 'America/North_Dakota/Beulah', 'America/North_Dakota/Center', 'America/North_Dakota/New_Salem', 'America/Ojinaga', 'America/Panama', 'America/Pangnirtung', 'America/Paramaribo', 'America/Phoenix', 'America/Port-au-Prince', 'America/Port_of_Spain', 'America/Porto_Acre', 'America/Porto_Velho', 'America/Puerto_Rico', 'America/Punta_Arenas', 'America/Rainy_River', 'America/Rankin_Inlet', 'America/Recife', 'America/Regina', 'America/Resolute', 'America/Rio_Branco', 'America/Rosario', 'America/Santa_Isabel', 'America/Santarem', 'America/Santiago', 'America/Santo_Domingo', 'America/Sao_Paulo', 'America/Scoresbysund', 'America/Shiprock', 'America/Sitka', 'America/St_Barthelemy', 'America/St_Johns', 'America/St_Kitts', 'America/St_Lucia', 'America/St_Thomas', 'America/St_Vincent', 'America/Swift_Current', 'America/Tegucigalpa', 'America/Thule', 'America/Thunder_Bay', 'America/Tijuana', 'America/Toronto', 'America/Tortola', 'America/Vancouver', 'America/Virgin', 'America/Whitehorse', 'America/Winnipeg', 'America/Yakutat', 'America/Yellowknife', 'Antarctica/Casey', 'Antarctica/Davis', 'Antarctica/DumontDUrville', 'Antarctica/Macquarie', 'Antarctica/Mawson', 'Antarctica/McMurdo', 'Antarctica/Palmer', 'Antarctica/Rothera', 'Antarctica/South_Pole', 'Antarctica/Syowa', 'Antarctica/Troll', 'Antarctica/Vostok', 'Arctic/Longyearbyen', 'Asia/Aden', 'Asia/Almaty', 'Asia/Amman', 'Asia/Anadyr', 'Asia/Aqtau', 'Asia/Aqtobe', 'Asia/Ashgabat', 'Asia/Ashkhabad', 'Asia/Atyrau', 'Asia/Baghdad', 'Asia/Bahrain', 'Asia/Baku', 'Asia/Bangkok', 'Asia/Barnaul', 'Asia/Beirut', 'Asia/Bishkek', 'Asia/Brunei', 'Asia/Calcutta', 'Asia/Chita', 'Asia/Choibalsan', 'Asia/Chongqing', 'Asia/Chungking', 'Asia/Colombo', 'Asia/Dacca', 'Asia/Damascus', 
'Asia/Dhaka', 'Asia/Dili', 'Asia/Dubai', 'Asia/Dushanbe', 'Asia/Famagusta', 'Asia/Gaza', 'Asia/Harbin', 'Asia/Hebron', 'Asia/Ho_Chi_Minh', 'Asia/Hong_Kong', 'Asia/Hovd', 'Asia/Irkutsk', 'Asia/Istanbul', 'Asia/Jakarta', 'Asia/Jayapura', 'Asia/Jerusalem', 'Asia/Kabul', 'Asia/Kamchatka', 'Asia/Karachi', 'Asia/Kashgar', 'Asia/Kathmandu', 'Asia/Katmandu', 'Asia/Khandyga', 'Asia/Kolkata', 'Asia/Krasnoyarsk', 'Asia/Kuala_Lumpur', 'Asia/Kuching', 'Asia/Kuwait', 'Asia/Macao', 'Asia/Macau', 'Asia/Magadan', 'Asia/Makassar', 'Asia/Manila', 'Asia/Muscat', 'Asia/Nicosia', 'Asia/Novokuznetsk', 'Asia/Novosibirsk', 'Asia/Omsk', 'Asia/Oral', 'Asia/Phnom_Penh', 'Asia/Pontianak', 'Asia/Pyongyang', 'Asia/Qatar', 'Asia/Qyzylorda', 'Asia/Rangoon', 'Asia/Riyadh', 'Asia/Saigon', 'Asia/Sakhalin', 'Asia/Samarkand', 'Asia/Seoul', 'Asia/Shanghai', 'Asia/Singapore', 'Asia/Srednekolymsk', 'Asia/Taipei', 'Asia/Tashkent', 'Asia/Tbilisi', 'Asia/Tehran', 'Asia/Tel_Aviv', 'Asia/Thimbu', 'Asia/Thimphu', 'Asia/Tokyo', 'Asia/Tomsk', 'Asia/Ujung_Pandang', 'Asia/Ulaanbaatar', 'Asia/Ulan_Bator', 'Asia/Urumqi', 'Asia/Ust-Nera', 'Asia/Vientiane', 'Asia/Vladivostok', 'Asia/Yakutsk', 'Asia/Yangon', 'Asia/Yekaterinburg', 'Asia/Yerevan', 'Atlantic/Azores', 'Atlantic/Bermuda', 'Atlantic/Canary', 'Atlantic/Cape_Verde', 'Atlantic/Faeroe', 'Atlantic/Faroe', 'Atlantic/Jan_Mayen', 'Atlantic/Madeira', 'Atlantic/Reykjavik', 'Atlantic/South_Georgia', 'Atlantic/St_Helena', 'Atlantic/Stanley', 'Australia/ACT', 'Australia/Adelaide', 'Australia/Brisbane', 'Australia/Broken_Hill', 'Australia/Canberra', 'Australia/Currie', 'Australia/Darwin', 'Australia/Eucla', 'Australia/Hobart', 'Australia/LHI', 'Australia/Lindeman', 'Australia/Lord_Howe', 'Australia/Melbourne', 'Australia/NSW', 'Australia/North', 'Australia/Perth', 'Australia/Queensland', 'Australia/South', 'Australia/Sydney', 'Australia/Tasmania', 'Australia/Victoria', 'Australia/West', 'Australia/Yancowinna', 'Brazil/Acre', 'Brazil/DeNoronha', 'Brazil/East', 
'Brazil/West', 'CET', 'CST6CDT', 'Canada/Atlantic', 'Canada/Central', 'Canada/Eastern', 'Canada/Mountain', 'Canada/Newfoundland', 'Canada/Pacific', 'Canada/Saskatchewan', 'Canada/Yukon', 'Chile/Continental', 'Chile/EasterIsland', 'Cuba', 'EET', 'EST', 'EST5EDT', 'Egypt', 'Eire', 'Etc/GMT', 'Etc/GMT+0', 'Etc/GMT+1', 'Etc/GMT+10', 'Etc/GMT+11', 'Etc/GMT+12', 'Etc/GMT+2', 'Etc/GMT+3', 'Etc/GMT+4', 'Etc/GMT+5', 'Etc/GMT+6', 'Etc/GMT+7', 'Etc/GMT+8', 'Etc/GMT+9', 'Etc/GMT-0', 'Etc/GMT-1', 'Etc/GMT-10', 'Etc/GMT-11', 'Etc/GMT-12', 'Etc/GMT-13', 'Etc/GMT-14', 'Etc/GMT-2', 'Etc/GMT-3', 'Etc/GMT-4', 'Etc/GMT-5', 'Etc/GMT-6', 'Etc/GMT-7', 'Etc/GMT-8', 'Etc/GMT-9', 'Etc/GMT0', 'Etc/Greenwich', 'Etc/UCT', 'Etc/UTC', 'Etc/Universal', 'Etc/Zulu', 'Europe/Amsterdam', 'Europe/Andorra', 'Europe/Astrakhan', 'Europe/Athens', 'Europe/Belfast', 'Europe/Belgrade', 'Europe/Berlin', 'Europe/Bratislava', 'Europe/Brussels', 'Europe/Bucharest', 'Europe/Budapest', 'Europe/Busingen', 'Europe/Chisinau', 'Europe/Copenhagen', 'Europe/Dublin', 'Europe/Gibraltar', 'Europe/Guernsey', 'Europe/Helsinki', 'Europe/Isle_of_Man', 'Europe/Istanbul', 'Europe/Jersey', 'Europe/Kaliningrad', 'Europe/Kiev', 'Europe/Kirov', 'Europe/Lisbon', 'Europe/Ljubljana', 'Europe/London', 'Europe/Luxembourg', 'Europe/Madrid', 'Europe/Malta', 'Europe/Mariehamn', 'Europe/Minsk', 'Europe/Monaco', 'Europe/Moscow', 'Europe/Nicosia', 'Europe/Oslo', 'Europe/Paris', 'Europe/Podgorica', 'Europe/Prague', 'Europe/Riga', 'Europe/Rome', 'Europe/Samara', 'Europe/San_Marino', 'Europe/Sarajevo', 'Europe/Saratov', 'Europe/Simferopol', 'Europe/Skopje', 'Europe/Sofia', 'Europe/Stockholm', 'Europe/Tallinn', 'Europe/Tirane', 'Europe/Tiraspol', 'Europe/Ulyanovsk', 'Europe/Uzhgorod', 'Europe/Vaduz', 'Europe/Vatican', 'Europe/Vienna', 'Europe/Vilnius', 'Europe/Volgograd', 'Europe/Warsaw', 'Europe/Zagreb', 'Europe/Zaporozhye', 'Europe/Zurich', 'GB', 'GB-Eire', 'GMT', 'GMT+0', 'GMT-0', 'GMT0', 'Greenwich', 'HST', 'Hongkong', 'Iceland', 
'Indian/Antananarivo', 'Indian/Chagos', 'Indian/Christmas', 'Indian/Cocos', 'Indian/Comoro', 'Indian/Kerguelen', 'Indian/Mahe', 'Indian/Maldives', 'Indian/Mauritius', 'Indian/Mayotte', 'Indian/Reunion', 'Iran', 'Israel', 'Jamaica', 'Japan', 'Kwajalein', 'Libya', 'MET', 'MST', 'MST7MDT', 'Mexico/BajaNorte', 'Mexico/BajaSur', 'Mexico/General', 'NZ', 'NZ-CHAT', 'Navajo', 'PRC', 'PST8PDT', 'Pacific/Apia', 'Pacific/Auckland', 'Pacific/Bougainville', 'Pacific/Chatham', 'Pacific/Chuuk', 'Pacific/Easter', 'Pacific/Efate', 'Pacific/Enderbury', 'Pacific/Fakaofo', 'Pacific/Fiji', 'Pacific/Funafuti', 'Pacific/Galapagos', 'Pacific/Gambier', 'Pacific/Guadalcanal', 'Pacific/Guam', 'Pacific/Honolulu', 'Pacific/Johnston', 'Pacific/Kiritimati', 'Pacific/Kosrae', 'Pacific/Kwajalein', 'Pacific/Majuro', 'Pacific/Marquesas', 'Pacific/Midway', 'Pacific/Nauru', 'Pacific/Niue', 'Pacific/Norfolk', 'Pacific/Noumea', 'Pacific/Pago_Pago', 'Pacific/Palau', 'Pacific/Pitcairn', 'Pacific/Pohnpei', 'Pacific/Ponape', 'Pacific/Port_Moresby', 'Pacific/Rarotonga', 'Pacific/Saipan', 'Pacific/Samoa', 'Pacific/Tahiti', 'Pacific/Tarawa', 'Pacific/Tongatapu', 'Pacific/Truk', 'Pacific/Wake', 'Pacific/Wallis', 'Pacific/Yap', 'Poland', 'Portugal', 'ROC', 'ROK', 'Singapore', 'Turkey', 'UCT', 'US/Alaska', 'US/Aleutian', 'US/Arizona', 'US/Central', 'US/East-Indiana', 'US/Eastern', 'US/Hawaii', 'US/Indiana-Starke', 'US/Michigan', 'US/Mountain', 'US/Pacific', 'US/Pacific-New', 'US/Samoa', 'UTC', 'Universal', 'W-SU', 'WET', 'Zulu', ]; ================================================ FILE: src/context/auth.tsx ================================================ import { useReducer, createContext, useEffect, useCallback } from 'react'; import axios from 'axios'; import { useNavigate } from 'react-router-dom'; import { apiUrl } from "../apiConfig"; interface AuthProviderProps { children: React.ReactNode; } interface ActionType { type: 'LOGIN' | 'LOGOUT'; payload?: any; } type InitialStateType = { user: any; 
lastActivityTime?: number;
};

const initialState = {
    user: null,
    lastActivityTime: Date.now(),
};

const AUTO_LOGOUT_TIME = 4 * 60 * 60 * 1000; // 4 hours in milliseconds

const AuthContext = createContext<{
    state: InitialStateType;
    dispatch: React.Dispatch<ActionType>;
}>({
    state: initialState,
    dispatch: () => null,
});

/**
 * Auth state reducer.
 * - LOGIN: stores the payload as the user and stamps the current time as the last activity.
 * - LOGOUT: clears both the user and the activity timestamp.
 */
const reducer = (state: InitialStateType, action: ActionType) => {
    switch (action.type) {
        case 'LOGIN':
            return {
                ...state,
                user: action.payload,
                lastActivityTime: Date.now(),
            };
        case 'LOGOUT':
            return {
                ...state,
                user: null,
                lastActivityTime: undefined,
            };
        default:
            return state;
    }
};

// Logout callback used by the shared 401 interceptor. AuthProvider refreshes it
// on every render so the interceptor always calls the current callback.
let unauthorizedHandler: (() => Promise<void>) | null = null;
let unauthorizedInterceptorInstalled = false;

/**
 * Registers the axios response interceptor that logs the user out on a 401.
 * Idempotent: the interceptor is added once per page load. Calling
 * `axios.interceptors.response.use` from the component body would add one more
 * interceptor on every render.
 */
const installUnauthorizedInterceptor = (): void => {
    if (unauthorizedInterceptorInstalled) {
        return;
    }
    unauthorizedInterceptorInstalled = true;

    axios.interceptors.response.use(
        (response) => response,
        async (error) => {
            const res = error.response;
            const logout = unauthorizedHandler;
            if (logout && res?.status === 401 && res.config && !res.config.__isRetryRequest) {
                try {
                    await logout();
                    console.log('/401 error > logout');
                } catch (err) {
                    console.error('AXIOS INTERCEPTORS ERROR:', err);
                }
            }
            // The caller always sees the original failure, logout or not.
            return Promise.reject(error);
        }
    );
};

/**
 * Provides auth state and dispatch to the tree, restores the user from
 * localStorage, tracks user activity, and logs out after AUTO_LOGOUT_TIME of
 * inactivity or on a 401 response.
 */
const AuthProvider = ({ children }: AuthProviderProps) => {
    const [state, dispatch] = useReducer(reducer, initialState);
    const navigate = useNavigate();
    axios.defaults.withCredentials = true;

    // Ends the server session, then clears local state and goes to /login.
    // If the request fails the error is logged and local state is left untouched.
    const handleLogout = useCallback(async () => {
        try {
            await axios.get(`${apiUrl}/auth/logout`);
            dispatch({ type: 'LOGOUT' });
            window.localStorage.removeItem('user');
            navigate('/login');
        } catch (err) {
            console.error('Logout error:', err);
        }
    }, [navigate]);

    // Done during render (as the original registration was) so the interceptor
    // is already in place when child components issue their first requests.
    unauthorizedHandler = handleLogout;
    installUnauthorizedInterceptor();

    const checkAutoLogout = useCallback(() => {
        if (state.user && state.lastActivityTime) {
            const currentTime = Date.now();
            const timeSinceLastActivity = currentTime - state.lastActivityTime;

            if (timeSinceLastActivity >= AUTO_LOGOUT_TIME) {
                handleLogout();
            }
        }
    }, [state.user, state.lastActivityTime, handleLogout]);

    // Update last activity time on user interactions
    const updateActivityTime = useCallback(() => {
        if (state.user) {
            dispatch({
                type: 'LOGIN',
                payload: state.user // Reuse existing user data
            });
        }
    }, [state.user]);

    // Initialize user from localStorage
    useEffect(() => {
        const storedUser = window.localStorage.getItem('user');
        if (!storedUser) {
            return;
        }
        try {
            dispatch({ type: 'LOGIN', payload: JSON.parse(storedUser) });
        } catch (err) {
            // A corrupt entry must not take the whole app down; drop it and stay logged out.
            console.error('Discarding unreadable stored user:', err);
            window.localStorage.removeItem('user');
        }
    }, []);

    // Set up activity listeners
    useEffect(() => {
        if (state.user) {
            // List of events to track for user activity
            const events = ['mousedown', 'keydown', 'scroll', 'touchstart'];

            // Debounced event handler: the timestamp is refreshed one second
            // after the last event of a burst.
            let timeoutId: ReturnType<typeof setTimeout> | undefined;
            const handleActivity = () => {
                if (timeoutId) {
                    clearTimeout(timeoutId);
                }
                timeoutId = setTimeout(updateActivityTime, 1000);
            };

            // Add event listeners
            events.forEach(event => {
                window.addEventListener(event, handleActivity);
            });

            // Set up periodic check for auto logout
            const checkInterval = setInterval(checkAutoLogout, 60000); // Check every minute

            // Cleanup
            return () => {
                events.forEach(event => {
                    window.removeEventListener(event, handleActivity);
                });
                clearInterval(checkInterval);
                if (timeoutId) {
                    clearTimeout(timeoutId);
                }
            };
        }
    }, [state.user, updateActivityTime, checkAutoLogout]);

    return (
        <AuthContext.Provider value={{ state, dispatch }}>
            {children}
        </AuthContext.Provider>
    );
};

export { AuthContext, AuthProvider };

================================================
FILE: src/context/browserActions.tsx
================================================
import React, { createContext, useContext, useState, ReactNode } from 'react';
import { useSocketStore } from './socket';
import { WorkflowFile } from 'maxun-core';
import { emptyWorkflow } from '../shared/constants';

export type PaginationType = 'scrollDown' | 'scrollUp' | 'clickNext' | 'clickLoadMore' | 'none' | '';
export type LimitType = '10' | '100' | 'custom' | '';
export type CaptureStage = 'initial' | 'pagination' | 'limit' | 'complete' | '';
export type ActionType = 'text' | 'list' | 'screenshot';

interface ActionContextProps {
    getText: boolean;
    getList: boolean;
    getScreenshot: boolean;
    paginationMode: boolean;
    limitMode: boolean;
    paginationType: PaginationType;
    limitType: LimitType;
    workflow: WorkflowFile;
    customLimit: string;
captureStage: CaptureStage;
  showPaginationOptions: boolean;
  showLimitOptions: boolean;
  activeAction: 'none' | 'text' | 'list' | 'screenshot';
  setActiveAction: (action: 'none' | 'text' | 'list' | 'screenshot') => void;
  setWorkflow: (workflow: WorkflowFile) => void;
  setShowPaginationOptions: (show: boolean) => void;
  setShowLimitOptions: (show: boolean) => void;
  setCaptureStage: (stage: CaptureStage) => void;
  startAction: (action: 'text' | 'list' | 'screenshot') => void;
  finishAction: (action: 'text' | 'list' | 'screenshot') => void;
  startGetText: () => void;
  stopGetText: () => void;
  startGetList: () => void;
  stopGetList: () => void;
  startGetScreenshot: () => void;
  stopGetScreenshot: () => void;
  startPaginationMode: () => void;
  stopPaginationMode: () => void;
  updatePaginationType: (type: PaginationType) => void;
  startLimitMode: () => void;
  stopLimitMode: () => void;
  updateLimitType: (type: LimitType) => void;
  updateCustomLimit: (limit: string) => void;
}

const ActionContext = createContext(undefined);

/**
 * Holds the state of the capture actions (text, list, screenshot) and exposes
 * the functions that start and stop them. Only one action can be active at a
 * time; list capture is additionally announced to the server over the socket
 * through the `setGetList` and `setPaginationMode` events.
 */
export const ActionProvider = ({ children }: { children: ReactNode }) => {
  const [workflow, setWorkflow] = useState(emptyWorkflow);
  const [getText, setGetText] = useState(false);
  const [getList, setGetList] = useState(false);
  const [getScreenshot, setGetScreenshot] = useState(false);
  const [paginationMode, setPaginationMode] = useState(false);
  const [limitMode, setLimitMode] = useState(false);
  const [paginationType, setPaginationType] = useState('');
  const [limitType, setLimitType] = useState('');
  const [customLimit, setCustomLimit] = useState('');
  const [captureStage, setCaptureStage] = useState('initial');
  const [showPaginationOptions, setShowPaginationOptions] = useState(false);
  const [showLimitOptions, setShowLimitOptions] = useState(false);
  const [activeAction, setActiveAction] = useState<'none' | 'text' | 'list' | 'screenshot'>('none');

  const { socket } = useSocketStore();

  // Shared teardown of a list capture: clears the list flag and the
  // pagination/limit choices, marks the capture complete and tells the server.
  const resetListCapture = () => {
    setGetList(false);
    setPaginationType('');
    setLimitType('');
    setCustomLimit('');
    setCaptureStage('complete');
    socket?.emit('setGetList', { getList: false });
  };

  // Starts the given action unless another one is already active.
  const startAction = (action: 'text' | 'list' | 'screenshot') => {
    if (activeAction !== 'none') {
      return;
    }

    setActiveAction(action);

    switch (action) {
      case 'text':
        setGetText(true);
        break;
      case 'list':
        setGetList(true);
        socket?.emit('setGetList', { getList: true });
        setCaptureStage('initial');
        break;
      case 'screenshot':
        setGetScreenshot(true);
        break;
    }
  };

  // Finishes the given action; ignored when it is not the active one.
  const finishAction = (action: 'text' | 'list' | 'screenshot') => {
    if (activeAction !== action) {
      return;
    }

    setActiveAction('none');

    switch (action) {
      case 'text':
        setGetText(false);
        break;
      case 'list':
        resetListCapture();
        break;
      case 'screenshot':
        setGetScreenshot(false);
        break;
    }
  };

  const updatePaginationType = (type: PaginationType) => {
    setPaginationType(type);
  };

  const updateLimitType = (type: LimitType) => {
    setLimitType(type);
  };

  const updateCustomLimit = (limit: string) => {
    setCustomLimit(limit);
  };

  // Entering pagination mode switches list selection off on the server side.
  const startPaginationMode = () => {
    setPaginationMode(true);
    setCaptureStage('pagination');
    socket?.emit('setGetList', { getList: false });
    socket?.emit('setPaginationMode', { pagination: true });
  };

  const stopPaginationMode = () => {
    setPaginationMode(false);
    socket?.emit('setPaginationMode', { pagination: false });
  };

  const startLimitMode = () => {
    setLimitMode(true);
    setCaptureStage('limit');
  };

  const stopLimitMode = () => {
    setLimitMode(false);
  };

  const startGetText = () => {
    startAction('text');
  };

  // The stop* functions clear their action unconditionally, without the
  // active-action check that finishAction performs.
  const stopGetText = () => {
    setGetText(false);
    setActiveAction('none');
  };

  const startGetList = () => {
    startAction('list');
  };

  const stopGetList = () => {
    resetListCapture();
    setActiveAction('none');
  };

  const startGetScreenshot = () => {
    startAction('screenshot');
  };

  const stopGetScreenshot = () => {
    setGetScreenshot(false);
    setActiveAction('none');
  };

  return ( {children} );
};

export const useActionContext = () => {
  const context =
useContext(ActionContext);
  if (context === undefined) {
    throw new Error('useActionContext must be used within an ActionProvider');
  }
  return context;
};
================================================ FILE: src/context/browserDimensions.tsx ================================================
import React, { createContext, useCallback, useContext, useEffect, useState } from "react";
import { AppDimensions, getResponsiveDimensions } from "../helpers/dimensionUtils";

interface BrowserDimensionsContext extends AppDimensions {
  setWidth: (newWidth: number) => void;
  updateDimensions: () => void;
}

const initialDimensions = getResponsiveDimensions();

const browserDimensionsContext = createContext({
  ...initialDimensions,
  setWidth: () => {},
  updateDimensions: () => {}
});

export const useBrowserDimensionsStore = () => useContext(browserDimensionsContext);

/**
 * Tracks the browser/canvas dimensions.
 * The dimensions are recomputed with `getResponsiveDimensions()` on every
 * window `resize` event, and `setWidth` overrides them with an explicit width.
 */
export const BrowserDimensionsProvider = ({ children }: { children: JSX.Element }) => {
  const [dimensions, setDimensions] = useState(initialDimensions);

  // Recompute all dimensions from the current window size.
  const updateDimensions = useCallback(() => setDimensions(getResponsiveDimensions()), []);

  // Apply the same width to browser and canvas; both heights are the width
  // divided by 1.6, rounded to the nearest integer.
  const setWidth = useCallback((newWidth: number) => {
    const derivedHeight = Math.round(newWidth / 1.6);
    setDimensions((current: any) => ({
      ...current,
      browserWidth: newWidth,
      canvasWidth: newWidth,
      browserHeight: derivedHeight,
      canvasHeight: derivedHeight
    }));
  }, []);

  // Follow window resizes for as long as the provider is mounted.
  useEffect(() => {
    window.addEventListener('resize', updateDimensions);
    return () => window.removeEventListener('resize', updateDimensions);
  }, [updateDimensions]);

  return ( {children} );
};
================================================ FILE: src/context/browserSteps.tsx ================================================
import React, { createContext, useContext, useEffect, useRef, useState } from 'react';
import { useSocketStore } from "./socket";
import { useGlobalInfoStore } from "./globalInfo";
import { useActionContext } from './browserActions';

export interface TextStep {
  id: number;
  type: 'text';
  label: string;
  data:
string; isShadow?: boolean; selectorObj: SelectorObject; actionId?: string; name?: string; } export interface ScreenshotStep { id: number; type: 'screenshot'; name?: string; fullPage: boolean; actionId?: string; screenshotData?: string; } export interface ListStep { id: number; type: 'list'; name?: string; listSelector: string; isShadow?: boolean; fields: { [key: string]: TextStep }; pagination?: { type: string; selector: string; isShadow?: boolean; }; limit?: number; actionId?: string; data?: any[]; } export type BrowserStep = TextStep | ScreenshotStep | ListStep; export interface SelectorObject { selector: string; isShadow?: boolean; tag?: string; attribute?: string; [key: string]: any; } interface BrowserStepsContextType { browserSteps: BrowserStep[]; addTextStep: ( label: string, data: string, selectorObj: SelectorObject, actionId: string ) => void; addListStep: ( listSelector: string, fields: { [key: string]: TextStep }, listId: number, actionId: string, pagination?: { type: string; selector: string; isShadow?: boolean; }, limit?: number, isShadow?: boolean ) => void; addScreenshotStep: (fullPage: boolean, actionId: string) => void; deleteBrowserStep: (id: number) => void; updateBrowserTextStepLabel: (id: number, newLabel: string) => void; updateListTextFieldLabel: ( listId: number, fieldKey: string, newLabel: string ) => void; updateListStepLimit: (listId: number, limit: number) => void; updateListStepPagination: (listId: number, pagination: { type: string; selector: string | null; isShadow?: boolean }) => void; updateListStepData: (listId: number, extractedData: any[]) => void; updateListStepName: (listId: number, name: string) => void; updateScreenshotStepName: (id: number, name: string) => void; removeListTextField: (listId: number, fieldKey: string) => void; deleteStepsByActionId: (actionId: string) => void; updateScreenshotStepData: (id: number, screenshotData: string) => void; emitActionForStep: (step: BrowserStep) => void; emitForStepId: (actionId: 
string, nameOverride?: string) => void; } const BrowserStepsContext = createContext(undefined); export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => { const { socket } = useSocketStore(); const { currentTextGroupName } = useGlobalInfoStore(); const [browserSteps, setBrowserSteps] = useState([]); const [discardedFields, setDiscardedFields] = useState>(new Set()); const { paginationType, limitType, customLimit } = useActionContext(); const browserStepsRef = useRef(browserSteps); useEffect(() => { browserStepsRef.current = browserSteps; }, [browserSteps]); const currentTextGroupNameRef = useRef(currentTextGroupName); useEffect(() => { currentTextGroupNameRef.current = currentTextGroupName; }, [currentTextGroupName]); const getListSettingsObject = (listStep: ListStep) => { const fields: Record = {}; Object.entries(listStep.fields).forEach(([id, field]) => { if (field.selectorObj?.selector) { fields[field.label] = { selector: field.selectorObj.selector, tag: field.selectorObj.tag, attribute: field.selectorObj.attribute, isShadow: field.selectorObj.isShadow }; } }); const livePaginationType = paginationType || listStep.pagination?.type || ""; const liveLimit = limitType === "custom" ? parseInt(customLimit || "0", 10) : parseInt(limitType || "0", 10); return { listSelector: listStep.listSelector, fields: fields, pagination: { type: livePaginationType, selector: listStep.pagination?.selector, isShadow: listStep.isShadow }, limit: liveLimit > 0 ? 
liveLimit : listStep.limit, isShadow: listStep.isShadow }; }; const emitActionForStep = (step: BrowserStep) => { if (!socket) return; if (!step.actionId) return; if (!socket.connected) return; let action = ""; let settings: any = {}; // Always read the latest steps from the ref to prevent stale data const latestSteps = browserStepsRef.current; if (step.type === "list") { action = "scrapeList"; const baseSettings = getListSettingsObject(step); settings = { ...baseSettings, name: step.name || `List Data ${latestSteps.filter(s => s.type === "list").length}`, }; } else if (step.type === "text") { action = "scrapeSchema"; const freshTextSteps = latestSteps.filter( (s): s is TextStep => s.type === "text" && s.actionId === step.actionId ); // Build schema settings from text steps const fieldSettings: Record< string, { selector: string; tag?: string; [key: string]: any; } > = {}; freshTextSteps.forEach((textStep) => { if (textStep.selectorObj?.selector && textStep.label) { fieldSettings[textStep.label] = { selector: textStep.selectorObj.selector, tag: textStep.selectorObj.tag, attribute: textStep.selectorObj.attribute, isShadow: textStep.selectorObj.isShadow, }; } }); settings = { ...fieldSettings, name: currentTextGroupNameRef.current || "Text Data", }; } else if (step.type === "screenshot") { action = "screenshot"; const freshScreenshot = latestSteps.find( (s) => s.type === "screenshot" && s.actionId === step.actionId ) as ScreenshotStep | undefined; settings = { name: step.name || freshScreenshot?.name || `Screenshot ${latestSteps.filter((s) => s.type === "screenshot").length}`, type: "png", caret: "hide", scale: "device", timeout: 30000, fullPage: freshScreenshot?.fullPage ?? step.fullPage ?? 
true,
        animations: "allow",
      };
    }

    socket.emit("action", { action, actionId: step.actionId, settings });
  };

  // Looks up the step recorded for `actionId` in the latest steps ref and emits
  // its action. For screenshots, `nameOverride` (when given and different) is
  // also written back to the ref and to React state before emitting.
  const emitForStepId = (actionId: string, nameOverride?: string) => {
    const step = browserStepsRef.current.find(s => s.actionId === actionId);
    if (!step) return;

    let enrichedStep = { ...step };

    if (step.type === "text") {
      enrichedStep = { ...step, name: currentTextGroupNameRef.current };
    }

    if (step.type === "screenshot") {
      const freshScreenshot = browserStepsRef.current.find(
        s => s.type === "screenshot" && s.actionId === actionId
      ) as ScreenshotStep | undefined;

      if (freshScreenshot) {
        enrichedStep = { ...freshScreenshot };

        if (nameOverride && freshScreenshot.name !== nameOverride) {
          enrichedStep.name = nameOverride;
          browserStepsRef.current = browserStepsRef.current.map(s =>
            s.id === freshScreenshot.id ? { ...s, name: nameOverride } : s
          );
          setBrowserSteps(prev =>
            prev.map(s =>
              s.id === freshScreenshot.id ? { ...s, name: nameOverride } : s
            )
          );
        }
      }
    }

    if (step.type === "list") {
      const freshList = browserStepsRef.current.find(
        s => s.type === "list" && s.actionId === actionId
      ) as ListStep | undefined;

      if (freshList) {
        enrichedStep = { ...freshList };
      }
    }

    emitActionForStep(enrichedStep);
  };

  // Appends a text step; an empty label falls back to `Label <n>`, where n
  // counts the text steps including the new one.
  const addTextStep = (label: string, data: string, selectorObj: SelectorObject, actionId: string) => {
    setBrowserSteps((prevSteps) => {
      const textCount = prevSteps.filter(s => s.type === 'text').length + 1;
      const generatedLabel = label || `Label ${textCount}`;
      return [
        ...prevSteps,
        {
          id: Date.now(),
          type: "text",
          label: generatedLabel,
          data,
          selectorObj,
          actionId,
        },
      ];
    });
  };

  /**
   * Creates the list step with id `listId`, or updates it when it already exists.
   *
   * On update, fields the user removed earlier (tracked in `discardedFields`)
   * are left out, labels of fields that already exist are preserved, and
   * `pagination` / `isShadow` fall back to the stored values when omitted.
   * On create, the step is named `List Data <n>` and every field is tagged
   * with `actionId`.
   */
  const addListStep = (
    listSelector: string,
    newFields: { [key: string]: TextStep },
    listId: number,
    actionId: string,
    pagination?: {
      type: string;
      selector: string;
      isShadow?: boolean;
    },
    limit?: number,
    isShadow?: boolean
  ) => {
    setBrowserSteps((prevSteps) => {
      const existingListStepIndex = prevSteps.findIndex(
        (step) => step.type === "list" && step.id === listId
      );

      if (existingListStepIndex !== -1) {
        const updatedSteps = [...prevSteps];
        const existingListStep = updatedSteps[
          existingListStepIndex
        ] as ListStep;

        // Preserve existing labels for fields
        const mergedFields = Object.entries(newFields).reduce(
          (acc, [key, field]) => {
            if (discardedFields.has(`${listId}-${key}`)) {
              return acc;
            }
            const existingField = existingListStep.fields[key];
            acc[key] = existingField
              ? { ...field, label: existingField.label, actionId }
              : { ...field, actionId };
            return acc;
          },
          {} as { [key: string]: TextStep }
        );

        updatedSteps[existingListStepIndex] = {
          ...existingListStep,
          listSelector,
          fields: mergedFields,
          pagination: pagination || existingListStep.pagination,
          // NOTE(review): unlike pagination and isShadow, an omitted limit
          // clears the stored one — confirm this is intended.
          limit: limit,
          isShadow: isShadow !== undefined ? isShadow : existingListStep.isShadow,
          actionId,
        };
        return updatedSteps;
      } else {
        const fieldsWithActionId = Object.entries(newFields).reduce(
          (acc, [key, field]) => {
            acc[key] = {
              ...field,
              actionId,
            };
            return acc;
          },
          {} as { [key: string]: TextStep }
        );

        const listCount = prevSteps.filter(s => s.type === 'list').length + 1;

        return [
          ...prevSteps,
          {
            id: listId,
            type: "list",
            name: `List Data ${listCount}`,
            listSelector,
            fields: fieldsWithActionId,
            pagination,
            limit,
            // Keep the shadow-DOM flag on creation as well; it was previously
            // dropped here and only stored once the step was updated.
            isShadow,
            actionId,
          },
        ];
      }
    });
  };

  const addScreenshotStep = (fullPage: boolean, actionId: string) => {
    setBrowserSteps(prevSteps => [
      ...prevSteps,
      { id: Date.now(), type: 'screenshot', fullPage, actionId }
    ]);
  };

  const deleteBrowserStep = (id: number) => {
    setBrowserSteps(prevSteps => prevSteps.filter(step => step.id !== id));
  };

  const deleteStepsByActionId = (actionId: string) => {
    setBrowserSteps(prevSteps => prevSteps.filter(step => step.actionId !== actionId));
  };

  const updateBrowserTextStepLabel = (id: number, newLabel: string) => {
    setBrowserSteps(prevSteps =>
      prevSteps.map(step =>
        step.id === id ?
{ ...step, label: newLabel } : step ) ); }; const updateListTextFieldLabel = ( listId: number, fieldKey: string, newLabel: string ) => { setBrowserSteps((prevSteps) => prevSteps.map((step) => { if (step.type === "list" && step.id === listId) { const oldLabel = step.fields[fieldKey].label; const updatedFields = { ...step.fields, [fieldKey]: { ...step.fields[fieldKey], label: newLabel, }, }; const updatedData = step.data?.map((row: any) => { if (row[oldLabel] !== undefined) { const { [oldLabel]: value, ...rest } = row; return { ...rest, [newLabel]: value, }; } return row; }); return { ...step, fields: updatedFields, data: updatedData, }; } return step; }) ); }; const updateListStepData = (listId: number, extractedData: any[]) => { setBrowserSteps((prevSteps) => { return prevSteps.map(step => { if (step.type === 'list' && step.id === listId) { return { ...step, data: extractedData }; } return step; }); }); }; const updateScreenshotStepData = (id: number, screenshotData: string) => { setBrowserSteps(prevSteps => { return prevSteps.map(step => { if (step.type === 'screenshot' && step.id === id) { return { ...step, screenshotData: screenshotData }; } return step; }); }); }; const updateListStepLimit = (listId: number, limit: number) => { setBrowserSteps(prevSteps => prevSteps.map(step => { if (step.type === 'list' && step.id === listId) { return { ...step, limit: limit }; } return step; }) ); }; const updateListStepPagination = ( listId: number, pagination: { type: string; selector: string | null; isShadow?: boolean } ) => { setBrowserSteps((prevSteps) => prevSteps.map((step) => { if (step.type === "list" && step.id === listId) { return { ...step, pagination: { ...pagination, selector: pagination.selector || "", }, }; } return step; }) ); }; const updateListStepName = (listId: number, name: string) => { setBrowserSteps((prevSteps) => prevSteps.map((step) => { if (step.type === "list" && step.id === listId) { return { ...step, name: name, }; } return step; }) ); }; const 
updateScreenshotStepName = (id: number, name: string) => { setBrowserSteps(prevSteps => { const updated = prevSteps.map(step => step.id === id && step.type === 'screenshot' ? { ...step, name } : step ); browserStepsRef.current = updated; return updated; }); }; const removeListTextField = (listId: number, fieldKey: string) => { setBrowserSteps((prevSteps) => prevSteps.map((step) => { if (step.type === "list" && step.id === listId) { const { [fieldKey]: _, ...remainingFields } = step.fields; return { ...step, fields: remainingFields, }; } return step; }) ); setDiscardedFields((prevDiscarded) => new Set(prevDiscarded).add(`${listId}-${fieldKey}`) ); }; return ( {children} ); }; export const useBrowserSteps = () => { const context = useContext(BrowserStepsContext); if (!context) { throw new Error('useBrowserSteps must be used within a BrowserStepsProvider'); } return context; }; ================================================ FILE: src/context/globalInfo.tsx ================================================ import { createContext, useContext, useState } from "react"; import { AlertSnackbarProps } from "../components/ui/AlertSnackbar"; import { WhereWhatPair } from "maxun-core"; import { QueryClient, QueryClientProvider, useQuery, useQueryClient } from '@tanstack/react-query'; import { getStoredRuns, getStoredRecordings } from "../api/storage"; const createDataCacheClient = () => new QueryClient({ defaultOptions: { queries: { staleTime: 30 * 1000, gcTime: 5 * 60 * 1000, retry: 2, retryDelay: (attemptIndex) => Math.min(1000 * 2 ** attemptIndex, 30000), } } }); const dataCacheKeys = { runs: ['cached-runs'] as const, recordings: ['cached-recordings'] as const, } as const; interface RobotMeta { name: string; id: string; createdAt: string; pairs: number; updatedAt: string; params: any[]; type?: 'extract' | 'scrape' | 'crawl' | 'search'; url?: string; formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[]; isLLM?: boolean; } interface RobotWorkflow { 
workflow: WhereWhatPair[]; } interface ScheduleConfig { runEvery: number; runEveryUnit: 'MINUTES' | 'HOURS' | 'DAYS' | 'WEEKS' | 'MONTHS'; startFrom: 'SUNDAY' | 'MONDAY' | 'TUESDAY' | 'WEDNESDAY' | 'THURSDAY' | 'FRIDAY' | 'SATURDAY'; atTimeStart?: string; atTimeEnd?: string; timezone: string; lastRunAt?: Date; nextRunAt?: Date; cronExpression?: string; } export interface RobotSettings { id: string; userId?: number; recording_meta: RobotMeta; recording: RobotWorkflow; google_sheet_email?: string | null; google_sheet_name?: string | null; google_sheet_id?: string | null; google_access_token?: string | null; google_refresh_token?: string | null; schedule?: ScheduleConfig | null; } interface GlobalInfo { browserId: string | null; setBrowserId: (newId: string | null) => void; lastAction: string; setLastAction: (action: string) => void; notification: AlertSnackbarProps; notify: (severity: 'error' | 'warning' | 'info' | 'success', message: string) => void; closeNotify: () => void; isLogin: boolean; setIsLogin: (isLogin: boolean) => void; recordings: string[]; setRecordings: (recordings: string[]) => void; rerenderRuns: boolean; setRerenderRuns: (rerenderRuns: boolean) => void; rerenderRobots: boolean; setRerenderRobots: (rerenderRuns: boolean) => void; recordingLength: number; setRecordingLength: (recordingLength: number) => void; recordingId: string | null; setRecordingId: (newId: string | null) => void; retrainRobotId: string | null; setRetrainRobotId: (newId: string | null) => void; recordingName: string; setRecordingName: (recordingName: string) => void; initialUrl: string; setInitialUrl: (initialUrl: string) => void; recordingUrl: string; setRecordingUrl: (recordingUrl: string) => void; currentWorkflowActionsState: { hasScrapeListAction: boolean; hasScreenshotAction: boolean; hasScrapeSchemaAction: boolean; }; setCurrentWorkflowActionsState: (actionsState: { hasScrapeListAction: boolean; hasScreenshotAction: boolean; hasScrapeSchemaAction: boolean; }) => void; 
shouldResetInterpretationLog: boolean; resetInterpretationLog: () => void; currentTextActionId: string; setCurrentTextActionId: (actionId: string) => void; currentListActionId: string; setCurrentListActionId: (actionId: string) => void; currentScreenshotActionId: string; setCurrentScreenshotActionId: (actionId: string) => void; currentTextGroupName: string; setCurrentTextGroupName: (name: string) => void; isDOMMode: boolean; setIsDOMMode: (isDOMMode: boolean) => void; updateDOMMode: (isDOMMode: boolean) => void; }; class GlobalInfoStore implements Partial { browserId = null; lastAction = ''; recordingLength = 0; notification: AlertSnackbarProps = { severity: 'info', message: '', isOpen: false, }; recordingId = null; retrainRobotId = null; recordings: string[] = []; rerenderRuns = false; rerenderRobots = false; recordingName = ''; initialUrl = 'https://'; recordingUrl = 'https://'; isLogin = false; currentWorkflowActionsState = { hasScrapeListAction: false, hasScreenshotAction: false, hasScrapeSchemaAction: false, }; shouldResetInterpretationLog = false; currentTextActionId = ''; currentListActionId = ''; currentScreenshotActionId = ''; currentTextGroupName = 'Text Data'; isDOMMode = false; }; const globalInfoStore = new GlobalInfoStore(); const globalInfoContext = createContext(globalInfoStore as GlobalInfo); export const useGlobalInfoStore = () => useContext(globalInfoContext); export const useCachedRuns = () => { return useQuery({ queryKey: dataCacheKeys.runs, queryFn: async () => { const runs = await getStoredRuns(); if (!runs) throw new Error('Failed to fetch runs data'); return runs.map((run: any, index: number) => ({ id: index, ...run })); }, staleTime: 30 * 1000, gcTime: 5 * 60 * 1000, retry: 2, }); }; export const useCacheInvalidation = () => { const queryClient = useQueryClient(); const invalidateRuns = () => { queryClient.invalidateQueries({ queryKey: dataCacheKeys.runs }); }; const invalidateRecordings = () => { queryClient.invalidateQueries({ queryKey: 
dataCacheKeys.recordings }); }; const addOptimisticRun = (newRun: any) => { queryClient.setQueryData(dataCacheKeys.runs, (oldData: any) => { if (!oldData) return [{ id: 0, ...newRun }]; return [{ id: oldData.length, ...newRun }, ...oldData]; }); }; const addOptimisticRobot = (newRobot: any) => { queryClient.setQueryData(dataCacheKeys.recordings, (oldData: any) => { if (!oldData) return [newRobot]; return [newRobot, ...oldData]; }); }; const removeOptimisticRobot = (tempId: string) => { queryClient.setQueryData(dataCacheKeys.recordings, (oldData: any) => { if (!oldData) return []; return oldData.filter((robot: any) => robot.id !== tempId); }); }; const invalidateAllCache = () => { invalidateRuns(); invalidateRecordings(); }; return { invalidateRuns, invalidateRecordings, addOptimisticRun, addOptimisticRobot, removeOptimisticRobot, invalidateAllCache }; }; export const useCachedRecordings = () => { return useQuery({ queryKey: dataCacheKeys.recordings, queryFn: async () => { const recordings = await getStoredRecordings(); if (!recordings) throw new Error('Failed to fetch recordings data'); return recordings; }, staleTime: 30 * 1000, gcTime: 5 * 60 * 1000, retry: 2, }); }; export const GlobalInfoProvider = ({ children }: { children: JSX.Element }) => { const [browserId, setBrowserId] = useState(globalInfoStore.browserId); const [lastAction, setLastAction] = useState(globalInfoStore.lastAction); const [notification, setNotification] = useState(globalInfoStore.notification); const [recordings, setRecordings] = useState(globalInfoStore.recordings); const [rerenderRuns, setRerenderRuns] = useState(globalInfoStore.rerenderRuns); const [rerenderRobots, setRerenderRobots] = useState(globalInfoStore.rerenderRobots); const [recordingLength, setRecordingLength] = useState(globalInfoStore.recordingLength); const [recordingId, setRecordingId] = useState(() => { try { const stored = sessionStorage.getItem('recordingId'); return stored ? 
JSON.parse(stored) : globalInfoStore.recordingId; } catch { return globalInfoStore.recordingId; } }); const setPersistedRecordingId = (newRecordingId: string | null) => { setRecordingId(newRecordingId); try { if (newRecordingId) { sessionStorage.setItem('recordingId', JSON.stringify(newRecordingId)); } else { sessionStorage.removeItem('recordingId'); } } catch (error) { console.warn('Failed to persist recordingId to sessionStorage:', error); } }; const [retrainRobotId, setRetrainRobotId] = useState(globalInfoStore.retrainRobotId); const [recordingName, setRecordingName] = useState(globalInfoStore.recordingName); const [isLogin, setIsLogin] = useState(globalInfoStore.isLogin); const [initialUrl, setInitialUrl] = useState(globalInfoStore.initialUrl); const [recordingUrl, setRecordingUrl] = useState(globalInfoStore.recordingUrl); const [currentWorkflowActionsState, setCurrentWorkflowActionsState] = useState(globalInfoStore.currentWorkflowActionsState); const [shouldResetInterpretationLog, setShouldResetInterpretationLog] = useState(globalInfoStore.shouldResetInterpretationLog); const [currentTextActionId, setCurrentTextActionId] = useState(''); const [currentListActionId, setCurrentListActionId] = useState(''); const [currentScreenshotActionId, setCurrentScreenshotActionId] = useState(''); const [currentTextGroupName, setCurrentTextGroupName] = useState('Text Data'); const [isDOMMode, setIsDOMMode] = useState(globalInfoStore.isDOMMode); const notify = (severity: 'error' | 'warning' | 'info' | 'success', message: string) => { setNotification({ severity, message, isOpen: true }); } const closeNotify = () => { setNotification(globalInfoStore.notification); } const setBrowserIdWithValidation = (browserId: string | null) => { setBrowserId(browserId); if (!browserId) { setRecordingLength(0); } } const resetInterpretationLog = () => { setShouldResetInterpretationLog(true); setTimeout(() => { setShouldResetInterpretationLog(false); }, 100); } const updateDOMMode = (mode: 
boolean) => { setIsDOMMode(mode); } const [dataCacheClient] = useState(() => createDataCacheClient()); return ( {children} ); }; ================================================ FILE: src/context/socket.tsx ================================================ import React, { createContext, useCallback, useContext, useState, useRef, useEffect } from 'react'; import { io, Socket } from 'socket.io-client'; import { apiUrl } from "../apiConfig"; const SERVER_ENDPOINT = apiUrl; interface SocketState { socket: Socket | null; queueSocket: Socket | null; id: string; setId: (id: string) => void; connectToQueueSocket: (userId: string, onRunCompleted?: (data: any) => void, onRunStarted?: (data: any) => void, onRunRecovered?: (data: any) => void, onRunScheduled?: (data: any) => void) => void; disconnectQueueSocket: () => void; }; class SocketStore implements Partial { socket: Socket | null = null; queueSocket: Socket | null = null; id = ''; }; const socketStore = new SocketStore(); const socketStoreContext = createContext(socketStore as SocketState); export const useSocketStore = () => useContext(socketStoreContext); export const SocketProvider = ({ children }: { children: JSX.Element }) => { const [socket, setSocket] = useState(socketStore.socket); const [queueSocket, setQueueSocket] = useState(socketStore.queueSocket); const [id, setActiveId] = useState(socketStore.id); const runCompletedCallbackRef = useRef<((data: any) => void) | null>(null); const runStartedCallbackRef = useRef<((data: any) => void) | null>(null); const runRecoveredCallbackRef = useRef<((data: any) => void) | null>(null); const runScheduledCallbackRef = useRef<((data: any) => void) | null>(null); const setId = useCallback((id: string) => { // the socket client connection is recomputed whenever id changes -> the new browser has been initialized const socket = io(`${SERVER_ENDPOINT}/${id}`, { transports: ["websocket"], rejectUnauthorized: false }); socket.on('connect', () => console.log('connected to socket')); 
socket.on("connect_error", (err) => console.log(`connect_error due to ${err.message}`)); setSocket(socket); setActiveId(id); }, [setSocket]); const connectToQueueSocket = useCallback((userId: string, onRunCompleted?: (data: any) => void, onRunStarted?: (data: any) => void, onRunRecovered?: (data: any) => void, onRunScheduled?: (data: any) => void) => { runCompletedCallbackRef.current = onRunCompleted || null; runStartedCallbackRef.current = onRunStarted || null; runRecoveredCallbackRef.current = onRunRecovered || null; runScheduledCallbackRef.current = onRunScheduled || null; const newQueueSocket = io(`${SERVER_ENDPOINT}/queued-run`, { transports: ["websocket"], rejectUnauthorized: false, query: { userId } }); newQueueSocket.on('connect', () => { console.log('Queue socket connected for user:', userId); }); newQueueSocket.on('connect_error', (error) => { console.log('Queue socket connection error:', error); }); newQueueSocket.on('run-completed', (completionData) => { console.log('Run completed event received:', completionData); if (runCompletedCallbackRef.current) { runCompletedCallbackRef.current(completionData); } }); newQueueSocket.on('run-started', (startedData) => { console.log('Run started event received:', startedData); if (runStartedCallbackRef.current) { runStartedCallbackRef.current(startedData); } }); newQueueSocket.on('run-recovered', (recoveredData) => { console.log('Run recovered event received:', recoveredData); if (runRecoveredCallbackRef.current) { runRecoveredCallbackRef.current(recoveredData); } }); newQueueSocket.on('run-scheduled', (scheduledData) => { console.log('Run scheduled event received:', scheduledData); if (runScheduledCallbackRef.current) { runScheduledCallbackRef.current(scheduledData); } }); setQueueSocket(currentSocket => { if (currentSocket) { currentSocket.disconnect(); } return newQueueSocket; }); socketStore.queueSocket = newQueueSocket; }, []); const disconnectQueueSocket = useCallback(() => { setQueueSocket(currentSocket => { 
/**
 * MUI theme used when dark mode is off.
 * Only the primary colour is customised in the palette; the rest are
 * per-component style overrides.
 */
const lightTheme = createTheme({
  palette: {
    primary: {
      main: "#ff00c3",
      contrastText: "#ffffff",
    },
  },
  components: {
    // Thin scrollbars for table containers (separate rules for Firefox and WebKit).
    MuiTableContainer: {
      styleOverrides: {
        root: {
          overflow: 'auto',
          /* Firefox */
          scrollbarWidth: 'thin',
          scrollbarColor: 'gray transparent',
          /* WebKit (Chrome, Edge, Safari) */
          '&::-webkit-scrollbar': {
            width: '5px',
            height: '5px',
          },
          '&::-webkit-scrollbar-track': {
            background: 'transparent',
          },
          '&::-webkit-scrollbar-thumb': {
            backgroundColor: 'gray',
            borderRadius: '8px',
          },
        },
      },
    },
    MuiButton: {
      styleOverrides: {
        root: {
          // Default styles for all buttons (optional)
          textTransform: "none",
        },
        containedPrimary: {
          // Styles for 'contained' variant with 'primary' color
          "&:hover": {
            backgroundColor: "#ff66d9",
          },
        },
        outlined: {
          // Apply white background for all 'outlined' variant buttons
          backgroundColor: "#ffffff",
          "&:hover": {
            backgroundColor: "#f0f0f0", // Optional lighter background on hover
          },
        },
      },
    },
    MuiLink: {
      styleOverrides: {
        root: {
          "&:hover": {
            color: "#ff00c3",
          },
        },
      },
    },
    // Intentionally empty override; the hover colour below is disabled.
    MuiIconButton: {
      styleOverrides: {
        root: {
          // '&:hover': {
          //   color: "#ff66d9",
          // },
        },
      },
    },
    MuiTab: {
      styleOverrides: {
        root: {
          minHeight: 60,
          textTransform: "none",
        },
      },
    },
    MuiAlert: {
      styleOverrides: {
        outlinedInfo: {
          color: 'rgb(0, 0, 0)',
          border: 'none',
          "& .MuiAlert-icon": {
            color: "#000000",
          },
        },
        standardInfo: {
          backgroundColor: "#fce1f4",
          color: "#ff00c3",
          "& .MuiAlert-icon": {
            color: "#ff00c3",
          },
        },
      },
    },
  },
});

/**
 * MUI theme used when dark mode is on.
 * Sets `palette.mode` to 'dark', uses black backgrounds with white text, and
 * adds overrides for surfaces (Paper, AppBar, Drawer, TableCell, Divider)
 * that the light theme leaves at their defaults.
 */
const darkTheme = createTheme({
  palette: {
    mode: 'dark',
    primary: {
      main: "#ff00c3",
      contrastText: "#ffffff",
    },
    error: {
      main: '#f44336',
      light: '#e57373',
      dark: '#d32f2f',
      contrastText: '#ffffff',
    },
    background: {
      default: '#000000ff',
      paper: '#000000ff',
    },
    text: {
      primary: '#ffffff',
      secondary: '#b3b3b3',
    },
  },
  components: {
    // Same scrollbar treatment as the light theme, but coloured with the current text colour.
    MuiTableContainer: {
      styleOverrides: {
        root: {
          overflow: 'auto',
          /* Firefox */
          scrollbarWidth: 'thin',
          scrollbarColor: 'currentColor transparent',
          /* WebKit (Chrome, Edge, Safari) */
          '&::-webkit-scrollbar': {
            width: '5px',
            height: '5px',
          },
          '&::-webkit-scrollbar-track': {
            background: 'transparent',
          },
          '&::-webkit-scrollbar-thumb': {
            backgroundColor: 'currentColor',
            borderRadius: '8px',
          },
        },
      },
    },
    MuiButton: {
      styleOverrides: {
        root: {
          textTransform: "none",
          color: '#ffffff',
          // NOTE(review): this nested rule and the `outlined` slot below both style
          // outlined buttons with different colours; which one wins depends on CSS
          // specificity — confirm the intended result.
          '&.MuiButton-outlined': {
            borderColor: '#ffffff',
            color: '#ffffff',
            "&:hover": {
              borderColor: '#ffffff',
              backgroundColor: 'inherit',
            },
          },
        },
        containedPrimary: {
          "&:hover": {
            backgroundColor: "#ff66d9",
          },
        },
        outlined: {
          borderColor: '#ff00c3',
          color: '#ff00c3',
          "&:hover": {
            // backgroundColor: 'rgba(255, 0, 195, 0.08)',
            borderColor: '#ff66d9',
          },
          '&.MuiButton-outlinedError': {
            borderColor: '#f44336',
            color: '#f44336',
            "&:hover": {
              // backgroundColor: 'rgba(244, 67, 54, 0.08)',
              borderColor: '#d32f2f',
            },
          },
        },
      },
    },
    MuiLink: {
      styleOverrides: {
        root: {
          color: '#ff66d9',
          "&:hover": {
            color: "#ff00c3",
          },
        },
      },
    },
    MuiIconButton: {
      styleOverrides: {
        root: {
          color: '#ffffff',
          // "&:hover": {
          //   backgroundColor: 'rgba(255, 0, 195, 0.08)',
          // },
          '&.MuiIconButton-colorError': {
            color: '#f44336',
            // "&:hover": {
            //   backgroundColor: 'rgba(244, 67, 54, 0.08)',
            // },
          },
        },
      },
    },
    MuiTab: {
      styleOverrides: {
        root: {
          minHeight: 60,
          textTransform: "none",
          color: '#ffffff',
          "&.Mui-selected": {
            color: '#ff00c3',
          },
        },
      },
    },
    MuiAlert: {
      styleOverrides: {
        outlinedInfo: {
          color: '#ffffff',
          border: 'none',
          "& .MuiAlert-icon": {
            color: "#ffffff",
          },
        },
        standardInfo: {
          backgroundColor: "#080808ff",
          color: "#ff00c3",
          "& .MuiAlert-icon": {
            color: "#ff00c3",
          },
        },
      },
    },
    // Additional dark mode specific components
    MuiPaper: {
      styleOverrides: {
        root: {
          backgroundColor: '#000000ff',
          border: '1px solid #080808ff',
        },
      },
    },
    MuiAppBar: {
      styleOverrides: {
        root: {
          backgroundColor: '#080808ff',
        },
      },
    },
    MuiDrawer: {
      styleOverrides: {
        paper: {
          backgroundColor: '#080808ff',
        },
      },
    },
    MuiTableCell: {
      styleOverrides: {
        root: {
          borderBottom: '1px solid #080808ff',
        },
      },
    },
    MuiDivider: {
      styleOverrides: {
        root: {
          borderColor: '#494949ff',
        },
      },
    },
    // MuiTextField:{
    //   styleOverrides: {
    //     root: {
    //       '& .MuiInputBase-root': {
    //         backgroundColor: '#1d1c1cff',
    //       },
    //     }
    // }}
  },
});

/**
 * Context carrying the current mode flag and a toggle function.
 * The default value (no-op toggle, light mode) is what consumers receive
 * when no provider is mounted above them.
 */
const ThemeModeContext = createContext({
  toggleTheme: () => { },
  darkMode: false,
});

/** Hook returning the `{ toggleTheme, darkMode }` value of ThemeModeContext. */
export const useThemeMode = () => useContext(ThemeModeContext);

/**
 * Holds the dark/light mode flag in state and persists it to localStorage
 * under the 'darkMode' key.
 *
 * @param children React nodes rendered by the provider.
 */
const ThemeModeProvider = ({ children }: { children: React.ReactNode }) => {
  // Load saved mode from localStorage or default to light mode
  // NOTE(review): JSON.parse throws if the stored value is not valid JSON, and a
  // parsed non-boolean would be used as-is — consider guarding; confirm desired behaviour.
  const [darkMode, setDarkMode] = useState(() => {
    const savedMode = localStorage.getItem('darkMode');
    return savedMode ? JSON.parse(savedMode) : false;
  });

  // Flips the mode and writes the new value to localStorage.
  // NOTE(review): the effect below also writes on every darkMode change, so this
  // write inside the state updater looks redundant — confirm before removing.
  const toggleTheme = () => {
    setDarkMode((prevMode: any) => {
      const newMode = !prevMode;
      localStorage.setItem('darkMode', JSON.stringify(newMode)); // Save new mode to localStorage
      return newMode;
    });
  };

  // Persist the mode whenever it changes (including the initial value on mount).
  useEffect(() => {
    localStorage.setItem('darkMode', JSON.stringify(darkMode)); // Save initial mode
  }, [darkMode]);

  // NOTE(review): the provider markup around {children} is not visible in this
  // view of the file; how lightTheme/darkTheme are selected cannot be confirmed here.
  return ( {children} );
};

export default ThemeModeProvider;
* Shows dotted highlights for elements that have been captured but not yet confirmed. */ class CapturedElementHighlighter { private static readonly STYLE_ID = 'maxun-captured-elements-style'; /** * Apply persistent dotted highlights to captured elements in the DOM iframe * @param selectors Array of captured element selectors */ public applyHighlights(selectors: Array<{ selector: string }>): void { const iframeDoc = this.getIframeDocument(); if (!iframeDoc) return; // Remove existing highlights this.clearHighlights(); // Create CSS rules for each captured selector const cssRules: string[] = []; selectors.forEach(({ selector }) => { const cssSelector = this.getCSSSelector(selector); if (cssSelector) { cssRules.push(` ${cssSelector} { outline: 2px dotted #ff00c3 !important; outline-offset: 2px !important; box-shadow: 0 0 0 1px rgba(255, 255, 255, 0.5) !important; } `); } }); // Inject style element if (cssRules.length > 0) { const styleElement = iframeDoc.createElement('style'); styleElement.id = CapturedElementHighlighter.STYLE_ID; styleElement.textContent = cssRules.join('\n'); iframeDoc.head.appendChild(styleElement); } } /** * Clear all persistent highlights from the DOM iframe */ public clearHighlights(): void { const iframeDoc = this.getIframeDocument(); if (!iframeDoc) return; const existingStyle = iframeDoc.getElementById(CapturedElementHighlighter.STYLE_ID); if (existingStyle) { existingStyle.remove(); } } /** * Get the iframe document */ private getIframeDocument(): Document | null { let iframeElement = document.querySelector('#dom-browser-iframe') as HTMLIFrameElement; if (!iframeElement) { iframeElement = document.querySelector('.replayer-wrapper iframe') as HTMLIFrameElement; } return iframeElement?.contentDocument || null; } /** * Convert selector to CSS format for highlighting */ private getCSSSelector(selector: string): string { // Handle XPath selectors by extracting data-mx-id if (selector.startsWith('//') || selector.startsWith('(//')) { const 
/** A captured text field as stored by the recorder UI. */
interface TextStep {
  id: number;
  type: "text";
  label: string;
  data: string;
  selectorObj: {
    selector: string;
    tag?: string;
    isShadow?: boolean;
    attribute: string;
  };
}

/** One extracted row: field label -> extracted string ("" when nothing was found). */
interface ExtractedListData {
  [key: string]: string;
}

/** Normalised description of how to locate and read one field. */
interface Field {
  selector: string;
  attribute: string;
  tag?: string;
  isShadow?: boolean;
}

/**
 * Extracts list rows from a document using CSS or XPath selectors.
 *
 * Selectors may be chained with `>>` / `:>>`; when the element matched by one
 * segment is an IFRAME or FRAME, the next segment is evaluated inside that
 * frame's document.
 */
class ClientListExtractor {
  /** True when `selector` is XPath: absolute (`/`), descendant (`//`) or relative (`./`). */
  private isXPathSelector(selector: string): boolean {
    return selector.startsWith("/") || selector.startsWith("./");
  }

  /** Returns the document that owns `root` (or `root` itself when it is a document). */
  private resolveOwnerDocument(root: Element | Document): Document | null {
    return root.nodeType === Node.DOCUMENT_NODE
      ? (root as Document)
      : root.ownerDocument;
  }

  /** Type guard for IFRAME / FRAME elements (documents have no tagName and yield false). */
  private isFrameElement(
    node: Element | Document
  ): node is HTMLIFrameElement | HTMLFrameElement {
    const tagName = (node as Element).tagName;
    return tagName === "IFRAME" || tagName === "FRAME";
  }

  /** Returns the frame's document, or null when unavailable. May throw for inaccessible frames. */
  private getFrameDocument(
    frame: HTMLIFrameElement | HTMLFrameElement
  ): Document | null {
    return frame.contentDocument || frame.contentWindow?.document || null;
  }

  /** Resolves `raw` against `baseURL`; on failure logs a warning and returns `raw` unchanged. */
  private toAbsoluteUrl(raw: string, baseURL: string): string {
    try {
      return new URL(raw, baseURL).href;
    } catch (e) {
      console.warn("Error creating URL from", raw, e);
      return raw;
    }
  }

  /** First match of one selector segment (XPath or CSS) inside `context`. */
  private querySingle(
    context: Element | Document,
    selector: string
  ): Element | null {
    return this.isXPathSelector(selector)
      ? this.evaluateXPath(context, selector)
      : context.querySelector(selector);
  }

  /** All matches of one selector segment (XPath or CSS) inside `context`. */
  private queryMany(context: Element | Document, selector: string): Element[] {
    return this.isXPathSelector(selector)
      ? this.evaluateXPathAll(context, selector)
      : Array.from(context.querySelectorAll(selector));
  }

  /**
   * Evaluates `xpath` with `rootElement` as the context node and returns the
   * first matching node, or null when nothing matches or evaluation fails.
   */
  private evaluateXPath = (
    rootElement: Element | Document,
    xpath: string
  ): Element | null => {
    try {
      const ownerDoc = this.resolveOwnerDocument(rootElement);
      if (!ownerDoc) return null;

      const result = ownerDoc.evaluate(
        xpath,
        rootElement,
        null,
        XPathResult.FIRST_ORDERED_NODE_TYPE,
        null
      );
      return result.singleNodeValue as Element | null;
    } catch (error) {
      console.warn("XPath evaluation failed:", xpath, error);
      return null;
    }
  };

  /**
   * Evaluates `xpath` with `rootElement` as the context node and returns every
   * matching element node in document order ([] on failure).
   */
  private evaluateXPathAll = (
    rootElement: Element | Document,
    xpath: string
  ): Element[] => {
    try {
      const ownerDoc = this.resolveOwnerDocument(rootElement);
      if (!ownerDoc) return [];

      const result = ownerDoc.evaluate(
        xpath,
        rootElement,
        null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null
      );

      const elements: Element[] = [];
      for (let i = 0; i < result.snapshotLength; i++) {
        const node = result.snapshotItem(i);
        if (node && node.nodeType === Node.ELEMENT_NODE) {
          elements.push(node as Element);
        }
      }
      return elements;
    } catch (error) {
      console.warn("XPath evaluation failed:", xpath, error);
      return [];
    }
  };

  /**
   * Finds the first element matching `selector` under `rootElement`.
   * Handles plain CSS, XPath, and `>>`-chained selectors that cross frames.
   */
  private queryElement = (
    rootElement: Element | Document,
    selector: string
  ): Element | null => {
    // ":>>" contains ">>", so one check covers both chain separators.
    if (!selector.includes(">>")) {
      return this.querySingle(rootElement, selector);
    }

    const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
    let currentElement: Element | Document | null = rootElement;

    for (const part of parts) {
      if (!currentElement) return null;

      if (this.isFrameElement(currentElement)) {
        const frame: HTMLIFrameElement | HTMLFrameElement = currentElement;
        try {
          const frameDoc = this.getFrameDocument(frame);
          if (!frameDoc) return null;
          currentElement = this.querySingle(frameDoc, part);
          continue;
        } catch (e) {
          console.warn(
            `Cannot access ${frame.tagName.toLowerCase()} content:`,
            e
          );
          return null;
        }
      }

      currentElement =
        "querySelector" in currentElement
          ? this.querySingle(currentElement, part)
          : null;
    }

    return currentElement as Element | null;
  };

  /**
   * Finds every element matching `selector` under `rootElement`.
   * Accepts the same selector forms as `queryElement`, including `./`-relative
   * XPath (previously only `/` and `//` were recognised here, so a relative
   * XPath was handed to `querySelectorAll`).
   */
  private queryElementAll = (
    rootElement: Element | Document,
    selector: string
  ): Element[] => {
    if (!selector.includes(">>")) {
      return this.queryMany(rootElement, selector);
    }

    const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
    let currentElements: (Element | Document)[] = [rootElement];

    for (const part of parts) {
      const nextElements: Element[] = [];

      for (const element of currentElements) {
        if (this.isFrameElement(element)) {
          try {
            const frameDoc = this.getFrameDocument(element);
            if (frameDoc) {
              nextElements.push(...this.queryMany(frameDoc, part));
            }
          } catch (e) {
            console.warn(
              `Cannot access ${element.tagName.toLowerCase()} content:`,
              e
            );
            continue;
          }
        } else if ("querySelectorAll" in element) {
          nextElements.push(...this.queryMany(element, part));
        }
      }

      currentElements = nextElements;
    }

    return currentElements as Element[];
  };

  /**
   * Reads `attribute` from `element`.
   *
   * - `innerText`: visible text, falling back to a fixed list of data-* attributes.
   * - `innerHTML`: trimmed markup.
   * - `href`: resolved against the owning document's URL, using the closest `<a>`.
   * - `src`: attribute, then `data-src`, then the computed background-image URL.
   * - anything else: the raw attribute value.
   *
   * Returns null when nothing usable is found.
   */
  private extractValue = (
    element: Element,
    attribute: string
  ): string | null => {
    if (!element) return null;

    const baseURL =
      element.ownerDocument?.location?.href || window.location.origin;

    // A shadow host with text content short-circuits every attribute type.
    if (element.shadowRoot) {
      const shadowContent = element.shadowRoot.textContent;
      if (shadowContent?.trim()) {
        return shadowContent.trim();
      }
    }

    if (attribute === "innerText") {
      let textContent =
        (element as HTMLElement).innerText?.trim() ||
        (element as HTMLElement).textContent?.trim();

      if (!textContent) {
        const dataAttributes = [
          "data-600",
          "data-text",
          "data-label",
          "data-value",
          "data-content",
        ];
        for (const attr of dataAttributes) {
          const dataValue = element.getAttribute(attr);
          if (dataValue && dataValue.trim()) {
            textContent = dataValue.trim();
            break;
          }
        }
      }

      return textContent || null;
    }

    if (attribute === "innerHTML") {
      return element.innerHTML?.trim() || null;
    }

    if (attribute === "href") {
      let anchorElement = element;
      if (element.tagName !== "A") {
        anchorElement =
          element.closest("a") ||
          element.parentElement?.closest("a") ||
          element;
      }

      const hrefValue = anchorElement.getAttribute("href");
      if (!hrefValue || hrefValue.trim() === "") {
        return null;
      }
      return this.toAbsoluteUrl(hrefValue, baseURL);
    }

    if (attribute === "src") {
      const attrValue = element.getAttribute(attribute);
      const dataAttr = attrValue || element.getAttribute("data-" + attribute);

      if (!dataAttr || dataAttr.trim() === "") {
        const style = window.getComputedStyle(element as HTMLElement);
        const bgImage = style.backgroundImage;
        if (bgImage && bgImage !== "none") {
          const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
          // Guarded like the other URL branches: a malformed background URL
          // must not throw and abort the whole list extraction.
          return matches ? this.toAbsoluteUrl(matches[1], baseURL) : null;
        }
        return null;
      }

      return this.toAbsoluteUrl(dataAttr, baseURL);
    }

    return element.getAttribute(attribute);
  };

  /** Maps recorder steps to `label -> { selector, attribute, isShadow }`. */
  private convertFields = (fields: any): Record<string, Field> => {
    const convertedFields: Record<string, Field> = {};

    for (const field of Object.values(fields)) {
      const typedField = field as TextStep;
      convertedFields[typedField.label] = {
        selector: typedField.selectorObj.selector,
        attribute: typedField.selectorObj.attribute,
        isShadow: typedField.selectorObj.isShadow || false,
      };
    }

    return convertedFields;
  };

  /**
   * Extracts up to `limit` rows from `iframeDocument`.
   *
   * @param iframeDocument Document to search.
   * @param listSelector   Selector matching each row container.
   * @param fields         Recorder field steps (see `convertFields`).
   * @param limit          Maximum number of containers to process (default 5).
   * @returns Rows with at least one non-empty value; [] on any error.
   */
  public extractListData = (
    iframeDocument: Document,
    listSelector: string,
    fields: any,
    limit: number = 5
  ): ExtractedListData[] => {
    try {
      const convertedFields = this.convertFields(fields);
      const containers = this.queryElementAll(iframeDocument, listSelector);

      if (containers.length === 0) {
        console.warn("❌ No containers found for listSelector:", listSelector);
        return [];
      }

      const extractedData: ExtractedListData[] = [];
      const containersToProcess = Math.min(containers.length, limit);

      for (
        let containerIndex = 0;
        containerIndex < containersToProcess;
        containerIndex++
      ) {
        const container = containers[containerIndex];
        const record: ExtractedListData = {};

        for (const [label, { selector, attribute, isShadow }] of Object.entries(
          convertedFields
        )) {
          let element: Element | null = null;

          if (selector.startsWith("//")) {
            // XPath fields are document-absolute, so pin them to this row by index.
            const indexedSelector = this.createIndexedXPath(
              selector,
              listSelector,
              containerIndex + 1
            );
            element = this.evaluateXPathSingle(
              iframeDocument,
              indexedSelector,
              isShadow
            );
          } else {
            element = this.queryElement(container, selector);
          }

          if (element) {
            const value = this.extractValue(element, attribute);
            if (value !== null && value !== "") {
              record[label] = value;
            } else {
              console.warn(` ⚠️ Empty value for "${label}"`);
              record[label] = "";
            }
          } else {
            console.warn(` ❌ Element not found for "${label}"`);
            record[label] = "";
          }
        }

        if (Object.values(record).some((value) => value !== "")) {
          extractedData.push(record);
        } else {
          console.warn(
            ` ⚠️ Skipping empty record for container ${containerIndex + 1}`
          );
        }
      }

      return extractedData;
    } catch (error) {
      console.error("💥 Error in client-side extractListData:", error);
      return [];
    }
  };

  /**
   * Rewrites a field XPath so it targets the `containerIndex`-th (1-based)
   * list container.
   *
   * When the field path embeds the list path (`//<list>…`), that occurrence is
   * replaced by `(<listSelector>)[n]`. Otherwise the field path is appended to
   * the indexed list selector as a child path.
   */
  private createIndexedXPath(
    childSelector: string,
    listSelector: string,
    containerIndex: number
  ): string {
    const listPattern = listSelector.replace("//", "");
    const anchoredPattern = `//${listPattern}`;
    const indexedListSelector = `(${listSelector})[${containerIndex}]`;

    // Test for exactly the text that gets replaced; testing only `listPattern`
    // let a non-matching replace return the selector unindexed.
    if (childSelector.includes(anchoredPattern)) {
      // Function replacer: the XPath may contain `$&`-style sequences that a
      // string replacement would expand.
      return childSelector.replace(anchoredPattern, () => indexedListSelector);
    }

    console.warn(` ⚠️ Pattern doesn't match, using fallback approach`);
    return `${indexedListSelector}${childSelector.replace("//", "/")}`;
  }

  /**
   * Evaluates `xpath` against `document` and returns the first match.
   *
   * With `isShadow` set, the native result is ignored and the path is walked
   * segment by segment with `querySelectorAll`, descending into open shadow
   * roots, because native XPath cannot see shadow DOM content.
   */
  private evaluateXPathSingle = (
    document: Document,
    xpath: string,
    isShadow: boolean = false
  ): Element | null => {
    try {
      const result = document.evaluate(
        xpath,
        document,
        null,
        XPathResult.FIRST_ORDERED_NODE_TYPE,
        null
      ).singleNodeValue as Element | null;

      if (!isShadow) {
        return result;
      }

      // "(path)[n]rest" -> walk "path" + "rest", then pick the n-th result at the end.
      const indexedMatch = xpath.match(/^\((.*?)\)\[(\d+)\](.*)$/);
      const cleanPath = indexedMatch
        ? indexedMatch[1] + indexedMatch[3]
        : xpath;

      const pathParts = cleanPath
        .replace(/^\/\//, "")
        .split("/")
        .map((p) => p.trim())
        .filter((p) => p.length > 0);

      let currentContexts: (Document | Element | ShadowRoot)[] = [document];

      for (const part of pathParts) {
        const positionalMatch = part.match(/^([^[]+)\[(\d+)\]$/);
        const partWithoutPosition = positionalMatch ? positionalMatch[1] : part;
        const requestedPosition = positionalMatch
          ? parseInt(positionalMatch[2], 10)
          : null;

        const nextContexts: (Element | ShadowRoot)[] = [];

        for (const ctx of currentContexts) {
          const matched = this.queryInsideContext(ctx, partWithoutPosition);

          let elementsToAdd = matched;
          if (requestedPosition !== null) {
            const index = requestedPosition - 1; // XPath is 1-based, arrays are 0-based
            if (index >= 0 && index < matched.length) {
              elementsToAdd = [matched[index]];
            } else {
              console.warn(
                ` ⚠️ Position ${requestedPosition} out of range (${matched.length} elements found)`
              );
              elementsToAdd = [];
            }
          }

          for (const el of elementsToAdd) {
            nextContexts.push(el);
            if (el.shadowRoot) {
              nextContexts.push(el.shadowRoot);
            }
          }
        }

        if (nextContexts.length === 0) {
          return null;
        }
        currentContexts = nextContexts;
      }

      if (indexedMatch) {
        const requestedIndex = parseInt(indexedMatch[2], 10) - 1; // XPath is 1-based, array is 0-based
        if (requestedIndex >= 0 && requestedIndex < currentContexts.length) {
          return currentContexts[requestedIndex] as Element;
        }
        console.warn(
          `⚠️ Requested index ${requestedIndex + 1} out of range (${
            currentContexts.length
          } elements found)`
        );
        return null;
      }

      return currentContexts[0] as Element;
    } catch (err) {
      console.error("💥 Critical XPath failure:", xpath, err);
      return null;
    }
  };

  /** Returns descendants of `context` matching one XPath step (tag plus predicates). */
  private queryInsideContext = (
    context: Document | Element | ShadowRoot,
    part: string
  ): Element[] => {
    try {
      const { tagName, conditions } = this.parseXPathPart(part);
      const candidateElements = Array.from(context.querySelectorAll(tagName));
      return candidateElements.filter((el) =>
        this.elementMatchesConditions(el, conditions)
      );
    } catch (err) {
      console.error("Error in queryInsideContext:", err);
      return [];
    }
  };

  /** Splits an XPath step such as `div[@id='x'][2]` into its tag and predicate bodies. */
  private parseXPathPart = (
    part: string
  ): { tagName: string; conditions: string[] } => {
    const tagMatch = part.match(/^([a-zA-Z0-9-]+)/);
    const tagName = tagMatch ? tagMatch[1] : "*";

    const conditionMatches = part.match(/\[([^\]]+)\]/g);
    const conditions = conditionMatches
      ? conditionMatches.map((c) => c.slice(1, -1))
      : [];

    return { tagName, conditions };
  };

  /** True when `element` satisfies every predicate in `conditions`. */
  private elementMatchesConditions = (
    element: Element,
    conditions: string[]
  ): boolean =>
    conditions.every((condition) =>
      this.elementMatchesCondition(element, condition)
    );

  /**
   * Evaluates one XPath predicate body against `element`.
   * Purely numeric (positional) and unrecognised predicates are treated as matching.
   */
  private elementMatchesCondition = (
    element: Element,
    condition: string
  ): boolean => {
    condition = condition.trim();

    // Positional predicates are resolved by the caller, not here.
    if (/^\d+$/.test(condition)) {
      return true;
    }

    // @attribute="value"
    const attrMatch = condition.match(/^@([^=]+)=["']([^"']+)["']$/);
    if (attrMatch) {
      const [, attr, value] = attrMatch;
      return element.getAttribute(attr) === value;
    }

    // contains(@class, 'value') — whole class token, not substring
    const classContainsMatch = condition.match(
      /^contains\(@class,\s*["']([^"']+)["']\)$/
    );
    if (classContainsMatch) {
      return element.classList.contains(classContainsMatch[1]);
    }

    // contains(@attribute, 'value')
    const attrContainsMatch = condition.match(
      /^contains\(@([^,]+),\s*["']([^"']+)["']\)$/
    );
    if (attrContainsMatch) {
      const [, attr, value] = attrContainsMatch;
      return (element.getAttribute(attr) || "").includes(value);
    }

    // text()="value"
    const textMatch = condition.match(/^text\(\)=["']([^"']+)["']$/);
    if (textMatch) {
      return (element.textContent?.trim() || "") === textMatch[1];
    }

    // contains(text(), 'value')
    const textContainsMatch = condition.match(
      /^contains\(text\(\),\s*["']([^"']+)["']\)$/
    );
    if (textContainsMatch) {
      return (element.textContent?.trim() || "").includes(textContainsMatch[1]);
    }

    // count(*)=N — number of element children (N = 0 means a leaf element)
    const countMatch = condition.match(/^count\(\*\)=(\d+)$/);
    if (countMatch) {
      return element.children.length === parseInt(countMatch[1], 10);
    }

    return true;
  };
}

export const clientListExtractor = new ClientListExtractor();
PaginationDetectionResult { try { const listElements = this.evaluateSelector(listSelector, doc); if (listElements.length === 0) { return { type: '', selector: null, confidence: 'low', debug: 'No list elements found' }; } const listContainer = this.getListContainer(listElements); const paginationWrapper = this.findPaginationContainer(listContainer, doc); if (paginationWrapper) { const scopedResult = this.detectFromPaginationWrapper(paginationWrapper, listContainer, doc, selectorGenerator); if (scopedResult) { return scopedResult; } } const nearbyResult = this.detectFromNearbyElements(listContainer, doc, selectorGenerator); if (nearbyResult) { return nearbyResult; } const infiniteScrollScore = options?.disableScrollDetection ? 0 : this.detectInfiniteScrollIndicators(doc, listContainer); if (infiniteScrollScore >= 8) { const confidence = infiniteScrollScore >= 15 ? 'high' : infiniteScrollScore >= 12 ? 'medium' : 'low'; return { type: 'scrollDown', selector: null, confidence }; } const fallbackResult = this.detectFromFullDocument(listContainer, doc, selectorGenerator); if (fallbackResult) { return fallbackResult; } return { type: '', selector: null, confidence: 'low', debug: { listElementsCount: listElements.length, paginationWrapperFound: !!paginationWrapper, infiniteScrollScore, } }; } catch (error: any) { console.error('Pagination detection error:', error); return { type: '', selector: null, confidence: 'low', debug: 'Exception: ' + error.message }; } } /** * Derive the common parent container from the list elements. * If all elements share the same parent, use that parent. * Otherwise use the first element's parent as a best guess. 
*/ private getListContainer(listElements: HTMLElement[]): HTMLElement { if (listElements.length === 0) return listElements[0]; const firstParent = listElements[0].parentElement; if (!firstParent) return listElements[0]; const allShareParent = listElements.every(el => el.parentElement === firstParent); if (allShareParent) return firstParent; let ancestor: HTMLElement | null = firstParent; while (ancestor) { if (listElements.every(el => ancestor!.contains(el))) { return ancestor; } ancestor = ancestor.parentElement; } return firstParent; } /** * Find pagination container structurally near the list. * Walks up from the list container checking siblings at each level. */ private findPaginationContainer(listContainer: HTMLElement, _doc: Document): HTMLElement | null { let scope = listContainer.parentElement; const MAX_LEVELS = 4; for (let level = 0; level < MAX_LEVELS && scope; level++) { const children = Array.from(scope.children) as HTMLElement[]; for (const child of children) { if (child === listContainer || child.contains(listContainer) || listContainer.contains(child)) continue; if (!this.isVisible(child)) continue; const classAndLabel = `${child.className || ''} ${child.getAttribute('aria-label') || ''} ${child.getAttribute('role') || ''}`; if (paginationContainerPatterns.test(classAndLabel)) { return child; } if (child.tagName === 'NAV') { if (this.containsPaginationLinks(child)) { return child; } } if (this.containsNumericPageLinks(child)) { return child; } } scope = scope.parentElement; } return null; } /** * Check if a container has pagination-like links (numbered or next/prev) */ private containsPaginationLinks(container: HTMLElement): boolean { const links = container.querySelectorAll('a, button, [role="button"]'); let numericCount = 0; let hasNextPrev = false; for (const link of Array.from(links)) { const text = (link.textContent || '').trim(); if (/^\d+$/.test(text)) numericCount++; if (this.matchesAnyPattern(text, nextButtonTextPatterns)) hasNextPrev = 
true; if (this.matchesAnyPattern(text, loadMorePatterns)) hasNextPrev = true; } return numericCount >= 2 || hasNextPrev; } /** * Check if a container has 2+ sequential numeric links (strong page-number signal) */ private containsNumericPageLinks(container: HTMLElement): boolean { const links = container.querySelectorAll('a, button, [role="button"]'); const numbers: number[] = []; for (const link of Array.from(links)) { const text = (link.textContent || '').trim(); if (/^\d+$/.test(text)) { numbers.push(parseInt(text, 10)); } } if (numbers.length < 2) return false; numbers.sort((a, b) => a - b); for (let i = 0; i < numbers.length - 1; i++) { if (numbers[i + 1] - numbers[i] === 1) return true; } return false; } /** * Detect pagination from a known pagination wrapper element. * Since we've already identified the wrapper structurally, we search only within it. */ private detectFromPaginationWrapper( wrapper: HTMLElement, _listContainer: HTMLElement, doc: Document, selectorGenerator: ClientSelectorGenerator ): PaginationDetectionResult | null { const clickables = this.getClickableElementsIn(wrapper); let nextButton: HTMLElement | null = null; let nextScore = 0; let loadMoreButton: HTMLElement | null = null; let loadMoreScore = 0; for (const element of clickables) { if (!this.isVisible(element)) continue; if (element.hasAttribute('disabled') || element.getAttribute('aria-disabled') === 'true') continue; const text = (element.textContent || '').trim(); const ariaLabel = element.getAttribute('aria-label') || ''; const title = element.getAttribute('title') || ''; if (text.length > MAX_BUTTON_TEXT_LENGTH) continue; const combinedText = `${text} ${ariaLabel} ${title}`; if (this.matchesAnyPattern(combinedText, loadMorePatterns)) { const score = 20; if (score > loadMoreScore) { loadMoreScore = score; loadMoreButton = element; } } let isNext = false; if (this.matchesAnyPattern(combinedText, nextButtonTextPatterns)) { isNext = true; } else if (text.length <= 3 && 
this.matchesAnyPattern(text, nextButtonArrowPatterns)) { isNext = true; } if (!isNext && !text.trim() && this.matchesAnyPattern(ariaLabel, nextButtonTextPatterns)) { isNext = true; } if (isNext) { const score = 20; if (score > nextScore) { nextScore = score; nextButton = element; } } } const hasNumberedPages = this.containsNumericPageLinks(wrapper); if (loadMoreButton) { const selector = this.generateSelectorsForElement(loadMoreButton, doc, selectorGenerator); return { type: 'clickLoadMore', selector, confidence: 'high' }; } if (nextButton) { const selector = this.generateSelectorsForElement(nextButton, doc, selectorGenerator); const confidence = hasNumberedPages ? 'high' : 'high'; return { type: 'clickNext', selector, confidence }; } if (hasNumberedPages) { const lastLink = this.findLastPageLink(wrapper); if (lastLink) { const selector = this.generateSelectorsForElement(lastLink, doc, selectorGenerator); return { type: 'clickNext', selector, confidence: 'medium' }; } } return null; } /** * Find the "next" link in a numbered pagination bar. * Look for the link after the current/active page number. */ private findLastPageLink(container: HTMLElement): HTMLElement | null { const links = Array.from(container.querySelectorAll('a, button, [role="button"]')) as HTMLElement[]; for (let i = 0; i < links.length; i++) { const link = links[i]; const isActive = link.getAttribute('aria-current') === 'page' || link.classList.contains('active') || link.classList.contains('current') || link.classList.contains('selected') || (link.closest('[aria-current="page"]') !== null); if (isActive && i + 1 < links.length) { return links[i + 1]; } } return null; } /** * Detect pagination from clickable elements near the list container. * No aggressive nav filtering. Uses proximity + text matching. 
/**
 * Look for a "load more" or "next" control among the document's clickable
 * elements that sit near the list container.
 *
 * Candidates are skipped when they are not visible, live inside the list, are
 * disabled, carry text longer than MAX_BUTTON_TEXT_LENGTH, or are not reported
 * as near the list by `isNearList`. A load-more match takes precedence over a
 * next-button match.
 *
 * @param listContainer     Element holding the list items.
 * @param doc               Document searched for clickable elements.
 * @param selectorGenerator Generator used to build the selector of the winning element.
 * @returns A detection result, or `null` when no candidate reaches a score of 15.
 */
private detectFromNearbyElements(
  listContainer: HTMLElement,
  doc: Document,
  selectorGenerator: ClientSelectorGenerator
): PaginationDetectionResult | null {
  // Score shared by both kinds of match: 15 for a nearby text match,
  // +2 for a real <button>, +3 when the element's own class matches
  // paginationContainerPatterns.
  const baseScoreFor = (candidate: HTMLElement): number => {
    let points = 10 + 5;
    if (candidate.tagName === 'BUTTON') points += 2;
    if (paginationContainerPatterns.test(candidate.className || '')) points += 3;
    return points;
  };

  let bestLoadMore: HTMLElement | null = null;
  let bestLoadMoreScore = 0;
  let bestNext: HTMLElement | null = null;
  let bestNextScore = 0;

  for (const candidate of this.getClickableElements(doc)) {
    const unusable =
      !this.isVisible(candidate) ||
      listContainer.contains(candidate) ||
      candidate.hasAttribute('disabled') ||
      candidate.getAttribute('aria-disabled') === 'true';
    if (unusable) continue;

    const label = (candidate.textContent || '').trim();
    if (label.length > MAX_BUTTON_TEXT_LENGTH) continue;
    if (!this.isNearList(candidate, listContainer)) continue;

    const ariaLabel = candidate.getAttribute('aria-label') || '';
    const title = candidate.getAttribute('title') || '';
    const searchable = `${label} ${ariaLabel} ${title}`;

    if (this.matchesAnyPattern(searchable, loadMorePatterns)) {
      const points = baseScoreFor(candidate);
      // Strictly greater: the first element reaching a score keeps it.
      if (points > bestLoadMoreScore) {
        bestLoadMoreScore = points;
        bestLoadMore = candidate;
      }
    }

    // Next-button recognition: text/label patterns first, then a short arrow
    // glyph, and finally the aria-label alone for elements without any text.
    const looksLikeNext =
      this.matchesAnyPattern(searchable, nextButtonTextPatterns) ||
      (label.length <= 3 && this.matchesAnyPattern(label, nextButtonArrowPatterns)) ||
      (label.length === 0 && this.matchesAnyPattern(ariaLabel, nextButtonTextPatterns));

    if (looksLikeNext) {
      let points = baseScoreFor(candidate);
      const insidePagination = candidate.closest(
        '[class*="paginat"], [class*="pager"], [aria-label*="paginat" i]'
      );
      if (insidePagination) points += 5;
      if (points > bestNextScore) {
        bestNextScore = points;
        bestNext = candidate;
      }
    }
  }

  if (bestLoadMore && bestLoadMoreScore >= 15) {
    return {
      type: 'clickLoadMore',
      selector: this.generateSelectorsForElement(bestLoadMore, doc, selectorGenerator),
      confidence: bestLoadMoreScore >= 18 ? 'high' : 'medium',
    };
  }

  if (bestNext && bestNextScore >= 15) {
    return {
      type: 'clickNext',
      selector: this.generateSelectorsForElement(bestNext, doc, selectorGenerator),
      confidence: bestNextScore >= 18 ? 'high' : 'medium',
    };
  }

  return null;
}
/**
 * Resolve a selector string to the elements it matches in `doc`.
 *
 * The selector is evaluated as XPath when, ignoring leading whitespace and
 * opening parentheses, it starts with `/`, `./` or `../` (for example
 * `//a`, `/html/body/ul`, `(//a)[1]`, `./li`). No CSS selector can begin that
 * way, so everything else goes to `querySelectorAll`.
 *
 * @param selector XPath expression or CSS selector.
 * @param doc      Document to evaluate against.
 * @returns Matching element nodes in document order; an empty array for a
 *          blank selector or when evaluation throws (the error is logged).
 */
private evaluateSelector(selector: string, doc: Document): HTMLElement[] {
  try {
    const trimmed = selector.trim();
    // A blank selector cannot match anything and would only raise a SyntaxError.
    if (trimmed === '') {
      return [];
    }

    const isXPath = /^\(*\.{0,2}\//.test(trimmed);
    if (!isXPath) {
      return Array.from(doc.querySelectorAll<HTMLElement>(selector));
    }

    const snapshot = doc.evaluate(
      selector,
      doc,
      null,
      XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
      null
    );

    const elements: HTMLElement[] = [];
    for (let i = 0; i < snapshot.snapshotLength; i++) {
      const node = snapshot.snapshotItem(i);
      // XPath can also yield text or attribute nodes; keep element nodes only.
      if (node && node.nodeType === Node.ELEMENT_NODE) {
        elements.push(node as HTMLElement);
      }
    }
    return elements;
  } catch (err) {
    console.error('Selector evaluation failed:', selector, err);
    return [];
  }
}
/**
 * Report whether `text` matches at least one of `patterns`.
 *
 * `RegExp.prototype.test` starts searching at `lastIndex` for patterns with the
 * global or sticky flag and updates it afterwards, so a shared pattern reused
 * across calls could miss a match. `lastIndex` is reset before every test so
 * each check starts from the beginning of `text`; for patterns without those
 * flags the reset has no effect.
 *
 * @param text     String to test.
 * @param patterns Regular expressions tried in order until one matches.
 * @returns `true` as soon as any pattern matches, otherwise `false`
 *          (including for an empty pattern list).
 */
private matchesAnyPattern(text: string, patterns: RegExp[]): boolean {
  return patterns.some((pattern) => {
    pattern.lastIndex = 0;
    return pattern.test(text);
  });
}
/**
 * Build a comma-separated chain of selectors for an element.
 *
 * The chain is assembled in a fixed order: iframe, shadow, test id, id, href,
 * rel, accessibility, attribute and general selector. Entries that are `null`,
 * `undefined` or empty are dropped.
 *
 * @param element           Element to describe.
 * @param doc               Document the element belongs to.
 * @param selectorGenerator Generator that produces the individual selectors.
 * @returns The joined chain, or `null` when the generator yields nothing, every
 *          entry is empty, or an error is thrown (warnings/errors are logged).
 */
private generateSelectorsForElement(
  element: HTMLElement,
  doc: Document,
  selectorGenerator: ClientSelectorGenerator
): string | null {
  try {
    const generated = selectorGenerator.generateSelectorsFromElement(element, doc);
    if (!generated) {
      console.warn('Could not generate selectors for element');
      return null;
    }

    // `generated` is known to be truthy from here on, so only the property
    // presence checks are needed for each entry.
    const orderedCandidates = [
      'iframeSelector' in generated && generated.iframeSelector?.full
        ? generated.iframeSelector.full
        : null,
      'shadowSelector' in generated && generated.shadowSelector?.full
        ? generated.shadowSelector.full
        : null,
      'testIdSelector' in generated ? generated.testIdSelector : null,
      'id' in generated ? generated.id : null,
      'hrefSelector' in generated ? generated.hrefSelector : null,
      'relSelector' in generated ? generated.relSelector : null,
      'accessibilitySelector' in generated ? generated.accessibilitySelector : null,
      'attrSelector' in generated ? generated.attrSelector : null,
      'generalSelector' in generated ? generated.generalSelector : null,
    ];

    const chain = orderedCandidates
      .filter((candidate) => candidate !== null && candidate !== undefined && candidate !== '')
      .join(',');

    return chain || null;
  } catch (error) {
    console.error('Error generating selectors:', error);
    return null;
  }
}
/**
 * Snapshot the generator's mode flags.
 *
 * @returns A new object holding the current `listSelector`, `getList` and
 *          `paginationMode` values.
 */
public getCurrentState(): {
  listSelector: string;
  getList: boolean;
  paginationMode: boolean;
} {
  const { listSelector, getList, paginationMode } = this;
  return { listSelector, getList, paginationMode };
}
/**
 * Build the structural fingerprint used to group similar elements.
 *
 * The fingerprint combines the tag name, normalized classes, an outline of the
 * children, a filtered attribute list, the ancestor depth (capped at 20) and
 * coarse text/link/image/button counts.
 *
 * @param element Element to describe.
 * @returns The fingerprint, or `null` for non-element nodes and excluded tags.
 */
private getStructuralFingerprint(
  element: HTMLElement
): ElementFingerprint | null {
  if (element.nodeType !== Node.ELEMENT_NODE) return null;

  const tagName = element.tagName.toLowerCase();
  const isCustomElement = tagName.includes("-");

  const alwaysSkippedTags = ["script", "style", "meta", "link", "title", "head"];
  const skippedAsStandardTag =
    !isCustomElement && alwaysSkippedTags.includes(tagName);
  if (
    skippedAsStandardTag ||
    this.groupingConfig.excludeSelectors.includes(tagName)
  ) {
    return null;
  }

  const directChildren = Array.from(element.children);

  // Tag + normalized classes of one child; the key order feeds JSON.stringify.
  const outline = (node: Element) => ({
    tag: node.tagName.toLowerCase(),
    classes: this.normalizeClasses(node.classList),
  });

  let childShapes: Array<{ tag: string; classes: string; hasText?: boolean }>;
  if (tagName === 'table') {
    // Tables are described by the cells of their header row, or of the first
    // row found when the table has no <thead>.
    const headerSection = element.querySelector('thead');
    const sampleRow = (headerSection ?? element).querySelector('tr');
    childShapes = sampleRow
      ? Array.from(sampleRow.children).map((cell) => outline(cell))
      : [];
  } else if (tagName === 'tr') {
    // Rows are described by their cell structure only, ignoring cell content.
    childShapes = directChildren.map((cell) => outline(cell));
  } else {
    // Every other element also records whether each child carries text.
    childShapes = directChildren.map((node) => ({
      ...outline(node),
      hasText: (node.textContent ?? "").trim().length > 0,
    }));
  }
  const childrenStructure = JSON.stringify(childShapes);

  const normalizedClasses = this.normalizeClasses(element.classList);

  const ignoredAttributes = ["id", "style", "data-reactid", "data-react-checksum"];
  const keepsAttribute = (name: string): boolean => {
    if (ignoredAttributes.includes(name.toLowerCase())) return false;
    // Custom elements keep all remaining attributes, data-* included.
    if (isCustomElement) return true;
    return (
      !name.startsWith("data-") || name === "data-type" || name === "data-role"
    );
  };
  const attributes = Array.from(element.attributes)
    .filter((attr) => keepsAttribute(attr.name))
    .map((attr) => `${attr.name}=${attr.value}`)
    .sort()
    .join("|");

  // Count ancestors, stopping once 20 have been seen.
  let depth = 0;
  for (
    let ancestor = element.parentElement;
    ancestor && depth < 20;
    ancestor = ancestor.parentElement
  ) {
    depth++;
  }

  const trimmedText = (element.textContent ?? "").trim();
  const textCharacteristics = {
    hasText: trimmedText.length > 0,
    // Bucket the length in steps of 20 characters.
    textLength: Math.floor(trimmedText.length / 20) * 20,
    hasLinks: element.querySelectorAll("a").length,
    hasImages: element.querySelectorAll("img").length,
    hasButtons: element.querySelectorAll(
      'button, input[type="button"], input[type="submit"]'
    ).length,
  };

  const childrenCount = directChildren.length;
  const signature = `${tagName}::${normalizedClasses}::${childrenCount}::${childrenStructure}::${attributes}`;

  return {
    tagName,
    normalizedClasses,
    childrenCount,
    childrenStructure,
    attributes,
    depth,
    textCharacteristics,
    signature,
  };
}
fp2.normalizedClasses.split(" ").filter((c) => c); const commonClasses = classes1.filter((c) => classes2.includes(c)); if (classes1.length > 0 && classes2.length > 0) { score += (commonClasses.length / Math.max(classes1.length, classes2.length)) * 8; } } // Children structure maxScore += 8; if (fp1.childrenStructure === fp2.childrenStructure) score += 8; else if (fp1.childrenCount === fp2.childrenCount) score += 4; // Attributes similarity maxScore += 5; if (fp1.attributes === fp2.attributes) score += 5; else if (fp1.attributes && fp2.attributes) { const attrs1 = fp1.attributes.split("|").filter((a) => a); const attrs2 = fp2.attributes.split("|").filter((a) => a); const commonAttrs = attrs1.filter((a) => attrs2.includes(a)); if (attrs1.length > 0 && attrs2.length > 0) { score += (commonAttrs.length / Math.max(attrs1.length, attrs2.length)) * 5; } } // Depth similarity maxScore += 2; if (Math.abs(fp1.depth - fp2.depth) <= 1) score += 2; else if (Math.abs(fp1.depth - fp2.depth) <= 2) score += 1; // Text characteristics similarity maxScore += 3; const tc1 = fp1.textCharacteristics; const tc2 = fp2.textCharacteristics; if (tc1.hasText === tc2.hasText) score += 1; if (Math.abs(tc1.textLength - tc2.textLength) <= 40) score += 1; if (tc1.hasLinks === tc2.hasLinks && tc1.hasImages === tc2.hasImages) score += 1; return maxScore > 0 ? 
/**
 * Analyze a document and record groups of structurally similar elements in
 * `elementGroups` / `groupedElements`.
 *
 * Steps:
 * 1. Return early when the same document was already analyzed and groups exist.
 * 2. Collect visible elements (shadow DOM included); when `getList` is on and
 *    no list selector is set, elements from dialogs with more than five
 *    content elements are placed in front of the list.
 * 3. Force the visible body rows of every table into one group per table. A
 *    row is attributed to its nearest enclosing table only, so rows of a
 *    nested table do not end up in the outer table's group.
 * 4. Group the remaining elements by fingerprint similarity, then narrow each
 *    candidate group to members sharing a common ancestor at the lowest level
 *    (1..maxParentLevels) where a bucket of at least `minGroupSize` exists.
 *
 * @param iframeDoc Document to analyze.
 */
public analyzeElementGroups(iframeDoc: Document): void {
  // Only re-analyze if document changed
  if (
    this.lastAnalyzedDocument === iframeDoc &&
    this.elementGroups.size > 0
  ) {
    return;
  }

  // Clear previous analysis
  this.elementGroups.clear();
  this.groupedElements.clear();
  this.lastAnalyzedDocument = iframeDoc;

  // Get all visible elements INCLUDING shadow DOM
  let allElements = this.getAllVisibleElementsWithShadow(iframeDoc);

  if (this.getList === true && this.listSelector === "") {
    const dialogElements = this.findAllDialogElements(iframeDoc);

    if (dialogElements.length > 0) {
      // Check if dialogs contain significant content worth analyzing
      const dialogContentElements = this.getElementsFromDialogs(dialogElements);

      // Only put dialog content first if dialogs have substantial content
      if (dialogContentElements.length > 5) {
        allElements = [...dialogContentElements, ...allElements];
      }
    }
  }

  const processedInTables = new Set<HTMLElement>();

  // 1. Specifically find and group rows within each table, bypassing normal similarity checks.
  const tables = allElements.filter((el) => el.tagName === 'TABLE');
  tables.forEach((table) => {
    const rows = Array.from(table.querySelectorAll('tbody > tr')).filter((row) => {
      // The selector also matches rows of nested tables; keep only rows whose
      // nearest enclosing table is this one.
      if (row.closest('table') !== table) return false;
      const rect = row.getBoundingClientRect();
      return rect.width > 0 && rect.height > 0;
    }) as HTMLElement[];

    // If the table has enough rows, force them into a single group.
    if (rows.length >= this.groupingConfig.minGroupSize) {
      const representativeFingerprint = this.getStructuralFingerprint(rows[0]);
      if (!representativeFingerprint) return;

      const group: ElementGroup = {
        elements: rows,
        fingerprint: representativeFingerprint,
        representative: rows[0],
      };

      rows.forEach((row) => {
        this.elementGroups.set(row, group);
        this.groupedElements.add(row);
        processedInTables.add(row);
      });
    }
  });

  // 2. Group all other elements, excluding table rows that were already grouped.
  const remainingElements = allElements.filter((el) => !processedInTables.has(el));
  const elementFingerprints = new Map<HTMLElement, ElementFingerprint>();

  remainingElements.forEach((element) => {
    const fingerprint = this.getStructuralFingerprint(element);
    if (fingerprint) {
      elementFingerprints.set(element, fingerprint);
    }
  });

  const processedElements = new Set<HTMLElement>();

  elementFingerprints.forEach((fingerprint, element) => {
    if (processedElements.has(element)) return;

    // `element` is the pivot: collect every unprocessed element similar to it.
    const currentGroup: HTMLElement[] = [element];
    processedElements.add(element);

    elementFingerprints.forEach((otherFingerprint, otherElement) => {
      if (processedElements.has(otherElement)) return;

      const similarity = this.calculateSimilarity(fingerprint, otherFingerprint);
      if (similarity >= this.groupingConfig.similarityThreshold) {
        currentGroup.push(otherElement);
        processedElements.add(otherElement);
      }
    });

    if (
      currentGroup.length >= this.groupingConfig.minGroupSize &&
      this.hasAnyMeaningfulChildren(element)
    ) {
      let grouped = false;

      // Walk up one ancestor level at a time until some ancestor holds enough
      // of the similar elements to form a group.
      for (
        let level = 1;
        level <= this.groupingConfig.maxParentLevels && !grouped;
        level++
      ) {
        const ancestorBuckets = new Map<HTMLElement, HTMLElement[]>();

        for (const el of currentGroup) {
          let elAncestor: HTMLElement | null = el;
          for (let i = 0; i < level && elAncestor; i++) {
            elAncestor = elAncestor.parentElement;
          }
          if (elAncestor) {
            const bucket = ancestorBuckets.get(elAncestor) || [];
            bucket.push(el);
            ancestorBuckets.set(elAncestor, bucket);
          }
        }

        // Prefer a bucket containing the pivot; among equals, the larger one.
        let bestBucket: HTMLElement[] | null = null;
        for (const bucket of ancestorBuckets.values()) {
          if (bucket.length >= this.groupingConfig.minGroupSize) {
            const containsPivot = bucket.includes(element);
            const bestContainsPivot = bestBucket ? bestBucket.includes(element) : false;
            if (!bestBucket) {
              bestBucket = bucket;
            } else if (containsPivot && !bestContainsPivot) {
              bestBucket = bucket;
            } else if (
              containsPivot === bestContainsPivot &&
              bucket.length > bestBucket.length
            ) {
              bestBucket = bucket;
            }
          }
        }

        if (bestBucket) {
          const group: ElementGroup = {
            elements: bestBucket,
            fingerprint,
            representative: element,
          };
          bestBucket.forEach((el) => {
            this.elementGroups.set(el, group);
            this.groupedElements.add(el);
          });

          // Members left out of the chosen bucket become available again for
          // later pivots.
          for (const el of currentGroup) {
            if (!bestBucket.includes(el)) {
              processedElements.delete(el);
            }
          }
          grouped = true;
        }
      }

      if (!grouped) {
        // No common ancestor found: release everything except the pivot.
        currentGroup.forEach((el, idx) => {
          if (idx > 0) processedElements.delete(el);
        });
      }
    }
  });
}
/**
 * Check if element has meaningful content for extraction.
 *
 * - `<img>`: meaningful only when it has a `src` attribute.
 * - `<a>` with an `href` attribute: always meaningful.
 * - anything else: meaningful when its trimmed `textContent` is non-empty or
 *   it contains an `<svg>` descendant.
 *
 * @param element Element to classify.
 * @returns `true` when the element matches one of the rules above.
 */
private isMeaningfulElement(element: HTMLElement): boolean {
  const tagName = element.tagName.toLowerCase();

  if (tagName === "img") {
    return element.hasAttribute("src");
  }

  if (tagName === "a" && element.hasAttribute("href")) {
    return true;
  }

  const text = (element.textContent || "").trim();

  // Elements with neither text nor an <svg> are not meaningful, whether or
  // not they have children.
  return text.length > 0 || element.querySelector("svg") !== null;
}
/**
 * Collect the elements matched by the hovered selector or by child selectors
 * related to it.
 *
 * When the hovered selector is itself one of the child selectors, only that
 * selector is resolved. Otherwise every child selector whose pattern is
 * related to the hovered pattern is resolved. Each selector is evaluated
 * directly first and looked up through shadow DOM only when that finds nothing.
 *
 * @param hoveredSelector Selector of the hovered element.
 * @param childSelectors  Candidate child selectors.
 * @param iframeDoc       Document to search.
 * @returns De-duplicated matches in discovery order; an empty array on error
 *          (the error is logged).
 */
public getAllMatchingElements(
  hoveredSelector: string,
  childSelectors: string[],
  iframeDoc: Document
): HTMLElement[] {
  try {
    const collected: HTMLElement[] = [];

    // Direct evaluation first; shadow DOM lookup only as a fallback.
    const collectFor = (selector: string): void => {
      const direct = this.evaluateXPath(selector, iframeDoc);
      collected.push(...direct);
      if (direct.length === 0) {
        collected.push(...this.findElementsInShadowDOM(selector, iframeDoc));
      }
    };

    if (childSelectors.includes(hoveredSelector)) {
      collectFor(hoveredSelector);
    } else {
      const hoveredPattern = this.extractSelectorPattern(hoveredSelector);
      for (const childSelector of childSelectors) {
        const childPattern = this.extractSelectorPattern(childSelector);
        if (!this.arePatternsRelated(hoveredPattern, childPattern)) continue;
        collectFor(childSelector);
      }
    }

    return Array.from(new Set(collected));
  } catch (error) {
    console.error("Error getting matching elements:", error);
    return [];
  }
}
/**
 * Resolve a child XPath whose target elements live inside shadow roots.
 *
 * The XPath is split into a parent expression and a child path; the parent
 * expression is evaluated against the document and each resulting element's
 * shadow DOM tree is searched for the child path.
 *
 * @param xpath     Child XPath to resolve.
 * @param iframeDoc Document to search.
 * @returns All elements found, in parent order; an empty array when the XPath
 *          cannot be parsed or an error is thrown (both cases are logged).
 */
private findElementsInShadowDOM(
  xpath: string,
  iframeDoc: Document
): HTMLElement[] {
  try {
    const parsed = this.parseChildXPath(xpath);
    if (!parsed) {
      console.warn("Could not parse child XPath:", xpath);
      return [];
    }

    const { parentXPath, childPath, childFilters } = parsed;
    const found: HTMLElement[] = [];

    for (const host of this.evaluateXPath(parentXPath, iframeDoc)) {
      found.push(
        ...this.findChildrenInElementShadowDOM(host, childPath, childFilters)
      );
    }

    return found;
  } catch (error) {
    console.error("Error in findElementsInShadowDOM:", error);
    return [];
  }
}
const parsed = xpath.match(
        /^(\/\/[^\/]+(?:\[[^\]]*\])*)((?:\/[^\/]+(?:\[[^\]]*\])*)*)$/
      );
      if (parsed === null) {
        console.warn("Could not match XPath pattern:", xpath);
        return null;
      }

      // Group 1 is the leading "//step", group 2 the remaining "/step" chain.
      const [, parentXPath, childPathString] = parsed;
      const childPath = childPathString
        .split("/")
        .filter((segment) => segment !== "");

      // Keep the first bracketed predicate of every step that has one.
      const childFilters: string[] = [];
      for (const segment of childPath) {
        const predicate = segment.match(/\[([^\]]+)\]/);
        if (predicate) {
          childFilters.push(predicate[1]);
        }
      }

      return { parentXPath, childPath, childFilters };
    } catch (error) {
      console.error("Error parsing child XPath:", error);
      return null;
    }
  }

  /**
   * Find child elements within a parent element's shadow DOM tree.
   *
   * Walks the light-DOM subtree of `parentElement` depth-first (at most ten
   * levels below it) and searches the shadow root of every host encountered.
   */
  private findChildrenInElementShadowDOM(
    parentElement: HTMLElement,
    childPath: string[],
    childFilters: string[]
  ): HTMLElement[] {
    const collected: HTMLElement[] = [];
    const seen = new Set<HTMLElement>();

    const walk = (node: HTMLElement, level: number): void => {
      if (level > 10 || seen.has(node)) {
        return;
      }
      seen.add(node);

      if (node.shadowRoot) {
        this.searchWithinShadowRoot(
          node.shadowRoot,
          childPath,
          childFilters,
          collected
        );
      }

      for (const child of Array.from(node.children)) {
        walk(child as HTMLElement, level + 1);
      }
    };

    walk(parentElement, 0);
    return collected;
  }

  /**
   * Search within a shadow root for elements matching the child path.
   *
   * Each path step is reduced to a tag name plus the classes named in its
   * contains(@class, '...') predicates. Matches of the final step are appended
   * to `matchingChildren`; afterwards every shadow host inside this root is
   * searched recursively with the same path.
   */
  private searchWithinShadowRoot(
    shadowRoot: ShadowRoot,
    childPath: string[],
    childFilters: string[],
    matchingChildren: HTMLElement[]
  ): void {
    try {
      const descendants = Array.from(
        shadowRoot.querySelectorAll("*")
      ) as HTMLElement[];

      // An empty path selects everything inside the root.
      if (childPath.length === 0) {
        matchingChildren.push(...descendants);
        return;
      }

      const lastStep = childPath.length - 1;
      let candidates = descendants;

      childPath.forEach((pathPart, step) => {
        const tagPart = pathPart.match(/^([^[]+)/);
        if (!tagPart) {
          return;
        }
        const wantedTag = tagPart[1].toLowerCase();

        const classPredicates =
          pathPart.match(/contains\(@class,\s*'([^']+)'\)/g) || [];
        const wantedClasses = classPredicates
          .map((predicate) => {
            const inner = predicate.match(/contains\(@class,\s*'([^']+)'\)/);
            return inner ? inner[1] : "";
          })
          .filter((cls) => cls.length > 0);

        const survivors = candidates.filter(
          (candidate) =>
            candidate.tagName.toLowerCase() === wantedTag &&
            wantedClasses.every((cls) => candidate.classList.contains(cls))
        );

        if (step === lastStep) {
          matchingChildren.push(...survivors);
          return;
        }

        // Next step looks at light-DOM children plus everything in the
        // survivor's own shadow root.
        const upcoming: HTMLElement[] = [];
        for (const survivor of survivors) {
          for (const child of Array.from(survivor.children)) {
            upcoming.push(child as HTMLElement);
          }
          if (survivor.shadowRoot) {
            for (const inner of Array.from(
              survivor.shadowRoot.querySelectorAll("*")
            )) {
              upcoming.push(inner as HTMLElement);
            }
          }
        }
        candidates = upcoming;
      });

      // Descend into nested shadow roots hosted inside this root.
      for (const host of descendants) {
        if (host.shadowRoot) {
          this.searchWithinShadowRoot(
            host.shadowRoot,
            childPath,
            childFilters,
            matchingChildren
          );
        }
      }
    } catch (error) {
      console.error("Error searching within shadow root:", error);
    }
  }

  /**
   * Modified container finding that only returns grouped elements
   */
  private findGroupedContainerAtPoint(
    x: number,
    y: number,
    iframeDoc: Document
  ): HTMLElement | null {
    // Ensure groups are analyzed
    this.analyzeElementGroups(iframeDoc);

    // Get all elements at the point
    const elementsAtPoint = iframeDoc.elementsFromPoint(x, y) as HTMLElement[];
    if (!elementsAtPoint.length) return null;

    // In list mode without selector, transform table cells to rows and prioritize grouped elements
    if (this.getList === true && this.listSelector === "")
{
      const transformedElements: HTMLElement[] = [];

      elementsAtPoint.forEach((element) => {
        if (element.tagName === "TD" || element.tagName === "TH") {
          const parentRow = element.closest("tr") as HTMLElement;
          if (parentRow && !transformedElements.includes(parentRow)) {
            transformedElements.push(parentRow);
          }
        } else {
          if (!transformedElements.includes(element)) {
            transformedElements.push(element);
          }
        }
      });

      const groupedElementsAtPoint = transformedElements.filter((element) =>
        this.isElementGrouped(element)
      );

      if (groupedElementsAtPoint.length > 0) {
        let filteredElements = this.filterParentChildGroupedElements(
          groupedElementsAtPoint
        );

        // Sort by DOM depth (deeper elements first for more specificity)
        filteredElements.sort((a, b) => {
          const aDialog = this.isDialogElement(a) ? 1 : 0;
          const bDialog = this.isDialogElement(b) ? 1 : 0;

          if (aDialog !== bDialog) {
            return bDialog - aDialog;
          }

          const aDepth = this.getElementDepth(a);
          const bDepth = this.getElementDepth(b);
          return bDepth - aDepth;
        });

        const selectedElement = filteredElements[0];
        return selectedElement;
      }

      return null;
    }

    return this.getDeepestElementFromPoint(x, y, iframeDoc);
  }

  /**
   * Drop grouped elements that contain another grouped element, keeping only
   * the innermost ones. Falls back to the unfiltered input if nothing is left.
   */
  private filterParentChildGroupedElements(
    groupedElements: HTMLElement[]
  ): HTMLElement[] {
    const innermost = groupedElements.filter(
      (element) =>
        !groupedElements.some(
          (other) => other !== element && element.contains(other)
        )
    );
    return innermost.length > 0 ? innermost : groupedElements;
  }

  /**
   * Describe the element at the given coordinates.
   *
   * Outside list mode (or once a list selector exists) the deepest element at
   * the point is used, preferring its parent when that parent is a link. In
   * list mode without a selector the grouped container at the point is used,
   * with table cells promoted to their table.
   *
   * @returns the element description, `null` when no element is found, or
   *          `undefined` when an error was thrown (it is logged)
   */
  public getElementInformation = (
    iframeDoc: Document,
    coordinates: Coordinates,
    listSelector: string,
    getList: boolean
  ) => {
    // Shared by both modes so frame/INPUT handling cannot drift apart again.
    const describeElement = (element: HTMLElement) => {
      const ownerDocument = element.ownerDocument;
      const frameElement = ownerDocument?.defaultView?.frameElement;
      // Note: true for <frame> hosts as well, not only for <iframe>.
      const isIframeContent = Boolean(frameElement);
      const isFrameContent = frameElement?.tagName === "FRAME";

      const containingShadowRoot = element.getRootNode() as ShadowRoot;
      const isShadowRoot = containingShadowRoot instanceof ShadowRoot;

      let info: {
        tagName: string;
        hasOnlyText?: boolean;
        innerText?: string;
        url?: string;
        imageUrl?: string;
        attributes?: Record<string, string>;
        innerHTML?: string;
        outerHTML?: string;
        isIframeContent?: boolean;
        isFrameContent?: boolean;
        iframeURL?: string;
        frameURL?: string;
        iframeIndex?: number;
        frameIndex?: number;
        frameHierarchy?: string[];
        isShadowRoot?: boolean;
        shadowRootMode?: string;
        shadowRootContent?: string;
      } = {
        tagName: element.tagName ?? "",
        isIframeContent,
        isFrameContent,
        isShadowRoot,
      };

      if (frameElement) {
        const frameSrc = (frameElement as HTMLIFrameElement | HTMLFrameElement)
          .src;
        // <frame> hosts fill the frame* fields, every other host the iframe* ones.
        if (isFrameContent) {
          info.frameURL = frameSrc;
        } else {
          info.iframeURL = frameSrc;
        }

        // Walk outwards through the enclosing frames, outermost first.
        let currentFrame: Element | null | undefined = frameElement;
        const frameHierarchy: string[] = [];
        let frameIndex = 0;

        while (currentFrame) {
          frameHierarchy.unshift(
            currentFrame.id ||
              currentFrame.getAttribute("name") ||
              (currentFrame as HTMLFrameElement).src ||
              `${currentFrame.tagName.toLowerCase()}[${frameIndex}]`
          );

          const parentDoc: Document | null = currentFrame.ownerDocument;
          currentFrame = parentDoc?.defaultView?.frameElement;
          frameIndex++;
        }

        info.frameHierarchy = frameHierarchy;
        if (isFrameContent) {
          info.frameIndex = frameIndex - 1;
        } else {
          info.iframeIndex = frameIndex - 1;
        }
      }

      if (isShadowRoot) {
        info.shadowRootMode = containingShadowRoot.mode;
        info.shadowRootContent = containingShadowRoot.innerHTML;
      }

      const attributes: Record<string, string> = {};
      for (const attr of Array.from(element.attributes)) {
        attributes[attr.name] = attr.value;
      }
      info.attributes = attributes;

      if (element.tagName === "A") {
        info.url = (element as HTMLAnchorElement).href;
        info.innerText = element.textContent ?? "";
      } else if (element.tagName === "IMG") {
        info.imageUrl = (element as HTMLImageElement).src;
      } else if (element.tagName === "SELECT") {
        const selectElement = element as HTMLSelectElement;
        info.innerText =
          selectElement.options[selectElement.selectedIndex]?.text ?? "";
        info.attributes = {
          ...info.attributes,
          selectedValue: selectElement.value,
        };
      } else if (
        // The tag check must guard BOTH input types.
        element.tagName === "INPUT" &&
        ((element as HTMLInputElement).type === "time" ||
          (element as HTMLInputElement).type === "date")
      ) {
        info.innerText = (element as HTMLInputElement).value;
      } else {
        info.hasOnlyText =
          element.children.length === 0 &&
          element.textContent !== null &&
          element.textContent.trim().length > 0;
        info.innerText = element.textContent ?? "";
      }

      info.innerHTML = element.innerHTML;
      info.outerHTML = element.outerHTML;

      return info;
    };

    try {
      if (!getList || listSelector !== "") {
        const el = this.getDeepestElementFromPoint(
          coordinates.x,
          coordinates.y,
          iframeDoc
        );
        if (!el) {
          return null;
        }

        // Prioritize Link (DO NOT REMOVE)
        const { parentElement } = el;
        return describeElement(
          parentElement?.tagName === "A" ? parentElement : el
        );
      }

      const originalEl = this.findGroupedContainerAtPoint(
        coordinates.x,
        coordinates.y,
        iframeDoc
      );
      if (!originalEl) {
        return null;
      }

      let element = originalEl;
      if (element.tagName === "TD" || element.tagName === "TH") {
        const tableParent = element.closest("table");
        if (tableParent) {
          element = tableParent;
        }
      }
      return describeElement(element);
    } catch (error) {
      const { message, stack } = error as Error;
      console.error("Error while retrieving selector:", message);
      console.error("Stack:", stack);
    }
    return undefined;
  };

  /**
   * Compute the bounding rectangle of the element at the given coordinates.
   *
   * Element selection follows the same two modes as getElementInformation.
   * In DOM mode the rectangle is returned as reported by the element; otherwise
   * it is offset by the rectangle of every enclosing frame up to `window.top`.
   *
   * @returns a plain rectangle object, `null` when no element is found, or
   *          `undefined` when an error was thrown (it is logged)
   */
  private getRect = (
    iframeDoc: Document,
    coordinates: Coordinates,
    listSelector: string,
    getList: boolean,
    isDOMMode: boolean = false
  ) => {
    // Plain, serialisable copy of a DOMRect.
    const createRectObject = (rect: DOMRect) => ({
      x: rect.x,
      y: rect.y,
      width: rect.width,
      height: rect.height,
      top: rect.top,
      right: rect.right,
      bottom: rect.bottom,
      left: rect.left,
      toJSON() {
        return {
          x: this.x,
          y: this.y,
          width: this.width,
          height: this.height,
          top: this.top,
          right: this.right,
          bottom: this.bottom,
          left: this.left,
        };
      },
    });

    const rectFor = (element: HTMLElement) => {
      const rectangle = element?.getBoundingClientRect();
      if (!rectangle) {
        return null;
      }

      if (isDOMMode) {
        // For DOM mode, return iframe-relative coordinates
        return createRectObject(rectangle);
      }

      // For screenshot mode, adjust coordinates relative to the top window
      let adjustedRect = createRectObject(rectangle);
      let currentWindow = element.ownerDocument.defaultView;

      while (currentWindow !== window.top) {
        const frameElement = currentWindow?.frameElement as HTMLIFrameElement;
        if (!frameElement) break;

        const frameRect = frameElement.getBoundingClientRect();
        adjustedRect = createRectObject({
          x: adjustedRect.x + frameRect.x,
          y: adjustedRect.y + frameRect.y,
          width: adjustedRect.width,
          height: adjustedRect.height,
          top: adjustedRect.top + frameRect.top,
          right: adjustedRect.right + frameRect.left,
          bottom: adjustedRect.bottom + frameRect.top,
          left: adjustedRect.left + frameRect.left,
        } as DOMRect);

        currentWindow = frameElement.ownerDocument.defaultView;
      }

      return adjustedRect;
    };

    try {
      if (!getList || listSelector !== "") {
        const el = this.getDeepestElementFromPoint(
          coordinates.x,
          coordinates.y,
          iframeDoc
        );
        if (!el) {
          return null;
        }

        // Prioritize Link (DO NOT REMOVE)
        const { parentElement } = el;
        return rectFor(parentElement?.tagName === "A" ? parentElement : el);
      }

      const originalEl = this.findGroupedContainerAtPoint(
        coordinates.x,
        coordinates.y,
        iframeDoc
      );
      if (!originalEl) {
        return null;
      }

      let element = originalEl;
      if (element.tagName === "TD" || element.tagName === "TH") {
        const tableParent = element.closest("table");
        if (tableParent) {
          element = tableParent;
        }
      }
      return rectFor(element);
    } catch (error) {
      const { message, stack } = error as Error;
      console.error("Error while retrieving selector:", message);
      console.error("Stack:", stack);
    }
    return undefined;
  };

  private getSelectors = (iframeDoc: Document, coordinates: Coordinates) => {
    try {
      // version @medv/finder
      // https://github.com/antonmedv/finder/blob/master/finder.ts

      type Node = {
        name: string;
        penalty: number;
        level?: number;
      };

      type Path = Node[];

      enum Limit {
        All,
        Two,
        One,
      }

      type Options = {
        root: Element;
        idName: (name: string) => boolean;
        className: (name: string) => boolean;
        tagName: (name: string) => boolean;
        attr: (name: string, value: string) => boolean;
        seedMinLength: number;
        optimizedMinLength: number;
        threshold: number;
        maxNumberOfTries: number;
      };

      let config: Options;

      let rootDocument: Document | Element;

      function finder(input: Element, options?: Partial<Options>) {
        if (input.nodeType !== Node.ELEMENT_NODE) {
          throw new Error(
            `Can't generate CSS
selector for non-element node type.`
          );
        }

        if ("html" === input.tagName.toLowerCase()) {
          return "html";
        }

        const defaults: Options = {
          root: iframeDoc.body,
          idName: (name: string) => true,
          className: (name: string) => true,
          tagName: (name: string) => true,
          attr: (name: string, value: string) => false,
          seedMinLength: 1,
          optimizedMinLength: 2,
          threshold: 900,
          maxNumberOfTries: 9000,
        };

        config = { ...defaults, ...options };

        rootDocument = findRootDocument(config.root, defaults);

        // Try the widest candidate set first, then progressively narrower ones.
        let path = bottomUpSearch(input, Limit.All, () =>
          bottomUpSearch(input, Limit.Two, () =>
            bottomUpSearch(input, Limit.One)
          )
        );

        if (path) {
          const optimized = sort(optimize(path, input));

          if (optimized.length > 0) {
            path = optimized[0];
          }

          return selector(path);
        } else {
          throw new Error(`Selector was not found.`);
        }
      }

      /** Pick the node that uniqueness queries are run against. */
      function findRootDocument(
        rootNode: Element | Document,
        defaults: Options
      ) {
        if (rootNode.nodeType === Node.DOCUMENT_NODE) {
          return rootNode;
        }
        if (rootNode === defaults.root) {
          return rootNode.ownerDocument as Document;
        }
        return rootNode;
      }

      /**
       * Climb from `input` towards the root, collecting candidate nodes per
       * level until some combination of them is unique.
       */
      function bottomUpSearch(
        input: Element,
        limit: Limit,
        fallback?: () => Path | null
      ): Path | null {
        let path: Path | null = null;
        let stack: Node[][] = [];
        let current: Element | null = input;
        let i = 0;

        while (current && current !== config.root.parentElement) {
          let level: Node[] = maybe(id(current)) ||
            maybe(...attr(current)) ||
            maybe(...classNames(current)) ||
            maybe(tagName(current)) || [any()];

          const nth = index(current);

          if (limit === Limit.All) {
            if (nth) {
              level = level.concat(
                level.filter(dispensableNth).map((node) => nthChild(node, nth))
              );
            }
          } else if (limit === Limit.Two) {
            level = level.slice(0, 1);

            if (nth) {
              level = level.concat(
                level.filter(dispensableNth).map((node) => nthChild(node, nth))
              );
            }
          } else if (limit === Limit.One) {
            const [node] = (level = level.slice(0, 1));

            if (nth && dispensableNth(node)) {
              level = [nthChild(node, nth)];
            }
          }

          for (let node of level) {
            node.level = i;
          }

          stack.push(level);

          if (stack.length >= config.seedMinLength) {
            path = findUniquePath(stack, fallback);
            if (path) {
              break;
            }
          }

          current = current.parentElement;
          i++;
        }

        if (!path) {
          path = findUniquePath(stack, fallback);
        }

        return path;
      }

      /**
       * Return the cheapest unique combination of the stacked candidates, or
       * defer to `fallback` when there are more combinations than the threshold.
       */
      function findUniquePath(
        stack: Node[][],
        fallback?: () => Path | null
      ): Path | null {
        const paths = sort(combinations(stack));

        if (paths.length > config.threshold) {
          return fallback ? fallback() : null;
        }

        for (let candidate of paths) {
          if (unique(candidate)) {
            return candidate;
          }
        }

        return null;
      }

      /** Render a path (target first) as a CSS selector string. */
      function selector(path: Path): string {
        let node = path[0];
        let query = node.name;
        for (let i = 1; i < path.length; i++) {
          const level = path[i].level || 0;

          // Adjacent levels use the child combinator, gaps the descendant one.
          if (node.level === level - 1) {
            query = `${path[i].name} > ${query}`;
          } else {
            query = `${path[i].name} ${query}`;
          }

          node = path[i];
        }
        return query;
      }

      function penalty(path: Path): number {
        return path.map((node) => node.penalty).reduce((acc, i) => acc + i, 0);
      }

      /** True when the path selects exactly one node; throws when it selects none. */
      function unique(path: Path) {
        switch (rootDocument.querySelectorAll(selector(path)).length) {
          case 0:
            throw new Error(
              `Can't select any node with this selector: ${selector(path)}`
            );
          case 1:
            return true;
          default:
            return false;
        }
      }

      function id(input: Element): Node | null {
        const elementId = input.getAttribute("id");
        if (elementId && config.idName(elementId)) {
          return {
            name: "#" + cssesc(elementId, { isIdentifier: true }),
            penalty: 0,
          };
        }
        return null;
      }

      function attr(input: Element): Node[] {
        const attrs = Array.from(input.attributes).filter((attr) =>
          config.attr(attr.name, attr.value)
        );

        return attrs.map((attr): Node => {
          let attrValue = attr.value;

          // Absolute hrefs are reduced to the part after the origin.
          if (attr.name === "href" && attr.value.includes("://")) {
            try {
              const url = new URL(attr.value);
              const siteOrigin = `${url.protocol}//${url.host}`;
              attrValue = attr.value.replace(siteOrigin, "");
            } catch (e) {
              // Keep original if URL parsing fails
            }
          }

          return {
            name:
              "[" +
              cssesc(attr.name, { isIdentifier: true }) +
              '="' +
              cssesc(attrValue) +
              '"]',
            penalty: 0.5,
          };
        });
      }

      function classNames(input: Element): Node[] {
        const names = Array.from(input.classList).filter(config.className);

        return names.map(
          (name): Node => ({
            name: "." + cssesc(name, { isIdentifier: true }),
            penalty: 1,
          })
        );
      }

      function tagName(input: Element): Node | null {
        const name = input.tagName.toLowerCase();
        if (config.tagName(name)) {
          return {
            name,
            penalty: 2,
          };
        }
        return null;
      }

      function any(): Node {
        return {
          name: "*",
          penalty: 3,
        };
      }

      /** 1-based position of `input` among its parent's element children. */
      function index(input: Element): number | null {
        const parent = input.parentNode;
        if (!parent) {
          return null;
        }

        let child = parent.firstChild;
        if (!child) {
          return null;
        }

        let i = 0;
        while (child) {
          if (child.nodeType === Node.ELEMENT_NODE) {
            i++;
          }

          if (child === input) {
            break;
          }

          child = child.nextSibling;
        }

        return i;
      }

      function nthChild(node: Node, i: number): Node {
        return {
          name: node.name + `:nth-child(${i})`,
          penalty: node.penalty + 1,
        };
      }

      function dispensableNth(node: Node) {
        return node.name !== "html" && !node.name.startsWith("#");
      }

      function maybe(...level: (Node | null)[]): Node[] | null {
        const list = level.filter(notEmpty);
        if (list.length > 0) {
          return list;
        }
        return null;
      }

      function notEmpty<T>(value: T | null | undefined): value is T {
        return value !== null && value !== undefined;
      }

      /** Yield every path that takes one node from each level of the stack. */
      function* combinations(
        stack: Node[][],
        path: Node[] = []
      ): Generator<Node[]> {
        if (stack.length > 0) {
          for (let node of stack[0]) {
            yield* combinations(
              stack.slice(1, stack.length),
              path.concat(node)
            );
          }
        } else {
          yield path;
        }
      }

      function sort(paths: Iterable<Path>): Path[] {
        return Array.from(paths).sort((a, b) => penalty(a) - penalty(b));
      }

      type Scope = {
        counter: number;
        visited: Map<string, boolean>;
      };

      /**
       * Yield shorter variants of `path` (one inner node removed at a time)
       * that still uniquely select `input`.
       */
      function* optimize(
        path: Path,
        input: Element,
        scope: Scope = {
          counter: 0,
          visited: new Map<string, boolean>(),
        }
      ): Generator<Node[]> {
        if (path.length > 2 && path.length > config.optimizedMinLength) {
          for (let i = 1; i < path.length - 1; i++) {
            if (scope.counter > config.maxNumberOfTries) {
              return; // Okay At least I tried!
            }
            scope.counter += 1;
            const newPath = [...path];
            newPath.splice(i, 1);
            const newPathKey = selector(newPath);
            if (scope.visited.has(newPathKey)) {
              continue;
            }
            try {
              if (unique(newPath) && same(newPath, input)) {
                yield newPath;
                scope.visited.set(newPathKey, true);
                yield* optimize(newPath, input, scope);
              }
            } catch (e: any) {
              continue;
            }
          }
        }
      }

      function same(path: Path, input: Element) {
        return rootDocument.querySelector(selector(path)) === input;
      }

      const regexAnySingleEscape = /[ -,\.\/:-@\[-\^`\{-~]/;
      const regexSingleEscape = /[ -,\.\/:-@\[\]\^`\{-~]/;
      const regexExcessiveSpaces =
        /(^|\\+)?(\\[A-F0-9]{1,6})\x20(?![a-fA-F0-9\x20])/g;

      const defaultOptions = {
        escapeEverything: false,
        isIdentifier: false,
        quotes: "single",
        wrap: false,
      };

      /** Escape a string for use in a CSS identifier or string. */
      function cssesc(
        string: string,
        opt: Partial<typeof defaultOptions> = {}
      ) {
        const options = { ...defaultOptions, ...opt };
        if (options.quotes != "single" && options.quotes != "double") {
          options.quotes = "single";
        }
        const quote = options.quotes == "double" ? '"' : "'";
        const isIdentifier = options.isIdentifier;

        const firstChar = string.charAt(0);
        let output = "";
        let counter = 0;
        const length = string.length;
        while (counter < length) {
          const character = string.charAt(counter++);
          let codePoint = character.charCodeAt(0);
          let value: string | undefined = void 0;
          // If it’s not a printable ASCII character…
          if (codePoint < 0x20 || codePoint > 0x7e) {
            // High surrogates span 0xD800–0xDBFF.
            if (
              codePoint >= 0xd800 &&
              codePoint <= 0xdbff &&
              counter < length
            ) {
              // It’s a high surrogate, and there is a next character.
              const extra = string.charCodeAt(counter++);
              if ((extra & 0xfc00) == 0xdc00) {
                // next character is low surrogate; supplementary code points
                // start at 0x10000.
                codePoint =
                  ((codePoint & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000;
              } else {
                // It’s an unmatched surrogate; only append this code unit, in case
                // the next code unit is the high surrogate of a surrogate pair.
                counter--;
              }
            }
            value = "\\" + codePoint.toString(16).toUpperCase() + " ";
          } else {
            if (options.escapeEverything) {
              if (regexAnySingleEscape.test(character)) {
                value = "\\" + character;
              } else {
                value = "\\" + codePoint.toString(16).toUpperCase() + " ";
              }
            } else if (/[\t\n\f\r\x0B]/.test(character)) {
              value = "\\" + codePoint.toString(16).toUpperCase() + " ";
            } else if (
              character == "\\" ||
              (!isIdentifier &&
                ((character == '"' && quote == character) ||
                  (character == "'" && quote == character))) ||
              (isIdentifier && regexSingleEscape.test(character))
            ) {
              value = "\\" + character;
            } else {
              value = character;
            }
          }
          output += value;
        }

        if (isIdentifier) {
          if (/^-[-\d]/.test(output)) {
            output = "\\-" + output.slice(1);
          } else if (/\d/.test(firstChar)) {
            output = "\\3" + firstChar + " " + output.slice(1);
          }
        }

        // Remove spaces after `\HEX` escapes that are not followed by a hex digit,
        // since they’re redundant. Note that this is only possible if the escape
        // sequence isn’t preceded by an odd number of backslashes.
        output = output.replace(regexExcessiveSpaces, function ($0, $1, $2) {
          if ($1 && $1.length % 2) {
            // It’s not safe to remove the space, so don’t.
            return $0;
          }
          // Strip the space.
return ($1 || "") + $2;
        });

        if (!isIdentifier && options.wrap) {
          return quote + output + quote;
        }
        return output;
      }

      /**
       * Resolve the most specific element at (x, y) inside `iframeDoc`.
       *
       * Order of preference: the deepest hit inside a role="dialog" element, a
       * high z-index positioned overlay or an <svg> near the top of the hit
       * stack, then the deepest hit overall. The result is then followed into
       * nested iframes, frames and open shadow roots (at most four levels each).
       */
      const getDeepestElementFromPoint = (
        x: number,
        y: number
      ): HTMLElement | null => {
        let elements = iframeDoc.elementsFromPoint(x, y) as HTMLElement[];
        if (!elements.length) return null;

        const dialogElement = elements.find(
          (el) => el.getAttribute("role") === "dialog"
        );

        if (dialogElement) {
          // Filter to keep only the dialog and its children
          const dialogElements = elements.filter(
            (el) => el === dialogElement || dialogElement.contains(el)
          );

          // Get deepest element within the dialog
          const findDeepestInDialog = (
            elements: HTMLElement[]
          ): HTMLElement | null => {
            if (!elements.length) return null;
            if (elements.length === 1) return elements[0];

            let deepestElement = elements[0];
            let maxDepth = 0;

            for (const element of elements) {
              let depth = 0;
              let current = element;

              // Count ancestors up to (and including) the dialog itself.
              while (
                current &&
                current.parentElement &&
                current !== dialogElement.parentElement
              ) {
                depth++;
                current = current.parentElement;
              }

              if (depth > maxDepth) {
                maxDepth = depth;
                deepestElement = element;
              }
            }

            return deepestElement;
          };

          const deepestInDialog = findDeepestInDialog(dialogElements);
          return deepestInDialog;
        }

        const findDeepestElement = (
          elements: HTMLElement[]
        ): HTMLElement | null => {
          if (!elements.length) return null;
          if (elements.length === 1) return elements[0];

          // NEW FIX: For overlays/popups, check if top elements are positioned
          // If the first few elements have special positioning, prefer them over deeper elements
          for (let i = 0; i < Math.min(3, elements.length); i++) {
            const element = elements[i];
            const style = window.getComputedStyle(element);
            const zIndex = parseInt(style.zIndex) || 0;

            // If this element is positioned and likely an overlay/popup component
            if (
              (style.position === "fixed" || style.position === "absolute") &&
              zIndex > 50
            ) {
              return element;
            }

            // For SVG elements (like close buttons), prefer them if they're in the top elements.
            // SVG-namespace elements report a lowercase tagName, so compare case-insensitively.
            if (element.tagName.toLowerCase() === "svg" && i < 2) {
              return element;
            }
          }

          // Original depth-based logic as fallback
          let deepestElement = elements[0];
          let maxDepth = 0;

          for (const element of elements) {
            let depth = 0;
            let current = element;

            while (current) {
              depth++;
              if (current.parentElement) {
                current = current.parentElement;
              } else {
                break;
              }
            }

            if (depth > maxDepth) {
              maxDepth = depth;
              deepestElement = element;
            }
          }

          return deepestElement;
        };

        let deepestElement = findDeepestElement(elements);
        if (!deepestElement) return null;

        // Follow open shadow roots down to the innermost element at the point.
        const traverseShadowDOM = (element: HTMLElement): HTMLElement => {
          let current = element;
          let shadowRoot = current.shadowRoot;
          let deepest = current;
          let depth = 0;
          const MAX_SHADOW_DEPTH = 4;

          while (shadowRoot && depth < MAX_SHADOW_DEPTH) {
            const shadowElement = shadowRoot.elementFromPoint(
              x,
              y
            ) as HTMLElement;
            if (!shadowElement || shadowElement === current) break;

            deepest = shadowElement;
            current = shadowElement;
            shadowRoot = current.shadowRoot;
            depth++;
          }

          return deepest;
        };

        const isInFrameset = () => {
          let node = deepestElement;
          while (node && node.parentElement) {
            if (node.tagName === "FRAMESET" || node.tagName === "FRAME") {
              return true;
            }
            node = node.parentElement;
          }
          return false;
        };

        if (deepestElement.tagName === "IFRAME") {
          let currentIframe = deepestElement as HTMLIFrameElement;
          let depth = 0;
          const MAX_IFRAME_DEPTH = 4;

          while (currentIframe && depth < MAX_IFRAME_DEPTH) {
            try {
              // Translate the point into the iframe's own coordinate space.
              const iframeRect = currentIframe.getBoundingClientRect();
              const iframeX = x - iframeRect.left;
              const iframeY = y - iframeRect.top;

              const iframeDocument =
                currentIframe.contentDocument ||
                currentIframe.contentWindow?.document;
              if (!iframeDocument) break;

              const iframeElement = iframeDocument.elementFromPoint(
                iframeX,
                iframeY
              ) as HTMLElement;
              if (!iframeElement) break;

              deepestElement = traverseShadowDOM(iframeElement);

              if (iframeElement.tagName === "IFRAME") {
                currentIframe = iframeElement as HTMLIFrameElement;
                depth++;
              } else {
                break;
              }
            } catch (error) {
              console.warn("Cannot access iframe content:", error);
              break;
            }
          }
        } else if (deepestElement.tagName === "FRAME" || isInFrameset()) {
          const framesToCheck: HTMLFrameElement[] = [];

          if (deepestElement.tagName === "FRAME") {
            framesToCheck.push(deepestElement as HTMLFrameElement);
          }

          if (isInFrameset()) {
            iframeDoc.querySelectorAll("frame").forEach((frame) => {
              framesToCheck.push(frame as HTMLFrameElement);
            });
          }

          let frameDepth = 0;
          const MAX_FRAME_DEPTH = 4;

          const processFrames = (
            frames: HTMLFrameElement[],
            currentDepth: number
          ) => {
            if (currentDepth >= MAX_FRAME_DEPTH) return;

            for (const frameElement of frames) {
              try {
                const frameRect = frameElement.getBoundingClientRect();
                const frameX = x - frameRect.left;
                const frameY = y - frameRect.top;

                // Skip frames that do not contain the point.
                if (
                  frameX < 0 ||
                  frameY < 0 ||
                  frameX > frameRect.width ||
                  frameY > frameRect.height
                ) {
                  continue;
                }

                const frameDocument =
                  frameElement.contentDocument ||
                  frameElement.contentWindow?.document;

                if (!frameDocument) continue;

                const frameElementAtPoint = frameDocument.elementFromPoint(
                  frameX,
                  frameY
                ) as HTMLElement;
                if (!frameElementAtPoint) continue;

                deepestElement = traverseShadowDOM(frameElementAtPoint);

                if (frameElementAtPoint.tagName === "FRAME") {
                  processFrames(
                    [frameElementAtPoint as HTMLFrameElement],
                    currentDepth + 1
                  );
                }

                break;
              } catch (error) {
                console.warn("Cannot access frame content:", error);
                continue;
              }
            }
          };

          processFrames(framesToCheck, frameDepth);
        } else {
          deepestElement = traverseShadowDOM(deepestElement);
        }

        return deepestElement;
      };

      const genSelectorForFrame = (element: HTMLElement) => {
        const getFramePath = (el: HTMLElement) => {
          const path = [];
          let current = el;
          let depth = 0;
          const MAX_DEPTH = 4;

          while (current && depth < MAX_DEPTH) {
            const ownerDocument = current.ownerDocument;

            const frameElement = ownerDocument?.defaultView?.frameElement as
              | HTMLIFrameElement
              | HTMLFrameElement;

            if (frameElement) {
              path.unshift({
                frame: frameElement,
                document: ownerDocument,
                element: current,
                isFrame: frameElement.tagName === "FRAME",
              });

              current = frameElement;
              depth++;
            } else {
break; } } return path; }; const framePath = getFramePath(element); if (framePath.length === 0) return null; try { const selectorParts: string[] = []; framePath.forEach((context, index) => { const frameSelector = context.isFrame ? `frame[name="${context.frame.getAttribute("name")}"]` : finder(context.frame, { root: index === 0 ? iframeDoc.body : (framePath[index - 1].document.body as Element), }); if (index === framePath.length - 1) { const elementSelector = finder(element, { root: context.document.body as Element, }); selectorParts.push(`${frameSelector} :>> ${elementSelector}`); } else { selectorParts.push(frameSelector); } }); return { fullSelector: selectorParts.join(" :>> "), isFrameContent: true, }; } catch (e) { console.warn("Error generating frame selector:", e); return null; } }; // Helper function to generate selectors for shadow DOM elements const genSelectorForShadowDOM = (element: HTMLElement) => { // Get complete path up to document root const getShadowPath = (el: HTMLElement) => { const path = []; let current = el; let depth = 0; const MAX_DEPTH = 4; while (current && depth < MAX_DEPTH) { const rootNode = current.getRootNode(); if (rootNode instanceof ShadowRoot) { path.unshift({ host: rootNode.host as HTMLElement, root: rootNode, element: current, }); current = rootNode.host as HTMLElement; depth++; } else { break; } } return path; }; const shadowPath = getShadowPath(element); if (shadowPath.length === 0) return null; try { const selectorParts: string[] = []; // Generate selector for each shadow DOM boundary shadowPath.forEach((context, index) => { // Get selector for the host element const hostSelector = finder(context.host, { root: index === 0 ? 
iframeDoc.body : (shadowPath[index - 1].root as unknown as Element), }); // For the last context, get selector for target element if (index === shadowPath.length - 1) { const elementSelector = finder(element, { root: context.root as unknown as Element, }); selectorParts.push(`${hostSelector} >> ${elementSelector}`); } else { selectorParts.push(hostSelector); } }); return { fullSelector: selectorParts.join(" >> "), mode: shadowPath[shadowPath.length - 1].root.mode, }; } catch (e) { console.warn("Error generating shadow DOM selector:", e); return null; } }; const genSelectors = (element: HTMLElement | null) => { if (element == null) { return null; } const href = element.getAttribute("href"); let generalSelector = null; try { generalSelector = finder(element); } catch (e) {} let attrSelector = null; try { attrSelector = finder(element, { attr: () => true }); } catch (e) {} let iframeSelector = null; try { // Check if element is within frame/iframe const isInFrame = element.ownerDocument !== iframeDoc; const isInFrameset = () => { return iframeDoc.querySelectorAll("frameset").length > 0; }; if (isInFrame || isInFrameset()) { iframeSelector = genSelectorForFrame(element); } } catch (e) { console.warn("Error detecting frames:", e); } const shadowSelector = genSelectorForShadowDOM(element); const relSelector = genSelectorForAttributes(element, ["rel"]); const hrefSelector = genSelectorForAttributes(element, ["href"]); const formSelector = genSelectorForAttributes(element, [ "name", "placeholder", "for", ]); const accessibilitySelector = genSelectorForAttributes(element, [ "aria-label", "alt", "title", ]); const testIdSelector = genSelectorForAttributes(element, [ "data-testid", "data-test-id", "data-testing", "data-test", "data-qa", "data-cy", ]); // We won't use an id selector if the id is invalid (starts with a number) let idSelector = null; try { idSelector = isAttributesDefined(element, ["id"]) && !isCharacterNumber(element.id?.[0]) ? 
// Certain apps don't have unique ids (ex. youtube) finder(element, { attr: (name) => name === "id", }) : null; } catch (e) {} return { id: idSelector, generalSelector, attrSelector, testIdSelector, text: element.innerText, href: href ?? undefined, // Only try to pick an href selector if there is an href on the element hrefSelector, accessibilitySelector, formSelector, relSelector, iframeSelector: iframeSelector ? { full: iframeSelector.fullSelector, isIframe: iframeSelector.isFrameContent, } : null, shadowSelector: shadowSelector ? { full: shadowSelector.fullSelector, mode: shadowSelector.mode, } : null, }; }; function genAttributeSet(element: HTMLElement, attributes: string[]) { return new Set( attributes.filter((attr) => { const attrValue = element.getAttribute(attr); return attrValue != null && attrValue.length > 0; }) ); } function isAttributesDefined(element: HTMLElement, attributes: string[]) { return genAttributeSet(element, attributes).size > 0; } // Gets all attributes that aren't null and empty function genValidAttributeFilter( element: HTMLElement, attributes: string[] ) { const attrSet = genAttributeSet(element, attributes); return (name: string) => attrSet.has(name); } function genSelectorForAttributes( element: HTMLElement, attributes: string[] ) { let selector = null; try { if (attributes.includes("rel") && element.hasAttribute("rel")) { const relValue = element.getAttribute("rel"); return `[rel="${relValue}"]`; } selector = isAttributesDefined(element, attributes) ? 
finder(element, {
                idName: () => false, // Don't use the id to generate a selector
                attr: genValidAttributeFilter(element, attributes),
              })
            : null;
        } catch (e) {}

        return selector;
      }

      // isCharacterNumber: truthy only for a single-character string that is a digit
      function isCharacterNumber(char: string) {
        return char.length === 1 && char.match(/[0-9]/);
      }

      const hoveredElement = getDeepestElementFromPoint(
        coordinates.x,
        coordinates.y
      ) as HTMLElement;

      // Ignore anything inside the "#overlay-controls" container.
      // The previous condition was `!hoveredElement.closest(...) != null`, which
      // negates first and then compares a boolean with null -- always true --
      // so elements inside the overlay were never filtered out.
      if (
        hoveredElement != null &&
        hoveredElement.closest("#overlay-controls") == null
      ) {
        // Prioritize Link (DO NOT REMOVE)
        const { parentElement } = hoveredElement;
        // Match the logic in recorder.ts for link clicks
        const element =
          parentElement?.tagName === "A" ? parentElement : hoveredElement;
        const generatedSelectors = genSelectors(element);
        return generatedSelectors;
      }
    } catch (e) {
      const { message, stack } = e as Error;
      console.warn(`Error while retrieving element: ${message}`);
      console.warn(`Stack: ${stack}`);
    }
    return null;
  };

  /**
   * Generate selectors directly from an element.
   *
   * First tries to scroll the element's vertical centre to the middle of the
   * iframe's viewport (scroll failures are logged and ignored), then
   * re-measures the element and delegates to `getSelectors` using the
   * coordinates of the element's centre point.
   *
   * @param element   Element to generate selectors for.
   * @param iframeDoc Document that owns the element; its `defaultView` is the
   *                  window that gets scrolled.
   * @returns Whatever `getSelectors` returns for the centre point, or `null`
   *          if anything throws.
   */
  public generateSelectorsFromElement = (
    element: HTMLElement,
    iframeDoc: Document
  ): any | null => {
    try {
      try {
        const rect = element.getBoundingClientRect();
        const iframeWindow = iframeDoc.defaultView;

        if (iframeWindow) {
          // Absolute Y that puts the element's centre at the viewport's centre.
          const targetY =
            rect.top +
            iframeWindow.scrollY -
            (iframeWindow.innerHeight / 2) +
            (rect.height / 2);

          // NOTE(review): 'auto' defers to the page's CSS `scroll-behavior`;
          // if a page sets it to smooth the rect read below may be taken
          // mid-animation -- confirm whether 'instant' is wanted here.
          iframeWindow.scrollTo({
            top: targetY,
            behavior: 'auto'
          });
        }
      } catch (scrollError) {
        console.warn('[ClientSelectorGenerator] Could not scroll element into view:', scrollError);
      }

      // Measure again: the scroll above changes viewport-relative coordinates.
      const rect = element.getBoundingClientRect();
      const coordinates = {
        x: rect.left + rect.width / 2,
        y: rect.top + rect.height / 2
      };

      return this.getSelectors(iframeDoc, coordinates);
    } catch (e) {
      const { message, stack } = e as Error;
      console.warn(`Error generating selectors from element: ${message}`);
      console.warn(`Stack: ${stack}`);
      return null;
    }
  };

  public getChildSelectors = (
    iframeDoc: Document,
    parentSelector: string
  ): string[]
=> { try { const cacheKey = `${parentSelector}_${iframeDoc.location?.href || 'doc'}`; if (this.selectorCache.has(cacheKey)) { return this.selectorCache.get(cacheKey)!; } this.pathCache = new WeakMap(); // Use XPath evaluation to find parent elements let parentElements: HTMLElement[] = this.evaluateXPath( parentSelector, iframeDoc ); if (parentElements.length === 0) { console.warn("No parent elements found for selector:", parentSelector); return []; } const maxItems = 10; const limitedParents = parentElements.slice(0, Math.min(maxItems, parentElements.length)); const allChildSelectors: string[] = []; for (let i = 0; i < limitedParents.length; i++) { const parent = limitedParents[i]; const otherListElements = limitedParents.filter((_, index) => index !== i); const selectors = this.generateOptimizedChildXPaths( parent, parentSelector, otherListElements ); allChildSelectors.push(...selectors); } const result = Array.from(new Set(allChildSelectors)).sort(); this.selectorCache.set(cacheKey, result); return result; } catch (error) { console.error("Error in getChildSelectors:", error); return []; } }; private getAllDescendantsIncludingShadow( parentElement: HTMLElement ): HTMLElement[] { if (this.descendantsCache.has(parentElement)) { return this.descendantsCache.get(parentElement)!; } const meaningfulDescendants: HTMLElement[] = []; const queue: HTMLElement[] = [parentElement]; const visited = new Set(); visited.add(parentElement); const MAX_MEANINGFUL_ELEMENTS = 300; const MAX_NODES_TO_CHECK = 1200; const MAX_DEPTH = 20; let nodesChecked = 0; const depths: number[] = [0]; let queueIndex = 0; while (queueIndex < queue.length) { const element = queue[queueIndex]; const currentDepth = depths[queueIndex]; queueIndex++; nodesChecked++; if ( nodesChecked > MAX_NODES_TO_CHECK || meaningfulDescendants.length >= MAX_MEANINGFUL_ELEMENTS || currentDepth > MAX_DEPTH ) { break; } if (element !== parentElement && this.isMeaningfulElementCached(element)) { 
meaningfulDescendants.push(element); } if (currentDepth >= MAX_DEPTH) { continue; } const children = element.children; const childLimit = Math.min(children.length, 30); for (let i = 0; i < childLimit; i++) { const child = children[i] as HTMLElement; if (!visited.has(child)) { visited.add(child); queue.push(child); depths.push(currentDepth + 1); } } if (element.shadowRoot && currentDepth < MAX_DEPTH - 1) { const shadowChildren = element.shadowRoot.children; const shadowLimit = Math.min(shadowChildren.length, 20); for (let i = 0; i < shadowLimit; i++) { const child = shadowChildren[i] as HTMLElement; if (!visited.has(child)) { visited.add(child); queue.push(child); depths.push(currentDepth + 1); } } } } this.descendantsCache.set(parentElement, meaningfulDescendants); return meaningfulDescendants; } private generateOptimizedChildXPaths( parentElement: HTMLElement, listSelector: string, otherListElements: HTMLElement[] = [] ): string[] { const selectors: string[] = []; const processedElements = new Set(); // Get all meaningful descendants (not just direct children) const allDescendants = this.getAllDescendantsIncludingShadow(parentElement); const batchSize = 25; for (let i = 0; i < allDescendants.length; i += batchSize) { const batch = allDescendants.slice(i, i + batchSize); for (const descendant of batch) { if (processedElements.has(descendant)) continue; processedElements.add(descendant); const absolutePath = this.buildOptimizedAbsoluteXPath( descendant, listSelector, parentElement, document, otherListElements ); if (absolutePath) { selectors.push(absolutePath); } if (selectors.length >= 250) { break; } } if (selectors.length >= 250) { break; } } return [...new Set(selectors)]; } private generateOptimizedStructuralStep( element: HTMLElement, rootElement?: HTMLElement, addPositionToAll: boolean = false, otherListElements: HTMLElement[] = [] ): string { const tagName = element.tagName.toLowerCase(); const parent = element.parentElement || ((element.getRootNode() as 
ShadowRoot).host as HTMLElement | null);

    if (!parent) {
      return tagName;
    }

    const classes = this.getCommonClassesAcrossLists(
      element,
      otherListElements
    );

    if (classes.length > 0 && !addPositionToAll) {
      const classSelector = classes
        .map((cls) => `contains(@class, '${cls}')`)
        .join(" and ");

      const hasConflictingElement = rootElement
        ? this.queryElementsInScope(rootElement, element.tagName.toLowerCase())
            .filter((el) => el !== element)
            .some((el) =>
              classes.every((cls) =>
                this.normalizeClasses((el as HTMLElement).classList)
                  .split(" ")
                  .includes(cls)
              )
            )
        : false;

      if (!hasConflictingElement) {
        return `${tagName}[${classSelector}]`;
      } else {
        const position = this.getSiblingPosition(element, parent);
        return `${tagName}[${classSelector}][${position}]`;
      }
    }

    if (!addPositionToAll) {
      const meaningfulAttrs = ["role", "type"];
      for (const attrName of meaningfulAttrs) {
        if (element.hasAttribute(attrName)) {
          const value = element.getAttribute(attrName)!.replace(/'/g, "\\'");
          const isCommonAttribute = this.isAttributeCommonAcrossLists(
            element,
            attrName,
            value,
            otherListElements
          );
          if (isCommonAttribute) {
            return `${tagName}[@${attrName}='${value}']`;
          }
        }
      }
    }

    const position = this.getSiblingPosition(element, parent);

    if (addPositionToAll || classes.length === 0) {
      return `${tagName}[${position}]`;
    }

    return tagName;
  }

  /**
   * Returns the 1-based position of `element` among the same-tag children of
   * the node that actually contains it (the value used as an XPath `[n]`
   * predicate).
   *
   * Callers pass `parentElement || shadowHost` as `parent`. For a direct child
   * of a shadow root that is the host, whose light-DOM `children` never
   * include `element`; counting there produced position 0, which is not a
   * valid XPath index. The element's real `parentNode` (a ShadowRoot in that
   * case) is therefore consulted first, and `parent` is only a fallback.
   *
   * @param element Element whose position is wanted.
   * @param parent  Parent (or shadow host) supplied by the caller.
   * @returns 1-based index among same-tag siblings, or 0 if the element is
   *          found in neither container.
   */
  private getSiblingPosition(
    element: HTMLElement,
    parent: HTMLElement
  ): number {
    const positionWithin = (container: ParentNode | null): number => {
      if (!container) return 0;
      const sameTagSiblings = Array.from(container.children || []).filter(
        (child) => child.tagName === element.tagName
      );
      return sameTagSiblings.indexOf(element) + 1;
    };

    // 0 means "not found in this container", so `||` is the intended fallback.
    return positionWithin(element.parentNode) || positionWithin(parent);
  }

  // Helper method to query elements in scope (handles both light and shadow DOM)
  private queryElementsInScope(
    rootElement: HTMLElement,
    tagName: string
  ): HTMLElement[] {
    // Check if we're dealing with shadow DOM
    if (rootElement.shadowRoot || this.isInShadowDOM(rootElement)) {
      return this.deepQuerySelectorAll(rootElement, tagName);
    } else {
      // Standard light DOM
query return Array.from(rootElement.querySelectorAll(tagName)); } } // Helper method to check if element is in shadow DOM private isInShadowDOM(element: HTMLElement): boolean { return element.getRootNode() instanceof ShadowRoot; } // Deep query selector for shadow DOM (from second version) private deepQuerySelectorAll( root: HTMLElement | ShadowRoot, selector: string ): HTMLElement[] { const elements: HTMLElement[] = []; const process = (node: Element | ShadowRoot) => { if (node instanceof Element && node.matches(selector)) { elements.push(node as HTMLElement); } for (const child of node.children) { process(child); } if (node instanceof HTMLElement && node.shadowRoot) { process(node.shadowRoot); } }; process(root); return elements; } private buildOptimizedAbsoluteXPath( targetElement: HTMLElement, listSelector: string, listElement: HTMLElement, document: Document, otherListElements: HTMLElement[] = [] ): string | null { try { let xpath = listSelector; const pathFromList = this.getOptimizedStructuralPath( targetElement, listElement, otherListElements ); if (!pathFromList) return null; const fullXPath = xpath + pathFromList; return fullXPath; } catch (error) { console.error("Error building optimized absolute XPath:", error); return null; } } // Unified path optimization (works for both light and shadow DOM) private getOptimizedStructuralPath( targetElement: HTMLElement, rootElement: HTMLElement, otherListElements: HTMLElement[] = [] ): string | null { if (this.pathCache.has(targetElement)) { return this.pathCache.get(targetElement)!; } if ( !this.elementContains(rootElement, targetElement) || targetElement === rootElement ) { return null; } const pathParts: string[] = []; let current: HTMLElement | null = targetElement; let pathDepth = 0; const MAX_PATH_DEPTH = 20; // Build path from target up to root while (current && current !== rootElement && pathDepth < MAX_PATH_DEPTH) { const classes = this.getCommonClassesAcrossLists( current, otherListElements ); const 
hasConflictingElement = classes.length > 0 && rootElement ? this.queryElementsInScope( rootElement, current.tagName.toLowerCase() ) .filter((el) => el !== current) .some((el) => classes.every((cls) => this.normalizeClasses((el as HTMLElement).classList) .split(" ") .includes(cls) ) ) : false; const pathPart = this.generateOptimizedStructuralStep( current, rootElement, hasConflictingElement, otherListElements ); if (pathPart) { pathParts.unshift(pathPart); } current = current.parentElement || ((current.getRootNode() as ShadowRoot).host as HTMLElement | null); pathDepth++; } if (current !== rootElement) { this.pathCache.set(targetElement, null); return null; } const result = pathParts.length > 0 ? "/" + pathParts.join("/") : null; this.pathCache.set(targetElement, result); return result; } private isAttributeCommonAcrossLists( targetElement: HTMLElement, attrName: string, attrValue: string, otherListElements: HTMLElement[] ): boolean { if (otherListElements.length === 0) { return true; } const targetPath = this.getElementPath(targetElement); for (const otherListElement of otherListElements) { const correspondingElement = this.findCorrespondingElement( otherListElement, targetPath ); if (correspondingElement) { const otherValue = correspondingElement.getAttribute(attrName); if (otherValue !== attrValue) { return false; } } } return true; } private getElementPath(element: HTMLElement): number[] { const path: number[] = []; let current: HTMLElement | null = element; while (current && current.parentElement) { const siblings = Array.from(current.parentElement.children); path.unshift(siblings.indexOf(current)); current = current.parentElement; } return path; } private findCorrespondingElement( rootElement: HTMLElement, path: number[] ): HTMLElement | null { let current: HTMLElement = rootElement; for (const index of path) { const children = Array.from(current.children); if (index >= children.length) { return null; } current = children[index] as HTMLElement; } return 
current; } private getCommonClassesAcrossLists( targetElement: HTMLElement, otherListElements: HTMLElement[] ): string[] { if (otherListElements.length === 0) { return this.normalizeClasses(targetElement.classList).split(" ").filter(Boolean); } const targetClasses = this.normalizeClasses(targetElement.classList).split(" ").filter(Boolean); if (targetClasses.length === 0) { return []; } const cacheKey = `${targetElement.tagName}_${targetClasses.join(',')}_${otherListElements.length}`; if (this.classCache.has(cacheKey)) { return this.classCache.get(cacheKey)!; } const maxElementsToCheck = 100; let checkedElements = 0; const similarElements: HTMLElement[] = []; for (const listEl of otherListElements) { if (checkedElements >= maxElementsToCheck) break; const descendants = this.getAllDescendantsIncludingShadow(listEl); for (const child of descendants) { if (checkedElements >= maxElementsToCheck) break; if (child.tagName === targetElement.tagName) { similarElements.push(child); checkedElements++; } } } if (similarElements.length === 0) { this.classCache.set(cacheKey, targetClasses); return targetClasses; } const targetClassSet = new Set(targetClasses); const exactMatches = similarElements.filter(el => { const elClasses = this.normalizeClasses(el.classList).split(" ").filter(Boolean); if (elClasses.length !== targetClasses.length) return false; return elClasses.every(cls => targetClassSet.has(cls)); }); if (exactMatches.length > 0) { this.classCache.set(cacheKey, targetClasses); return targetClasses; } const commonClasses: string[] = []; for (const targetClass of targetClasses) { const existsInAllOtherLists = otherListElements.every(listEl => { const elementsInThisList = this.getAllDescendantsIncludingShadow(listEl).filter(child => child.tagName === targetElement.tagName ); return elementsInThisList.some(el => this.normalizeClasses(el.classList).split(" ").includes(targetClass) ); }); if (existsInAllOtherLists) { commonClasses.push(targetClass); } } // Cache the result 
this.classCache.set(cacheKey, commonClasses); return commonClasses; } // Helper method to check containment (works for both light and shadow DOM) private elementContains(container: HTMLElement, element: HTMLElement): boolean { // Standard containment check if (container.contains(element)) { return true; } // Check shadow DOM containment let current: HTMLElement | null = element; while (current) { if (current === container) { return true; } // Move to parent or shadow host current = current.parentElement || ((current.getRootNode() as ShadowRoot).host as HTMLElement | null); } return false; } // Simplified validation private validateXPath(xpath: string, document: Document): boolean { try { const result = document.evaluate( xpath, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null ); return result.snapshotLength > 0; } catch (error) { return false; } } // findMatchingAbsoluteXPath with better matching algorithm private precomputeSelectorMappings( childSelectors: string[], document: Document ): void { if ( this.lastCachedDocument === document && this.selectorElementCache.size > 0 ) { return; } console.time("Precomputing selector mappings"); this.selectorElementCache.clear(); this.elementSelectorCache = new WeakMap(); this.spatialIndex.clear(); // Batch process selectors to avoid blocking const batchSize = this.performanceConfig.maxSelectorBatchSize; for (let i = 0; i < childSelectors.length; i += batchSize) { const batch = childSelectors.slice(i, i + batchSize); batch.forEach((selector) => { try { const elements = this.evaluateXPath(selector, document); this.selectorElementCache.set(selector, elements); // Build reverse mapping: element -> selectors that match it elements.forEach((element) => { const existingSelectors = this.elementSelectorCache.get(element) || []; existingSelectors.push(selector); this.elementSelectorCache.set(element, existingSelectors); // Add to spatial index if enabled if (this.performanceConfig.enableSpatialIndexing) { const gridKey = 
this.getElementGridKey(element);
              const gridSelectors = this.spatialIndex.get(gridKey) || [];
              gridSelectors.push(selector);
              this.spatialIndex.set(gridKey, gridSelectors);
            }
          });
        } catch (error) {
          // Skip invalid selectors silently
        }
      });
    }

    this.lastCachedDocument = document;
    console.timeEnd("Precomputing selector mappings");
  }

  /**
   * Simple spatial indexing for proximity-based filtering.
   *
   * @param element Element to bucket.
   * @returns "x,y" key of the 100px grid cell that contains the top-left
   *          corner of the element's bounding client rect.
   */
  private getElementGridKey(element: HTMLElement): string {
    const rect = element.getBoundingClientRect();
    const gridSize = 100; // 100px grid cells
    const x = Math.floor(rect.left / gridSize);
    const y = Math.floor(rect.top / gridSize);
    return `${x},${y}`;
  }

  /**
   * Get nearby selectors using spatial indexing.
   *
   * With spatial indexing disabled every cached selector is a candidate.
   * Otherwise the selectors registered in the element's grid cell and its
   * eight neighbours are collected, de-duplicated, in cell-scan order.
   *
   * @param element Element whose neighbourhood is inspected.
   * @returns Candidate selector strings (no duplicates).
   */
  private getNearbySelectorCandidates(element: HTMLElement): string[] {
    if (!this.performanceConfig.enableSpatialIndexing) {
      return Array.from(this.selectorElementCache.keys());
    }

    // One layout read; the cell size must stay in sync with getElementGridKey.
    const rect = element.getBoundingClientRect();
    const gridSize = 100;
    const cellX = Math.floor(rect.left / gridSize);
    const cellY = Math.floor(rect.top / gridSize);

    // Check current cell and adjacent cells
    const candidates = new Set<string>();
    for (let dx = -1; dx <= 1; dx++) {
      for (let dy = -1; dy <= 1; dy++) {
        const key = `${cellX + dx},${cellY + dy}`;
        const selectors = this.spatialIndex.get(key) || [];
        for (const s of selectors) {
          candidates.add(s);
        }
      }
    }

    return Array.from(candidates);
  }

  // Ultra-fast direct lookup using cached mappings
  private findDirectMatches(
    targetElement: HTMLElement,
    childSelectors: string[],
    document: Document
  ): string[] {
    // Use cached reverse mapping if available
    if (
      this.performanceConfig.useElementCache &&
      this.elementSelectorCache.has(targetElement)
    ) {
      const cachedSelectors =
        this.elementSelectorCache.get(targetElement) || [];
      // Filter to only selectors in the current child selectors list
      const matches = cachedSelectors.filter((selector) =>
        childSelectors.includes(selector)
      );

      // positional selectors over non-positional ones
      return
this.sortByPositionalPriority(matches);
    }

    // Fallback to spatial filtering + selective evaluation
    const candidateSelectors = this.getNearbySelectorCandidates(targetElement);
    const relevantCandidates = candidateSelectors.filter((selector) =>
      childSelectors.includes(selector)
    );

    const matches: string[] = [];

    // Process in smaller batches to avoid blocking
    for (const selector of relevantCandidates.slice(0, 20)) {
      // Limit to top 20 candidates
      try {
        const cachedElements = this.selectorElementCache.get(selector);
        if (cachedElements && cachedElements.includes(targetElement)) {
          matches.push(selector);
        }
      } catch (error) {
        continue;
      }
    }

    // positional selectors and sort by specificity
    return this.sortByPositionalPriority(matches);
  }

  /**
   * Sort selectors to prioritize positional ones over non-positional.
   *
   * A selector counts as positional when it contains a numeric predicate such
   * as `[3]`. Within the same category, higher `calculateXPathSpecificity`
   * comes first.
   *
   * The input array is not modified: `Array.prototype.sort` works in place,
   * so a copy is sorted and returned instead of reordering the caller's array.
   *
   * @param selectors XPath selectors to order.
   * @returns A new array with the selectors in priority order.
   */
  private sortByPositionalPriority(selectors: string[]): string[] {
    // Hoisted so the pattern is not rebuilt for every comparison.
    const positionalPredicate = /\[\d+\]/;

    return [...selectors].sort((a, b) => {
      const aIsPositional = positionalPredicate.test(a);
      const bIsPositional = positionalPredicate.test(b);

      // Positional selectors get higher priority
      if (aIsPositional && !bIsPositional) return -1;
      if (!aIsPositional && bIsPositional) return 1;

      // If both are positional or both are non-positional, sort by specificity
      return (
        this.calculateXPathSpecificity(b) - this.calculateXPathSpecificity(a)
      );
    });
  }

  // Fast element proximity check instead of full similarity calculation
  private findProximityMatch(
    targetElement: HTMLElement,
    childSelectors: string[],
    document: Document
  ): string | null {
    const targetRect = targetElement.getBoundingClientRect();
    const targetCenter = {
      x: targetRect.left + targetRect.width / 2,
      y: targetRect.top + targetRect.height / 2,
    };

    let bestMatch = null;
    let bestDistance = Infinity;
    let bestScore = 0;

    // Use spatial filtering to reduce candidates
    const candidateSelectors = this.getNearbySelectorCandidates(targetElement)
      .filter((selector) => childSelectors.includes(selector))
      .slice(0, 30); // Limit candidates

    for (const selector of candidateSelectors) {
      try {
        const
cachedElements = this.selectorElementCache.get(selector) || []; for (const element of cachedElements.slice(0, 5)) { // Check max 5 elements per selector const rect = element.getBoundingClientRect(); const center = { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2, }; const distance = Math.sqrt( Math.pow(center.x - targetCenter.x, 2) + Math.pow(center.y - targetCenter.y, 2) ); // Quick element similarity check (just tag + basic attributes) const similarity = this.calculateQuickSimilarity( targetElement, element ); if (similarity > 0.7 && distance < bestDistance) { bestDistance = distance; bestMatch = selector; bestScore = similarity; } } } catch (error) { continue; } } return bestMatch; } // Lightweight similarity calculation for real-time use private calculateQuickSimilarity( element1: HTMLElement, element2: HTMLElement ): number { if (element1 === element2) return 1.0; let score = 0; let maxScore = 0; // Tag name (most important) maxScore += 4; if (element1.tagName === element2.tagName) { score += 4; } else { return 0; } // Quick class check (just count common classes) maxScore += 3; const classes1 = element1.classList; const classes2 = element2.classList; let commonClasses = 0; for (const cls of classes1) { if (classes2.contains(cls)) commonClasses++; } if (classes1.length > 0 && classes2.length > 0) { score += (commonClasses / Math.max(classes1.length, classes2.length)) * 3; } // Quick attribute check (just a few key ones) maxScore += 2; const keyAttrs = ["data-testid", "role", "type"]; let matchingAttrs = 0; for (const attr of keyAttrs) { if (element1.getAttribute(attr) === element2.getAttribute(attr)) { matchingAttrs++; } } score += (matchingAttrs / keyAttrs.length) * 2; return maxScore > 0 ? 
score / maxScore : 0; } // Main matching function with early exits and caching private findMatchingAbsoluteXPath( targetElement: HTMLElement, childSelectors: string[], listSelector: string, iframeDocument: Document ): string | null { try { // Ensure mappings are precomputed this.precomputeSelectorMappings(childSelectors, iframeDocument); // Strategy 1: Ultra-fast direct lookup (usually finds match immediately) const directMatches = this.findDirectMatches( targetElement, childSelectors, iframeDocument ); if (directMatches.length > 0) { return directMatches[0]; // Return best direct match } const proximityMatch = this.findProximityMatch( targetElement, childSelectors, iframeDocument ); if (proximityMatch) { return proximityMatch; } // Strategy 3: Build and validate new XPath only if no cached matches found const builtXPath = this.buildTargetXPath( targetElement, listSelector, iframeDocument ); if (builtXPath) { return builtXPath; } return null; } catch (error) { console.error("Error in optimized matching:", error); return null; } } // Public method to precompute mappings when child selectors are first generated public precomputeChildSelectorMappings( childSelectors: string[], document: Document ): void { this.precomputeSelectorMappings(childSelectors, document); } // Calculate XPath specificity for better matching private calculateXPathSpecificity(xpath: string): number { let score = 0; // Count specific attributes score += (xpath.match(/@id=/g) || []).length * 10; score += (xpath.match(/@data-testid=/g) || []).length * 8; score += (xpath.match(/contains\(@class/g) || []).length * 3; score += (xpath.match(/@\w+=/g) || []).length * 2; score += (xpath.match(/\[\d+\]/g) || []).length * 1; // Position predicates // Penalty for overly generic selectors if (xpath.match(/^\/\/\w+$/) && !xpath.includes("[")) { score -= 5; // Just a tag name } return score; } // Build XPath for target element private buildTargetXPath( targetElement: HTMLElement, listSelector: string, 
document: Document
  ): string | null {
    try {
      const parentElements = this.evaluateXPath(listSelector, document);
      // Use the list item that actually contains the target. Always taking
      // parentElements[0] made this return null for every element that lives
      // in the second or any later list item.
      const containingParent = parentElements.find(
        (parent) =>
          parent !== targetElement &&
          this.elementContains(parent, targetElement)
      );

      if (!containingParent) {
        return null;
      }

      const structuralPath = this.getOptimizedStructuralPath(
        targetElement,
        containingParent
      );

      if (!structuralPath) {
        return null;
      }

      return listSelector + structuralPath;
    } catch (error) {
      console.error("Error building target XPath:", error);
      return null;
    }
  }

  // Evaluates `xpath` against `contextNode` and returns the matching element
  // nodes in document order. Strings that do not look like XPath yield [];
  // if evaluation throws, the result of fallbackXPathEvaluation is returned.
  private evaluateXPath(
    xpath: string,
    contextNode: Document | ShadowRoot
  ): HTMLElement[] {
    try {
      if (!this.isXPathSelector(xpath)) {
        console.warn("Selector doesn't appear to be XPath:", xpath);
        return [];
      }

      const document =
        contextNode instanceof ShadowRoot
          ? (contextNode.host as HTMLElement).ownerDocument
          : (contextNode as Document);

      const result = document.evaluate(
        xpath,
        contextNode as any,
        null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null
      );

      const elements: HTMLElement[] = [];
      for (let i = 0; i < result.snapshotLength; i++) {
        const node = result.snapshotItem(i);
        if (node && node.nodeType === Node.ELEMENT_NODE) {
          elements.push(node as HTMLElement);
        }
      }

      return elements;
    } catch (error) {
      return this.fallbackXPathEvaluation(xpath, contextNode);
    }
  }

  // Heuristic: true when the string starts like a path or contains typical
  // XPath predicate/operator fragments.
  private isXPathSelector(selector: string): boolean {
    return (
      selector.startsWith("//") ||
      selector.startsWith("/") ||
      selector.startsWith("./") ||
      selector.includes("contains(@") ||
      selector.includes("[count(") ||
      selector.includes("@class=") ||
      selector.includes("@id=") ||
      selector.includes(" and ") ||
      selector.includes(" or ")
    );
  }

  /**
   * Best-effort evaluation of a few simple XPath shapes via `querySelectorAll`,
   * used when `document.evaluate` throws.
   *
   * Supported shapes:
   *   - `//tag`
   *   - `//tag[contains(@class, 'name')]` (with or without the space)
   *   - `//tag[n]`
   *
   * The previous version began with `if (this.isXPathSelector(xpath)) return []`.
   * Every supported shape starts with `//`, so that guard was always true and
   * none of the branches below could ever run. It has been removed; inputs
   * that fit no shape still end in the "Could not parse" warning.
   *
   * @param xpath       XPath expression that failed native evaluation.
   * @param contextNode Document or shadow root to search within.
   * @returns Matching elements, or [] when the expression is not supported.
   */
  private fallbackXPathEvaluation(
    xpath: string,
    contextNode: Document | ShadowRoot
  ): HTMLElement[] {
    try {
      const simpleTagMatch = xpath.match(/^\/\/(\w+)$/);
      if (simpleTagMatch) {
        const tagName = simpleTagMatch[1];
        return Array.from(
          contextNode.querySelectorAll(tagName)
        ) as HTMLElement[];
      }

      // `\s*` accepts the "contains(@class, 'x')" spelling this class emits.
      const singleClassMatch = xpath.match(
        /^\/\/(\w+)\[contains\(@class,\s*'([^']+)'\)\]$/
      );
      if (singleClassMatch) {
        const [, tagName, className] = singleClassMatch;
        // contains() is a substring test, which is what [class*="..."] does.
        const quotedValue = className.replace(/["\\]/g, "\\$&");
        return Array.from(
          contextNode.querySelectorAll(`${tagName}[class*="${quotedValue}"]`)
        ) as HTMLElement[];
      }

      const positionMatch = xpath.match(/^\/\/(\w+)\[(\d+)\]$/);
      if (positionMatch) {
        const [, tagName, position] = positionMatch;
        // XPath tag[n] counts same-tag siblings only, i.e. :nth-of-type.
        return Array.from(
          contextNode.querySelectorAll(`${tagName}:nth-of-type(${position})`)
        ) as HTMLElement[];
      }

      console.warn("⚠️ Could not parse XPath pattern:", xpath);
      return [];
    } catch (error) {
      console.error("❌ Fallback XPath evaluation also failed:", error);
      return [];
    }
  }

  private getBestSelectorForAction = (action: Action) => {
    switch (action.type) {
      case ActionType.Click:
      case ActionType.Hover:
      case ActionType.DragAndDrop: {
        const selectors = action.selectors;

        if (selectors?.iframeSelector?.full) {
          return selectors.iframeSelector.full;
        }

        if (selectors?.shadowSelector?.full) {
          return selectors.shadowSelector.full;
        }

        // less than 25 characters, and element only has text inside
        const textSelector =
          selectors?.text?.length != null &&
          selectors?.text?.length < 25 &&
          action.hasOnlyText
            ? selectors.generalSelector
            : null;

        if (action.tagName === TagName.Input) {
          return (
            selectors.testIdSelector ??
            selectors?.id ??
            selectors?.formSelector ??
            selectors?.accessibilitySelector ??
            selectors?.generalSelector ??
            selectors?.attrSelector ??
            null
          );
        }
        if (action.tagName === TagName.A) {
          return (
            selectors.testIdSelector ??
            selectors?.id ??
            selectors?.hrefSelector ??
            selectors?.accessibilitySelector ??
            selectors?.generalSelector ??
            selectors?.attrSelector ??
            null
          );
        }

        // Prefer text selectors for spans, ems over general selectors
        if (
          action.tagName === TagName.Span ||
          action.tagName === TagName.EM ||
          action.tagName === TagName.Cite ||
          action.tagName === TagName.B ||
          action.tagName === TagName.Strong
        ) {
          return (
            selectors.testIdSelector ??
            selectors?.id ??
            selectors?.accessibilitySelector ??
            selectors?.hrefSelector ??
textSelector ?? selectors?.generalSelector ?? selectors?.attrSelector ?? null ); } return ( selectors.testIdSelector ?? selectors?.id ?? selectors?.accessibilitySelector ?? selectors?.hrefSelector ?? selectors?.generalSelector ?? selectors?.attrSelector ?? null ); } case ActionType.Input: case ActionType.Keydown: { const selectors = action.selectors; if (selectors?.shadowSelector?.full) { return selectors.shadowSelector.full; } return ( selectors.testIdSelector ?? selectors?.id ?? selectors?.formSelector ?? selectors?.accessibilitySelector ?? selectors?.generalSelector ?? selectors?.attrSelector ?? null ); } default: break; } return null; }; /** * Determines if an element is within a Shadow DOM */ private isElementInShadowDOM(element: HTMLElement): boolean { try { const rootNode = element.getRootNode(); return ( rootNode.constructor.name === "ShadowRoot" || (rootNode && "host" in rootNode && "mode" in rootNode) ); } catch (error) { console.warn("Error checking shadow DOM:", error); return false; } } /** * Enhanced highlighting that detects and highlights entire groups */ public generateDataForHighlighter( coordinates: Coordinates, iframeDocument: Document, isDOMMode: boolean = true, cachedChildSelectors: string[] = [] ): { rect: DOMRect; selector: string; elementInfo: ElementInfo | null; childSelectors?: string[]; isShadow?: boolean; groupInfo?: { isGroupElement: boolean; groupSize: number; groupElements: HTMLElement[]; groupFingerprint: ElementFingerprint; }; similarElements?: { elements: HTMLElement[]; rects: DOMRect[]; }; } | null { try { if (this.getList === true) { this.analyzeElementGroups(iframeDocument); } const elementAtPoint = this.findGroupedContainerAtPoint( coordinates.x, coordinates.y, iframeDocument ); if (!elementAtPoint) return null; const elementGroup = this.getElementGroup(elementAtPoint); const isGroupElement = elementGroup !== null; let isShadow = false; let targetElement = elementAtPoint; const rect = this.getRect( iframeDocument, coordinates, 
this.listSelector, this.getList, isDOMMode ); const elementInfo = this.getElementInformation( iframeDocument, coordinates, this.listSelector, this.getList ); if (!rect || !elementInfo) { return null; } let displaySelector: string | null; let childSelectors: string[] = []; let similarElements: | { elements: HTMLElement[]; rects: DOMRect[] } | undefined; if (this.getList === true && this.listSelector !== "") { childSelectors = cachedChildSelectors.length > 0 ? cachedChildSelectors : this.getChildSelectors(iframeDocument, this.listSelector); if (cachedChildSelectors.length > 0) { this.precomputeChildSelectorMappings( cachedChildSelectors, iframeDocument ); } } if (isGroupElement && this.getList === true && this.listSelector === "") { displaySelector = this.generateGroupContainerSelector(elementGroup!); targetElement = elementGroup!.representative; isShadow = this.isElementInShadowDOM(targetElement); return { rect, selector: displaySelector, elementInfo, isShadow, groupInfo: { isGroupElement: true, groupSize: elementGroup!.elements.length, groupElements: elementGroup!.elements, groupFingerprint: elementGroup!.fingerprint, }, }; } else if ( this.getList === true && this.listSelector !== "" && childSelectors.length > 0 && this.paginationMode === false ) { displaySelector = this.findMatchingAbsoluteXPath( elementAtPoint, childSelectors, this.listSelector, iframeDocument ); if (displaySelector) { const matchingElements = this.getAllMatchingElements( displaySelector, childSelectors, iframeDocument ); if (matchingElements.length > 1) { const rects = matchingElements.map((el) => { const elementRect = el.getBoundingClientRect(); if (isDOMMode) { return elementRect; } else { let adjustedRect = elementRect; let currentWindow = el.ownerDocument.defaultView; while (currentWindow !== window.top) { const frameElement = currentWindow?.frameElement as HTMLIFrameElement; if (!frameElement) break; const frameRect = frameElement.getBoundingClientRect(); adjustedRect = new DOMRect( 
/**
 * Builds an XPath expression intended to match every element of `group`.
 *
 * The expression is `//tag[...]` where the predicates are, in order:
 *  1. one `contains(@class, ...)` per class token shared by all elements,
 *  2. one `@attr=...` per attribute (other than id/style/class) whose value
 *     is identical on all elements,
 *  3. `count(*)=N` when every element has the same number of child elements.
 *
 * String values are emitted as proper XPath 1.0 literals, so class tokens or
 * attribute values that contain a single quote no longer produce an invalid
 * expression. Values without quotes are emitted exactly as before.
 *
 * @param group Group whose `elements` are described by the selector.
 * @returns The XPath string, or `""` when the group has no elements.
 * @throws Error when the group's elements do not all share one tag name.
 */
private generateGroupContainerSelector(group: ElementGroup): string {
  const { elements } = group;
  if (!elements || elements.length === 0) return "";

  // XPath 1.0 string literals have no escape sequences: pick the quote style
  // the value does not contain, or stitch the pieces together with concat().
  const toXPathLiteral = (value: string): string => {
    if (!value.includes("'")) return `'${value}'`;
    if (!value.includes('"')) return `"${value}"`;
    const quoteSeparator = `, "'", `;
    const pieces = value.split("'").map((piece) => `'${piece}'`);
    return `concat(${pieces.join(quoteSeparator)})`;
  };

  // 1. Tag name (all elements must agree before anything else is derived)
  const tagName = elements[0].tagName.toLowerCase();
  if (!elements.every((el) => el.tagName.toLowerCase() === tagName)) {
    throw new Error("Inconsistent tag names in group.");
  }

  const predicates: string[] = [];

  // 2. Class tokens present on every element
  const commonClasses = this.getCommonStrings(
    elements.map((el) =>
      (el.getAttribute("class") || "").split(/\s+/).filter(Boolean)
    )
  );
  for (const cls of commonClasses) {
    predicates.push(`contains(@class, ${toXPathLiteral(cls)})`);
  }

  // 3. Attributes with identical values everywhere (id/style/class excluded)
  const commonAttributes = this.getCommonAttributes(elements, [
    "id",
    "style",
    "class",
  ]);
  for (const [attr, value] of Object.entries(commonAttributes)) {
    predicates.push(`@${attr}=${toXPathLiteral(value)}`);
  }

  // 4. Child-element count, only when it is the same for the whole group
  const childrenCountSet = new Set(elements.map((el) => el.children.length));
  if (childrenCountSet.size === 1) {
    predicates.push(`count(*)=${[...childrenCountSet][0]}`);
  }

  // 5. Assemble
  const base = `//${tagName}`;
  return predicates.length > 0 ? `${base}[${predicates.join(" and ")}]` : base;
}

/**
 * Returns the strings of the first list that also occur in every other list.
 *
 * Order (and any duplicates) follow the first list. An empty `lists` input
 * yields `[]`; previously `reduce` without a seed threw a TypeError there.
 *
 * @param lists Lists to intersect.
 * @returns The intersection, in first-list order.
 */
private getCommonStrings(lists: string[][]): string[] {
  if (lists.length === 0) return [];

  return lists.reduce((acc, list) =>
    acc.filter((item) => list.includes(item))
  );
}

/**
 * Collects attribute name/value pairs that are identical on all `elements`.
 *
 * Candidates come from the first element. Skipped up front: names listed in
 * `excludeAttrs`, empty or whitespace-only values, `_ngcontent-*` /
 * `_nghost-*`, names starting with `data-reactid`, `data-react-checksum` or
 * `ng-reflect-`, and names that contain `-c` and end in digits (presumably
 * framework-generated component ids; the code only checks the name shape).
 *
 * @param elements Elements to compare.
 * @param excludeAttrs Attribute names that must never be returned.
 * @returns Map of attribute name to the shared value; `{}` for no elements.
 */
private getCommonAttributes(
  elements: Element[],
  excludeAttrs: string[] = []
): Record<string, string> {
  if (elements.length === 0) return {};

  const firstEl = elements[0];
  const attrMap: Record<string, string> = {};

  for (const attr of Array.from(firstEl.attributes)) {
    if (
      excludeAttrs.includes(attr.name) ||
      !attr.value ||
      attr.value.trim() === ""
    ) {
      continue;
    }

    if (
      attr.name.startsWith("_ngcontent-") ||
      attr.name.startsWith("_nghost-")
    ) {
      continue;
    }

    if (
      attr.name.match(/^(data-reactid|data-react-checksum|ng-reflect-)/) ||
      (attr.name.includes("-c") && attr.name.match(/\d+$/))
    ) {
      continue;
    }

    attrMap[attr.name] = attr.value;
  }

  // Drop every candidate that any later element is missing or disagrees on.
  for (let i = 1; i < elements.length; i++) {
    for (const name of Object.keys(attrMap)) {
      if (elements[i].getAttribute(name) !== attrMap[name]) {
        delete attrMap[name];
      }
    }
  }

  return attrMap;
}
/**
 * Helper methods used by the unified getDeepestElementFromPoint
 */

/**
 * Narrows a hit list to the elements that carry content under the point.
 *
 * Lists with zero or one entry are returned untouched. If no element passes
 * the content check, the original list is returned so callers always have
 * something to work with.
 *
 * @param elements Candidate elements, typically from `elementsFromPoint`.
 * @param x Horizontal coordinate of the point.
 * @param y Vertical coordinate of the point.
 * @returns The content-bearing subset, or `elements` when that subset is empty.
 */
private filterLogicalElements(
  elements: HTMLElement[],
  x: number,
  y: number
): HTMLElement[] {
  if (elements.length <= 1) return elements;

  const elementsWithContent = elements.filter((element) =>
    this.elementHasRelevantContentAtPoint(element, x, y)
  );

  return elementsWithContent.length > 0 ? elementsWithContent : elements;
}

/**
 * Decides whether `element` has meaningful content under the point.
 *
 * An element qualifies when its bounding rect contains the point and it
 * either has a non-blank direct text node, is one of the content tags
 * (image, form control, media, canvas, svg), or has a child element that
 * qualifies by the same rules.
 *
 * @param element Element to inspect.
 * @param x Horizontal coordinate of the point.
 * @param y Vertical coordinate of the point.
 * @returns `true` when content is found under the point.
 */
private elementHasRelevantContentAtPoint(
  element: HTMLElement,
  x: number,
  y: number
): boolean {
  const rect = element.getBoundingClientRect();
  if (x < rect.left || x > rect.right || y < rect.top || y > rect.bottom) {
    return false;
  }

  const hasDirectText = Array.from(element.childNodes).some(
    (node) => node.nodeType === Node.TEXT_NODE && node.textContent?.trim()
  );
  if (hasDirectText) {
    return true;
  }

  const contentTags = [
    "IMG",
    "INPUT",
    "BUTTON",
    "SELECT",
    "TEXTAREA",
    "VIDEO",
    "AUDIO",
    "CANVAS",
    "SVG",
  ];
  // tagName is upper-cased only for HTML-namespace elements; an inline <svg>
  // reports "svg", so normalise before comparing or the SVG entry never hits.
  if (contentTags.includes(element.tagName.toUpperCase())) {
    return true;
  }

  const childElements = Array.from(element.children) as HTMLElement[];
  return childElements.some((child) =>
    this.elementHasRelevantContentAtPoint(child, x, y)
  );
}

/**
 * Picks the most deeply nested element among `elements`, looking through
 * open shadow roots.
 *
 * For each not-yet-visited candidate, its shadow root (if any) is queried
 * with `elementsFromPoint` and searched recursively before the candidate's
 * own depth is considered. Comparisons are strict, so among equally deep
 * elements the one encountered first wins.
 *
 * @param elements Candidates to examine.
 * @param x Horizontal coordinate used for shadow-root hit testing.
 * @param y Vertical coordinate used for shadow-root hit testing.
 * @param visited Elements already processed; updated in place.
 * @returns The deepest element found, or `null` when nothing new was examined.
 */
private findTrulyDeepestElement(
  elements: HTMLElement[],
  x: number,
  y: number,
  visited: Set<HTMLElement>
): HTMLElement | null {
  let deepestElement: HTMLElement | null = null;
  let maxDepth = -1;

  for (const element of elements) {
    if (visited.has(element)) continue;
    visited.add(element);

    if (element.shadowRoot) {
      const shadowElements = element.shadowRoot.elementsFromPoint(
        x,
        y
      ) as HTMLElement[];
      const deeper = this.findTrulyDeepestElement(
        shadowElements,
        x,
        y,
        visited
      );
      if (deeper) {
        const shadowDepth = this.getElementDepth(deeper);
        if (shadowDepth > maxDepth) {
          maxDepth = shadowDepth;
          deepestElement = deeper;
        }
      }
    }

    const depth = this.getElementDepth(element);
    if (depth > maxDepth) {
      maxDepth = depth;
      deepestElement = element;
    }
  }

  return deepestElement;
}

/**
 * Counts how many steps separate `element` from the body of
 * `lastAnalyzedDocument`, walking parent elements and hopping from a shadow
 * root to its host when there is no parent element.
 *
 * The walk stops as soon as the count exceeds 50, which bounds the loop on
 * unusually deep or unexpected trees.
 *
 * @param element Element whose nesting depth is measured.
 * @returns Number of steps taken (at most 51).
 */
private getElementDepth(element: HTMLElement): number {
  let depth = 0;
  let current: HTMLElement | null = element;

  while (current && current !== this.lastAnalyzedDocument?.body) {
    depth++;
    // No parent element means we are at a tree root: for a shadow root,
    // continue from its host; for a document, `host` is absent and the
    // walk ends.
    current =
      current.parentElement ||
      ((current.getRootNode() as ShadowRoot).host as HTMLElement | null);
    if (depth > 50) break;
  }

  return depth;
}
/**
 * Gathers the rendered elements inside a shadow root (dialog analysis helper).
 *
 * Every descendant with a non-zero width and height is collected in the order
 * `querySelectorAll("*")` returns it; whenever such an element hosts its own
 * shadow root, that root's elements are appended right after it. Failures
 * while reading the shadow root are logged and whatever was collected so far
 * is returned.
 *
 * @param shadowRoot Shadow root to walk.
 * @returns Flat list of visible elements, nested shadow content included.
 */
private getElementsFromShadowRoot(shadowRoot: ShadowRoot): HTMLElement[] {
  const collected: HTMLElement[] = [];

  try {
    // Pass 1: keep only descendants that occupy space on screen.
    const visible: HTMLElement[] = [];
    for (const node of Array.from(shadowRoot.querySelectorAll("*"))) {
      const box = node.getBoundingClientRect();
      if (box.width > 0 && box.height > 0) {
        visible.push(node as HTMLElement);
      }
    }

    // Pass 2: emit each one, followed by the contents of its own shadow tree.
    for (const host of visible) {
      collected.push(host);
      if (host.shadowRoot) {
        collected.push(...this.getElementsFromShadowRoot(host.shadowRoot));
      }
    }
  } catch (error) {
    console.warn("Could not access shadow root:", error);
  }

  return collected;
}
/**
 * Produces a selector string for the element under `coordinates`.
 *
 * Outside pagination mode (or when no selectors could be computed) the
 * choice is delegated to `getBestSelectorForAction`, using an action object
 * synthesised from the element information and the requested action type.
 *
 * In pagination mode every available selector is chained, comma-separated,
 * in this priority order: iframe, shadow, test id, id, href, rel,
 * accessibility, attribute, general. Missing or empty entries are skipped,
 * so the result is an empty string when none is available.
 *
 * @param iframeDocument Document containing the target element.
 * @param coordinates Point identifying the target element.
 * @param action Action type the selector is generated for.
 * @returns The selector string, or `null` when no selector fits the action.
 */
public generateSelector(
  iframeDocument: Document,
  coordinates: Coordinates,
  action: ActionType
): string | null {
  const elementInfo = this.getElementInformation(
    iframeDocument,
    coordinates,
    "",
    false
  );
  const candidates = this.getSelectors(iframeDocument, coordinates);

  if (!this.paginationMode || !candidates) {
    const syntheticAction = {
      type: action,
      tagName: (elementInfo?.tagName as TagName) || TagName.A,
      inputType: undefined,
      value: undefined,
      selectors: candidates || {},
      timestamp: 0,
      isPassword: false,
      hasOnlyText: elementInfo?.hasOnlyText || false,
    } as Action;

    return this.getBestSelectorForAction(syntheticAction);
  }

  // Pagination mode: gather each selector kind that exists on the result.
  const iframeFull =
    "iframeSelector" in candidates && candidates.iframeSelector?.full
      ? candidates.iframeSelector.full
      : null;
  const shadowFull =
    "shadowSelector" in candidates && candidates.shadowSelector?.full
      ? candidates.shadowSelector.full
      : null;
  const testId = "testIdSelector" in candidates ? candidates.testIdSelector : null;
  const byId = "id" in candidates ? candidates.id : null;
  const byHref = "hrefSelector" in candidates ? candidates.hrefSelector : null;
  const byRel = "relSelector" in candidates ? candidates.relSelector : null;
  const byAccessibility =
    "accessibilitySelector" in candidates
      ? candidates.accessibilitySelector
      : null;
  const byAttr = "attrSelector" in candidates ? candidates.attrSelector : null;
  const general =
    "generalSelector" in candidates ? candidates.generalSelector : null;

  // Array order is the priority order of the resulting chain.
  return [
    iframeFull,
    shadowFull,
    testId,
    byId,
    byHref,
    byRel,
    byAccessibility,
    byAttr,
    general,
  ]
    .filter(
      (entry) => !(entry === null || entry === undefined || entry === "")
    )
    .join(",");
}
/**
 * generateUUID() that works in non-secure contexts (plain HTTP on non-localhost).
 *
 * crypto.randomUUID is only available in secure contexts (HTTPS or localhost).
 * Falls back to crypto.getRandomValues(), then Math.random() as last resort.
 *
 * The `crypto` global itself is probed with `typeof` before use: when it is
 * absent altogether, a bare `crypto.randomUUID` access would throw instead of
 * reaching the Math.random() fallback.
 *
 * @returns A version-4 UUID string in 8-4-4-4-12 lowercase hexadecimal form.
 */
export const generateUUID = (): string => {
  const hasCrypto = typeof crypto === 'object' && crypto !== null;

  if (hasCrypto && typeof crypto.randomUUID === 'function') {
    return crypto.randomUUID();
  }

  if (hasCrypto && typeof crypto.getRandomValues === 'function') {
    const bytes = new Uint8Array(16);
    crypto.getRandomValues(bytes);
    bytes[6] = (bytes[6] & 0x0f) | 0x40; // version nibble -> 4
    bytes[8] = (bytes[8] & 0x3f) | 0x80; // variant bits -> 10xx

    // A dash goes in front of bytes 4, 6, 8 and 10 to form 8-4-4-4-12.
    return Array.from(bytes, (b, i) =>
      ([4, 6, 8, 10].includes(i) ? '-' : '') + b.toString(16).padStart(2, '0')
    ).join('');
  }

  // Last resort: not cryptographically strong, but still well-formed.
  return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {
    const r = Math.random() * 16 | 0;
    return (c === 'x' ? r : (r & 0x3 | 0x8)).toString(16);
  });
};
#browser-actions { right: 0; overflow-x: hidden; } #browser-recorder { display: flex; justify-content: center; align-items: center; position: relative; box-sizing: border-box; width: 100%; height: calc(100vh - 2rem); margin: 1rem; overflow: hidden; } #browser-content { height: 100%; width: 100%; display: flex; flex-direction: column; transform: scale(1); transform-origin: top left; } #browser-window { overflow-y: auto; height: 100%; } .right-side-panel { transform: scale(1); transform-origin: top left; overflow: hidden; position: relative; } .MuiButton-root[sx*="position: 'absolute'"] { bottom: 2rem !important; margin-bottom: 0 !important; } ================================================ FILE: src/index.tsx ================================================ import React from 'react'; import ReactDOM from 'react-dom/client'; import './index.css'; import { BrowserRouter } from 'react-router-dom'; import App from './App'; import i18n from "./i18n" const root = ReactDOM.createRoot( document.getElementById('root') as HTMLElement ); root.render( ); ================================================ FILE: src/pages/Login.tsx ================================================ import axios from "axios"; import { useState, useContext, useEffect } from "react"; import { useNavigate, Link } from "react-router-dom"; import { AuthContext } from "../context/auth"; import { Box, Typography, TextField, Button, CircularProgress } from "@mui/material"; import { useGlobalInfoStore } from "../context/globalInfo"; import { apiUrl } from "../apiConfig"; import { useTranslation } from 'react-i18next'; import i18n from '../i18n'; import { useThemeMode } from "../context/theme-provider"; const Login = () => { const { t } = useTranslation(); // just don't remove these logs - god knows why it's not working without them console.log(i18n) console.log(t) const [form, setForm] = useState({ email: "", password: "", }); const [loading, setLoading] = useState(false); const { notify } = 
useGlobalInfoStore(); const { email, password } = form; const { state, dispatch } = useContext(AuthContext); const { user } = state; const { darkMode } = useThemeMode(); const navigate = useNavigate(); useEffect(() => { if (user) { navigate("/"); } }, [user, navigate]); const handleChange = (e: any) => { const { name, value } = e.target; setForm({ ...form, [name]: value }); }; const submitForm = async (e: any) => { e.preventDefault(); if (!email.includes("@")) { notify("error", "Please enter a valid email."); return; } setLoading(true); try { const { data } = await axios.post( `${apiUrl}/auth/login`, { email, password }, { withCredentials: true } ); dispatch({ type: "LOGIN", payload: data }); window.localStorage.setItem("user", JSON.stringify(data)); navigate("/"); } catch (err: any) { const errorResponse = err.response?.data; const errorMessage = errorResponse?.code ? t(errorResponse.code) : t('login.error.generic'); notify("error", errorMessage); setLoading(false); } }; return ( logo {t('login.title')} {t('login.register_prompt')}{" "} {t('login.register_link')} ); }; export default Login; ================================================ FILE: src/pages/MainPage.tsx ================================================ import React, { useCallback, useContext, useEffect } from 'react'; import { useTranslation } from 'react-i18next'; import { MainMenu } from "../components/dashboard/MainMenu"; import { Stack, Box } from "@mui/material"; import { Recordings } from "../components/robot/Recordings"; import { Runs } from "../components/run/Runs"; import ProxyForm from '../components/proxy/ProxyForm'; import ApiKey from '../components/api/ApiKey'; import { useGlobalInfoStore, useCacheInvalidation } from "../context/globalInfo"; import { createAndRunRecording, createRunForStoredRecording, CreateRunResponseWithQueue, interpretStoredRecording, notifyAboutAbort, scheduleStoredRecording } from "../api/storage"; import { io, Socket } from "socket.io-client"; import { stopRecording 
} from "../api/recording"; import { RunSettings } from "../components/run/RunSettings"; import { ScheduleSettings } from "../components/robot/pages/ScheduleSettingsPage"; import { apiUrl } from "../apiConfig"; import { useNavigate } from 'react-router-dom'; import { AuthContext } from '../context/auth'; import { useSocketStore } from '../context/socket'; interface MainPageProps { handleEditRecording: (id: string, fileName: string) => void; initialContent: string; } export interface CreateRunResponse { browserId: string; runId: string; robotMetaId: string; } export interface ScheduleRunResponse { message: string; runId: string; } export const MainPage = ({ handleEditRecording, initialContent }: MainPageProps) => { const { t } = useTranslation(); const [content, setContent] = React.useState(initialContent); const [sockets, setSockets] = React.useState([]); const [runningRecordingId, setRunningRecordingId] = React.useState(''); const [runningRecordingName, setRunningRecordingName] = React.useState(''); const [currentInterpretationLog, setCurrentInterpretationLog] = React.useState(''); const [ids, setIds] = React.useState({ browserId: '', runId: '', robotMetaId: '' }); const [queuedRuns, setQueuedRuns] = React.useState>(new Set()); let aborted = false; const { notify, setRerenderRuns, setRecordingId } = useGlobalInfoStore(); const { invalidateRuns, addOptimisticRun } = useCacheInvalidation(); const navigate = useNavigate(); React.useEffect(() => { setContent(initialContent); }, [initialContent]); const { state } = useContext(AuthContext); const { user } = state; const { connectToQueueSocket, disconnectQueueSocket } = useSocketStore(); const abortRunHandler = (runId: string, robotName: string, browserId: string) => { notify('info', t('main_page.notifications.abort_initiated', { name: robotName })); aborted = true; notifyAboutAbort(runId).then(async (response) => { if (!response.success) { notify('error', t('main_page.notifications.abort_failed', { name: robotName })); 
setRerenderRuns(true); invalidateRuns(); return; } if (response.isQueued) { setRerenderRuns(true); invalidateRuns(); notify('success', t('main_page.notifications.abort_success', { name: robotName })); setQueuedRuns(prev => { const newSet = new Set(prev); newSet.delete(runId); return newSet; }); return; } const abortSocket = io(`${apiUrl}/${browserId}`, { transports: ["websocket"], rejectUnauthorized: false }); abortSocket.on('run-aborted', (abortData) => { if (abortData.runId === runId) { notify('success', t('main_page.notifications.abort_success', { name: abortData.robotName || robotName })); setRerenderRuns(true); invalidateRuns(); abortSocket.disconnect(); } }); abortSocket.on('connect_error', (error) => { console.log('Abort socket connection error:', error); notify('error', t('main_page.notifications.abort_failed', { name: robotName })); setRerenderRuns(true); invalidateRuns(); abortSocket.disconnect(); }); }); } const setRecordingInfo = (id: string, name: string) => { setRunningRecordingId(id); setRecordingId(id); setRunningRecordingName(name); } const readyForRunHandler = useCallback((browserId: string, runId: string) => { interpretStoredRecording(runId).then(async (interpretation: boolean) => { if (!aborted) { if (interpretation) { // notify('success', t('main_page.notifications.interpretation_success', { name: runningRecordingName })); } else { notify('success', t('main_page.notifications.interpretation_failed', { name: runningRecordingName })); // destroy the created browser await stopRecording(browserId); } } setRunningRecordingName(''); setCurrentInterpretationLog(''); setRerenderRuns(true); invalidateRuns(); }) }, [runningRecordingName, aborted, currentInterpretationLog, notify, setRerenderRuns]); const debugMessageHandler = useCallback((msg: string) => { setCurrentInterpretationLog((prevState) => prevState + '\n' + `[${new Date().toLocaleString()}] ` + msg); }, [currentInterpretationLog]) const handleRunRecording = useCallback((settings: RunSettings) 
=> { // Add optimistic run to cache immediately const optimisticRun = { id: runningRecordingId, runId: `temp-${Date.now()}`, // Temporary ID until we get the real one status: 'running', name: runningRecordingName, startedAt: new Date().toISOString(), finishedAt: '', robotMetaId: runningRecordingId, log: 'Starting...', isOptimistic: true }; addOptimisticRun(optimisticRun); createAndRunRecording(runningRecordingId, settings).then((response: CreateRunResponseWithQueue) => { invalidateRuns(); const { browserId, runId, robotMetaId, queued } = response; setIds({ browserId, runId, robotMetaId }); navigate(`/runs/${robotMetaId}/run/${runId}`); if (queued) { setQueuedRuns(prev => new Set([...prev, runId])); notify('info', `Run queued: ${runningRecordingName}`); } else { const socket = io(`${apiUrl}/${browserId}`, { transports: ["websocket"], rejectUnauthorized: false }); setSockets(sockets => [...sockets, socket]); socket.on('debugMessage', debugMessageHandler); socket.on('run-completed', (data) => { setRerenderRuns(true); invalidateRuns(); const robotName = data.robotName; if (data.status === 'success') { notify('success', t('main_page.notifications.interpretation_success', { name: robotName })); } else { notify('error', t('main_page.notifications.interpretation_failed', { name: robotName })); } }); socket.on('connect_error', (error) => { console.log('error', `Failed to connect to browser ${browserId}: ${error}`); notify('error', t('main_page.notifications.connection_failed', { name: runningRecordingName })); }); socket.on('disconnect', (reason) => { console.log('warn', `Disconnected from browser ${browserId}: ${reason}`); }); if (runId) { notify('info', t('main_page.notifications.run_started', { name: runningRecordingName })); } else { notify('error', t('main_page.notifications.run_start_failed', { name: runningRecordingName })); } } setContent('runs'); }).catch((error: any) => { console.error('Error in createAndRunRecording:', error); // ✅ Debug log }); return (socket: 
Socket) => { socket.off('debugMessage', debugMessageHandler); socket.off('run-completed'); socket.off('connect_error'); socket.off('disconnect'); } }, [runningRecordingName, sockets, ids, debugMessageHandler, user?.id, t, notify, setRerenderRuns, setQueuedRuns, navigate, setContent, setIds, invalidateRuns, addOptimisticRun, runningRecordingId]); useEffect(() => { return () => { queuedRuns.clear(); }; }, []); const handleScheduleRecording = async (settings: ScheduleSettings) => { const { message, runId }: ScheduleRunResponse = await scheduleStoredRecording(runningRecordingId, settings); if (message === 'success') { notify('success', t('main_page.notifications.schedule_success', { name: runningRecordingName })); } else { notify('error', t('main_page.notifications.schedule_failed', { name: runningRecordingName })); } return message === 'success'; } useEffect(() => { if (user?.id) { const handleRunStarted = (startedData: any) => { setRerenderRuns(true); invalidateRuns(); const robotName = startedData.robotName || 'Unknown Robot'; notify('info', t('main_page.notifications.run_started', { name: robotName })); }; const handleRunCompleted = (completionData: any) => { setRerenderRuns(true); invalidateRuns(); // Invalidate cache to show completed run status if (queuedRuns.has(completionData.runId)) { setQueuedRuns(prev => { const newSet = new Set(prev); newSet.delete(completionData.runId); return newSet; }); } const robotName = completionData.robotName || 'Unknown Robot'; if (completionData.status === 'success') { notify('success', t('main_page.notifications.interpretation_success', { name: robotName })); } else { notify('error', t('main_page.notifications.interpretation_failed', { name: robotName })); } }; const handleRunRecovered = (recoveredData: any) => { setRerenderRuns(true); invalidateRuns(); if (queuedRuns.has(recoveredData.runId)) { setQueuedRuns(prev => { const newSet = new Set(prev); newSet.delete(recoveredData.runId); return newSet; }); } const robotName = 
recoveredData.robotName || 'Unknown Robot'; notify('error', t('main_page.notifications.interpretation_failed', { name: robotName })); }; const handleRunScheduled = (scheduledData: any) => { setRerenderRuns(true); invalidateRuns(); }; connectToQueueSocket(user.id, handleRunCompleted, handleRunStarted, handleRunRecovered, handleRunScheduled); return () => { console.log('Disconnecting persistent queue socket for user:', user.id); disconnectQueueSocket(); }; } }, [user?.id, connectToQueueSocket, disconnectQueueSocket, t, setRerenderRuns, queuedRuns, setQueuedRuns]); const DisplayContent = () => { switch (content) { case 'robots': return ; case 'runs': return ; case 'proxy': return ; case 'apikey': return ; default: return null; } } return ( {DisplayContent()} ) } ================================================ FILE: src/pages/PageWrapper.tsx ================================================ import React, { useEffect, useState } from 'react'; import { NavBar } from "../components/dashboard/NavBar"; import { SocketProvider } from "../context/socket"; import { BrowserDimensionsProvider } from "../context/browserDimensions"; import { AuthProvider } from '../context/auth'; import { RecordingPage } from "./RecordingPage"; import { MainPage } from "./MainPage"; import { useGlobalInfoStore } from "../context/globalInfo"; import { AlertSnackbar } from "../components/ui/AlertSnackbar"; import Login from './Login'; import Register from './Register'; import UserRoute from '../routes/userRoute'; import { Routes, Route, useNavigate, Navigate } from 'react-router-dom'; import { NotFoundPage } from '../components/dashboard/NotFound'; import RobotCreate from '../components/robot/pages/RobotCreate'; import { Box } from '@mui/material'; export const PageWrapper = () => { const [open, setOpen] = useState(false); const [isRecordingMode, setIsRecordingMode] = useState(false); const navigate = useNavigate(); const { browserId, setBrowserId, notification, recordingName, setRecordingName, 
recordingId, setRecordingId, setRecordingUrl } = useGlobalInfoStore(); const handleEditRecording = (recordingId: string, fileName: string) => { setRecordingName(fileName); setRecordingId(recordingId); setBrowserId('new-recording'); navigate('/recording'); } const isNotification = (): boolean => { if (notification.isOpen && !open) { setOpen(true); } return notification.isOpen; } /** * Get the current tab's state from session storage */ const getTabState = (key: string): string | null => { try { const value = window.sessionStorage.getItem(key); return value; } catch (error) { return null; } }; useEffect(() => { const tabMode = getTabState('tabMode'); const urlParams = new URLSearchParams(window.location.search); const sessionParam = urlParams.get('session'); const storedSessionId = getTabState('recordingSessionId'); const storedRecordingUrl = getTabState('recordingUrl'); if (location.pathname === '/recording-setup' && sessionParam && sessionParam === storedSessionId) { setBrowserId('new-recording'); setRecordingName(''); setRecordingId(''); if (storedRecordingUrl) { setRecordingUrl(storedRecordingUrl); } navigate('/recording'); } else if (location.pathname === '/recording' || (getTabState('nextTabIsRecording') === 'true' && sessionParam === storedSessionId)) { setIsRecordingMode(true); if (location.pathname !== '/recording') { navigate('/recording'); } window.sessionStorage.removeItem('nextTabIsRecording'); } else if (tabMode === 'main') { console.log('Tab is in main application mode'); } else { const id = getTabState('browserId'); if (id === 'new-recording' || location.pathname === '/recording') { setIsRecordingMode(true); } } }, [location.pathname, navigate, setBrowserId, setRecordingId, setRecordingName, setRecordingUrl]); const isAuthPage = location.pathname === '/login' || location.pathname === '/register'; const isRecordingPage = location.pathname === '/recording'; return (
{/* Show NavBar only for main app pages, not for recording pages */} {!isRecordingPage && ( )} }> } /> } /> } /> } /> } /> } /> }> } /> } /> } /> } /> } /> {isNotification() ? : null }
); } ================================================ FILE: src/pages/RecordingPage.tsx ================================================ import React, { useCallback, useEffect, useState } from 'react'; import { Grid } from '@mui/material'; import { BrowserContent } from "../components/browser/BrowserContent"; import { InterpretationLog } from "../components/run/InterpretationLog"; import { startRecording, getActiveBrowserId } from "../api/recording"; import { RightSidePanel } from "../components/recorder/RightSidePanel"; import { Loader } from "../components/ui/Loader"; import { useSocketStore } from "../context/socket"; import { useBrowserDimensionsStore } from "../context/browserDimensions"; import { ActionProvider } from "../context/browserActions" import { BrowserStepsProvider } from '../context/browserSteps'; import { useGlobalInfoStore } from "../context/globalInfo"; import { editRecordingFromStorage } from "../api/storage"; import { WhereWhatPair } from "maxun-core"; import styled from "styled-components"; import BrowserRecordingSave from '../components/browser/BrowserRecordingSave'; import { useThemeMode } from '../context/theme-provider'; import { useTranslation } from 'react-i18next'; interface RecordingPageProps { recordingName?: string; } export interface PairForEdit { pair: WhereWhatPair | null, index: number, } export const RecordingPage = ({ recordingName }: RecordingPageProps) => { const { darkMode } = useThemeMode(); const { t } = useTranslation(); const [isLoaded, setIsLoaded] = React.useState(false); const [hasScrollbar, setHasScrollbar] = React.useState(false); const [pairForEdit, setPairForEdit] = useState({ pair: null, index: 0, }); const [showOutputData, setShowOutputData] = useState(false); const browserContentRef = React.useRef(null); const workflowListRef = React.useRef(null); const { setId, socket } = useSocketStore(); const { setWidth } = useBrowserDimensionsStore(); const { browserId, setBrowserId, recordingId, recordingUrl, 
setRecordingUrl, setRecordingName, setRetrainRobotId, setIsDOMMode } = useGlobalInfoStore();

  /** Sets the `showOutputData` flag to true. */
  const handleShowOutputData = useCallback(() => {
    setShowOutputData(true);
  }, []);

  /** Stores the pair (and its index) that is currently selected for editing. */
  const handleSelectPairForEdit = (pair: WhereWhatPair, index: number) => {
    setPairForEdit({
      pair,
      index,
    });
  };

  // Apply a page-level background that follows the theme, and undo it when the
  // theme changes or the component unmounts.
  useEffect(() => {
    if (darkMode) {
      document.body.style.background = '#080808ff';
    } else {
      document.body.style.background = 'radial-gradient(circle, rgba(255, 255, 255, 1) 0%, rgba(232, 191, 222, 1) 100%, rgba(255, 255, 255, 1) 100%)';
      document.body.style.filter = 'progid:DXImageTransform.Microsoft.gradient(startColorstr="#ffffff",endColorstr="#ffffff",GradientType=1);';
    }

    return () => {
      document.body.style.background = '';
      document.body.style.filter = '';
    };
  }, [darkMode]);

  // Restore per-tab hand-off values from sessionStorage, then attach to the
  // active remote browser or start a new recording if there is none.
  useEffect(() => {
    let isCancelled = false;

    const handleRecording = async () => {
      setIsDOMMode(true);

      // Each hand-off value is consumed once: copy it into the store, then
      // remove it from sessionStorage.
      const storedUrl = window.sessionStorage.getItem('recordingUrl');
      if (storedUrl && !recordingUrl) {
        setRecordingUrl(storedUrl);
        window.sessionStorage.removeItem('recordingUrl');
      }

      const robotName = window.sessionStorage.getItem('robotName');
      if (robotName) {
        setRecordingName(robotName);
        window.sessionStorage.removeItem('robotName');
      }

      // Named differently from the store's `recordingId` to avoid shadowing it.
      const robotToRetrain = window.sessionStorage.getItem('robotToRetrain');
      if (robotToRetrain) {
        setRetrainRobotId(robotToRetrain);
        window.sessionStorage.removeItem('robotToRetrain');
      }

      const id = await getActiveBrowserId();
      if (isCancelled) {
        return;
      }

      if (id) {
        setId(id);
        setBrowserId(id);
        setIsLoaded(true);
        return;
      }

      const newId = await startRecording();
      // The effect may have been cleaned up while the request was in flight;
      // do not publish the id from a stale run.
      if (isCancelled) {
        return;
      }
      setId(newId);
      setBrowserId(newId);
    };

    handleRecording().catch((error) => {
      // Surface failures instead of leaving an unhandled promise rejection.
      console.error('Failed to initialise recording session:', error);
    });

    return () => {
      isCancelled = true;
    };
  }, [setId, recordingUrl, setRecordingUrl, setRecordingName, setRetrainRobotId]);

  /**
   * Handler for the socket's 'loaded' event.
   *
   * When an existing recording is being edited (name, browser id and recording
   * id are all set) it is loaded via `editRecordingFromStorage` before the page
   * is marked as loaded. Otherwise a 'new-recording' event is emitted for a
   * fresh session, the pending URL (if any) is sent, and the page is marked as
   * loaded immediately.
   */
  const handleLoaded = useCallback(() => {
    if (recordingName && browserId && recordingId) {
      editRecordingFromStorage(browserId, recordingId).then(() => setIsLoaded(true));
    } else {
      if (browserId === 'new-recording') {
        socket?.emit('new-recording');
      }
      if (recordingUrl && socket) {
        socket.emit('input:url', recordingUrl);
      }
      setIsLoaded(true);
    }
  }, [socket, browserId, recordingName, recordingId, recordingUrl, isLoaded]);

  // Keep exactly one 'loaded' listener registered for the current socket/handler.
  useEffect(() => {
    socket?.on('loaded', handleLoaded);
    return () => {
      socket?.off('loaded', handleLoaded);
    };
  }, [socket, handleLoaded]);

  return (
); };

/** Full-viewport container (100vw x 100vh) that hides overflowing content. */
const RecordingPageWrapper = styled.div` position: relative; width: 100vw; height: 100vh; overflow: hidden; `;

================================================
FILE: src/pages/Register.tsx
================================================
import axios from "axios";
import { useState, useContext, useEffect } from "react";
import { useNavigate, Link } from "react-router-dom";
import { AuthContext } from "../context/auth";
import { Box, Typography, TextField, Button, CircularProgress } from "@mui/material";
import { useGlobalInfoStore } from "../context/globalInfo";
import { apiUrl } from "../apiConfig";
import { useThemeMode } from "../context/theme-provider";
import { useTranslation } from 'react-i18next';
import i18n from '../i18n';

/**
 * Registration page.
 *
 * Collects an email and password, posts them to `${apiUrl}/auth/register`,
 * and on success dispatches a LOGIN action, stores the returned user in
 * localStorage and navigates to "/". If a user is already present in the auth
 * state, it navigates to "/" straight away.
 */
const Register = () => {
  const { t } = useTranslation();
  const [form, setForm] = useState({
    email: "",
    password: "",
  });
  const [loading, setLoading] = useState(false);
  const { notify } = useGlobalInfoStore();
  const { email, password } = form;

  const { state, dispatch } = useContext(AuthContext);
  const { user } = state;
  const { darkMode } = useThemeMode();

  const navigate = useNavigate();

  useEffect(() => {
    if (user) {
      navigate("/");
    }
  }, [user, navigate]);

  /** Copies the changed input's value into the form state, keyed by its `name`. */
  const handleChange = (e: any) => {
    const { name, value } = e.target;
    // Functional update: two change events handled in the same batch (for
    // example browser autofill) must not overwrite each other through a stale
    // `form` closure.
    setForm((prev) => ({ ...prev, [name]: value }));
  };

  /** Validates the email format, then submits the registration request. */
  const submitForm = async (e: any) => {
    e.preventDefault();

    const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
    if (!emailRegex.test(email)) {
      notify("error", "Invalid email format");
      return;
    }

    setLoading(true);
    try {
      const { data } = await axios.post(`${apiUrl}/auth/register`, { email, password });
      dispatch({ type: "LOGIN", payload: data });
      window.localStorage.setItem("user", JSON.stringify(data));
      navigate("/");
    } catch (error: any) {
      const errorResponse = error.response?.data;
      // Prefer the server-provided error code as the translation key.
      const errorMessage = errorResponse?.code ?
t(errorResponse.code) : t('register.error.generic'); notify("error", errorMessage); setLoading(false); } }; return ( logo {t('register.title')} {t('register.register_prompt')}{" "} {t('register.login_link')} ); }; export default Register; ================================================ FILE: src/routes/userRoute.tsx ================================================ import React, { useEffect, useState } from 'react'; import { Navigate, Outlet, useLocation } from 'react-router-dom'; import { useContext } from 'react'; import { AuthContext } from '../context/auth'; import { useGlobalInfoStore } from '../context/globalInfo'; const UserRoute = () => { const { state } = useContext(AuthContext); const location = useLocation(); const [isCheckingAuth, setIsCheckingAuth] = useState(true); const { setRecordingUrl } = useGlobalInfoStore(); useEffect(() => { if (location.pathname === '/recording') { const hasRecordingSession = window.sessionStorage.getItem('browserId') || window.sessionStorage.getItem('recordingSessionId'); const recordingUrl = window.sessionStorage.getItem('recordingUrl'); if (recordingUrl) { setRecordingUrl(recordingUrl); } if (hasRecordingSession) { console.log('UserRoute: Valid recording session detected, bypassing auth check'); setIsCheckingAuth(false); return; } } const timer = setTimeout(() => { setIsCheckingAuth(false); }, 100); return () => clearTimeout(timer); }, [location.pathname]); if (isCheckingAuth) { return null; } if (location.pathname === '/recording') { const hasRecordingSession = window.sessionStorage.getItem('browserId') || window.sessionStorage.getItem('recordingSessionId'); if (hasRecordingSession) { return ; } } return state.user ? 
: ; }; export default UserRoute;

================================================
FILE: src/shared/constants.ts
================================================
import { WorkflowFile } from "maxun-core";

/** A workflow file that contains no where/what pairs. */
export const emptyWorkflow: WorkflowFile = { workflow: [] };

================================================
FILE: src/shared/types.ts
================================================
import { WorkflowFile } from "maxun-core";
import { Locator } from "playwright-core";

/** The `workflow` member of a maxun-core `WorkflowFile`. */
export type Workflow = WorkflowFile["workflow"];

/**
 * Options for a screenshot action. All fields are optional.
 *
 * NOTE(review): the field names look like Playwright's page screenshot
 * options — confirm before relying on Playwright semantics for each field.
 */
export interface ScreenshotSettings {
  animations?: "disabled" | "allow";
  caret?: "hide" | "initial";
  /** Rectangle to capture, given by its origin and size. */
  clip?: {
    x: number;
    y: number;
    width: number;
    height: number;
  };
  fullPage?: boolean;
  /** Locators to mask in the capture. */
  mask?: Locator[];
  omitBackground?: boolean;
  // is this still needed? - maxun-core outputs to a binary output
  path?: string;
  quality?: number;
  scale?: "css" | "device";
  timeout?: number;
  type?: "jpeg" | "png";
};

/** Names of the custom (non-browser-native) actions a workflow step may use. */
export declare type CustomActions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList' | 'scrapeListAuto';

================================================
FILE: tsconfig.json
================================================
{ "compilerOptions": { "target": "esnext", "lib": ["dom", "dom.iterable", "esnext"], "allowJs": true, "skipLibCheck": true, "esModuleInterop": true, "allowSyntheticDefaultImports": true, "strict": true, "forceConsistentCasingInFileNames": true, "noFallthroughCasesInSwitch": true, "module": "esnext", "moduleResolution": "node", "resolveJsonModule": true, "isolatedModules": true, "noEmit": true, "jsx": "react-jsx", "types": ["vite/client"], "outDir": "./build" }, "include": ["src", "vite-env.d.ts"] }

================================================
FILE: typedoc.json
================================================
{ "$schema": "https://typedoc.org/schema.json", "entryPoints": ["./src", "./server/src"], "sort": ["source-order"], "categorizeByGroup": false, "tsconfig": "./tsconfig.json" }
================================================
FILE: vite-env.d.ts
================================================
/** Build-time environment values injected through Vite's `define` option. */
interface ImportMetaEnv {
  readonly VITE_BACKEND_URL: string;
  /** Also defined by vite.config.js (`import.meta.env.VITE_PUBLIC_URL`). */
  readonly VITE_PUBLIC_URL: string;
}

interface ImportMeta {
  readonly env: ImportMetaEnv;
}

================================================
FILE: vite.config.js
================================================
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';
import dotenv from 'dotenv';

// Load variables from .env into process.env before the config is evaluated.
dotenv.config();

/**
 * Vite configuration.
 *
 * The dev-server host and port are derived from VITE_PUBLIC_URL (default
 * 'http://localhost:5173'); the backend and public URLs are inlined into the
 * bundle as `import.meta.env.*` constants.
 */
export default defineConfig(() => {
  const publicUrl = process.env.VITE_PUBLIC_URL || 'http://localhost:5173';
  const { hostname, port } = new URL(publicUrl);

  return {
    define: {
      'import.meta.env.VITE_BACKEND_URL': JSON.stringify(process.env.VITE_BACKEND_URL),
      'import.meta.env.VITE_PUBLIC_URL': JSON.stringify(publicUrl),
    },
    server: {
      host: hostname,
      // URL#port is '' when the URL relies on the scheme's default port
      // (e.g. 'https://app.example.com'); parseInt('') would be NaN, so leave
      // the port undefined and let Vite pick its default instead.
      port: port ? Number.parseInt(port, 10) : undefined,
    },
    build: {
      outDir: 'build',
      manifest: true,
      chunkSizeWarningLimit: 1024,
    },
    plugins: [react()],
  };
});