Repository: dgtlmoon/changedetection.io Branch: master Commit: 5f9fa15a6ad4 Files: 427 Total size: 4.6 MB Directory structure: gitextract_1122jxp9/ ├── .dockerignore ├── .github/ │ ├── FUNDING.yml │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ └── feature_request.md │ ├── actions/ │ │ └── extract-memory-report/ │ │ └── action.yml │ ├── dependabot.yml │ ├── nginx-reverse-proxy-test.conf │ ├── test/ │ │ └── Dockerfile-alpine │ └── workflows/ │ ├── codeql-analysis.yml │ ├── containers.yml │ ├── pypi-release.yml │ ├── test-container-build.yml │ ├── test-only.yml │ └── test-stack-reusable-workflow.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .ruff.toml ├── COMMERCIAL_LICENCE.md ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README-pip.md ├── README.md ├── babel.cfg ├── changedetection.py ├── changedetectionio/ │ ├── .gitignore │ ├── PLUGIN_README.md │ ├── __init__.py │ ├── api/ │ │ ├── Import.py │ │ ├── Notifications.py │ │ ├── Search.py │ │ ├── Spec.py │ │ ├── SystemInfo.py │ │ ├── Tags.py │ │ ├── Watch.py │ │ ├── __init__.py │ │ └── auth.py │ ├── auth_decorator.py │ ├── blueprint/ │ │ ├── __init__.py │ │ ├── backups/ │ │ │ ├── __init__.py │ │ │ ├── restore.py │ │ │ └── templates/ │ │ │ ├── backup_create.html │ │ │ └── backup_restore.html │ │ ├── browser_steps/ │ │ │ ├── TODO.txt │ │ │ └── __init__.py │ │ ├── check_proxies/ │ │ │ └── __init__.py │ │ ├── imports/ │ │ │ ├── __init__.py │ │ │ ├── importer.py │ │ │ └── templates/ │ │ │ └── import.html │ │ ├── price_data_follower/ │ │ │ └── __init__.py │ │ ├── rss/ │ │ │ ├── __init__.py │ │ │ ├── _util.py │ │ │ ├── blueprint.py │ │ │ ├── main_feed.py │ │ │ ├── single_watch.py │ │ │ └── tag.py │ │ ├── settings/ │ │ │ ├── __init__.py │ │ │ └── templates/ │ │ │ ├── notification-log.html │ │ │ └── settings.html │ │ ├── tags/ │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── form.py │ │ │ └── templates/ │ │ │ ├── edit-tag.html │ │ │ └── groups-overview.html │ │ ├── ui/ │ │ │ ├── __init__.py │ │ │ ├── 
diff.py │ │ │ ├── edit.py │ │ │ ├── notification.py │ │ │ ├── preview.py │ │ │ ├── templates/ │ │ │ │ ├── clear_all_history.html │ │ │ │ ├── diff-offscreen-options.html │ │ │ │ ├── diff.html │ │ │ │ ├── edit.html │ │ │ │ └── preview.html │ │ │ └── views.py │ │ └── watchlist/ │ │ ├── __init__.py │ │ └── templates/ │ │ └── watch-overview.html │ ├── browser_steps/ │ │ ├── __init__.py │ │ └── browser_steps.py │ ├── conditions/ │ │ ├── __init__.py │ │ ├── blueprint.py │ │ ├── default_plugin.py │ │ ├── exceptions.py │ │ ├── form.py │ │ ├── pluggy_interface.py │ │ └── plugins/ │ │ ├── __init__.py │ │ ├── levenshtein_plugin.py │ │ └── wordcount_plugin.py │ ├── content_fetchers/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── exceptions/ │ │ │ └── __init__.py │ │ ├── playwright.py │ │ ├── puppeteer.py │ │ ├── requests.py │ │ ├── res/ │ │ │ ├── __init__.py │ │ │ ├── favicon-fetcher.js │ │ │ ├── lock-elements-sizing.js │ │ │ ├── stock-not-in-stock.js │ │ │ ├── unlock-elements-sizing.js │ │ │ └── xpath_element_scraper.js │ │ ├── screenshot_handler.py │ │ └── webdriver_selenium.py │ ├── custom_queue.py │ ├── diff/ │ │ ├── __init__.py │ │ └── tokenizers/ │ │ ├── __init__.py │ │ ├── natural_text.py │ │ └── words_and_html.py │ ├── favicon_utils.py │ ├── flask_app.py │ ├── forms.py │ ├── gc_cleanup.py │ ├── html_tools.py │ ├── is_safe_url.py │ ├── jinja2_custom/ │ │ ├── __init__.py │ │ ├── extensions/ │ │ │ ├── TimeExtension.py │ │ │ └── __init__.py │ │ ├── plugins/ │ │ │ ├── __init__.py │ │ │ └── regex.py │ │ └── safe_jinja.py │ ├── languages.py │ ├── model/ │ │ ├── App.py │ │ ├── Tag.py │ │ ├── Tags.py │ │ ├── Watch.py │ │ ├── __init__.py │ │ ├── persistence.py │ │ └── schema_utils.py │ ├── notification/ │ │ ├── __init__.py │ │ ├── apprise_plugin/ │ │ │ ├── __init__.py │ │ │ ├── assets.py │ │ │ ├── custom_handlers.py │ │ │ └── discord.py │ │ ├── email_helpers.py │ │ └── handler.py │ ├── notification_service.py │ ├── pluggy_interface.py │ ├── processors/ │ │ ├── README.md │ │ ├── 
__init__.py │ │ ├── base.py │ │ ├── exceptions.py │ │ ├── extract.py │ │ ├── image_ssim_diff/ │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── difference.py │ │ │ ├── edit_hook.py │ │ │ ├── forms.py │ │ │ ├── image_handler/ │ │ │ │ ├── __init__.py │ │ │ │ ├── isolated_libvips.py │ │ │ │ ├── isolated_opencv.py │ │ │ │ └── libvips_handler.py │ │ │ ├── preview.py │ │ │ ├── processor.py │ │ │ ├── templates/ │ │ │ │ └── image_ssim_diff/ │ │ │ │ ├── diff.html │ │ │ │ └── preview.html │ │ │ └── util.py │ │ ├── magic.py │ │ ├── restock_diff/ │ │ │ ├── __init__.py │ │ │ ├── api.yaml │ │ │ ├── forms.py │ │ │ ├── processor.py │ │ │ └── pure_python_extractor.py │ │ ├── templates/ │ │ │ └── extract.html │ │ └── text_json_diff/ │ │ ├── __init__.py │ │ ├── difference.py │ │ └── processor.py │ ├── pytest.ini │ ├── queue_handlers.py │ ├── queuedWatchMetaData.py │ ├── realtime/ │ │ ├── README.md │ │ ├── __init__.py │ │ ├── events.py │ │ └── socket_server.py │ ├── rss_tools.py │ ├── run_basic_tests.sh │ ├── run_custom_browser_url_tests.sh │ ├── run_proxy_tests.sh │ ├── run_socks_proxy_tests.sh │ ├── static/ │ │ ├── favicons/ │ │ │ ├── browserconfig.xml │ │ │ └── site.webmanifest │ │ ├── js/ │ │ │ ├── browser-steps.js │ │ │ ├── comparison-slider.js │ │ │ ├── conditions.js │ │ │ ├── csrf.js │ │ │ ├── diff-overview.js │ │ │ ├── diff-render.js │ │ │ ├── flask-toast-bridge.js │ │ │ ├── global-settings.js │ │ │ ├── hamburger-menu.js │ │ │ ├── language-selector.js │ │ │ ├── modal.js │ │ │ ├── notifications.js │ │ │ ├── plugins.js │ │ │ ├── preview.js │ │ │ ├── realtime.js │ │ │ ├── recheck-proxy.js │ │ │ ├── scheduler.js │ │ │ ├── search-modal.js │ │ │ ├── snippet-to-image.js │ │ │ ├── stepper.js │ │ │ ├── tabs.js │ │ │ ├── toast.js │ │ │ ├── toggle-theme.js │ │ │ ├── vis.js │ │ │ ├── visual-selector.js │ │ │ ├── watch-overview.js │ │ │ └── watch-settings.js │ │ └── styles/ │ │ ├── .dockerignore │ │ ├── .gitignore │ │ ├── diff-image.css │ │ ├── diff.css │ │ ├── package.json │ │ ├── 
pure-min.css │ │ ├── scss/ │ │ │ ├── _settings.scss │ │ │ ├── diff-image.scss │ │ │ ├── diff.scss │ │ │ ├── parts/ │ │ │ │ ├── _action_sidebar.scss │ │ │ │ ├── _arrows.scss │ │ │ │ ├── _browser-steps.scss │ │ │ │ ├── _conditions_table.scss │ │ │ │ ├── _darkmode.scss │ │ │ │ ├── _diff_image.scss │ │ │ │ ├── _edit.scss │ │ │ │ ├── _extra_browsers.scss │ │ │ │ ├── _extra_proxies.scss │ │ │ │ ├── _hamburger_menu.scss │ │ │ │ ├── _language.scss │ │ │ │ ├── _lister_extra.scss │ │ │ │ ├── _login_form.scss │ │ │ │ ├── _love.scss │ │ │ │ ├── _menu.scss │ │ │ │ ├── _minitabs.scss │ │ │ │ ├── _modal.scss │ │ │ │ ├── _notification_bubble.scss │ │ │ │ ├── _pagination.scss │ │ │ │ ├── _preview_text_filter.scss │ │ │ │ ├── _search_modal.scss │ │ │ │ ├── _socket.scss │ │ │ │ ├── _spinners.scss │ │ │ │ ├── _tabs.scss │ │ │ │ ├── _toast.scss │ │ │ │ ├── _variables.scss │ │ │ │ ├── _visualselector.scss │ │ │ │ ├── _watch_table-mobile.scss │ │ │ │ ├── _watch_table.scss │ │ │ │ └── _widgets.scss │ │ │ └── styles.scss │ │ └── styles.css │ ├── store/ │ │ ├── __init__.py │ │ ├── base.py │ │ ├── file_saving_datastore.py │ │ └── updates.py │ ├── strtobool.py │ ├── templates/ │ │ ├── IMPORTANT.md │ │ ├── _common_fields.html │ │ ├── _helpers.html │ │ ├── base.html │ │ ├── edit/ │ │ │ ├── include_subtract.html │ │ │ └── text-options.html │ │ ├── login.html │ │ └── menu.html │ ├── test_cli_opts.sh │ ├── tests/ │ │ ├── __init__.py │ │ ├── apprise/ │ │ │ ├── test_apprise_asset.py │ │ │ └── test_apprise_custom_api_call.py │ │ ├── conftest.py │ │ ├── custom_browser_url/ │ │ │ ├── __init__.py │ │ │ └── test_custom_browser_url.py │ │ ├── fetchers/ │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── test_content.py │ │ │ └── test_custom_js_before_content.py │ │ ├── import/ │ │ │ └── spreadsheet.xlsx │ │ ├── itemprop_test_examples/ │ │ │ ├── README.md │ │ │ └── a.txt │ │ ├── plugins/ │ │ │ └── test_processor.py │ │ ├── proxy_list/ │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── 
proxies.json-example │ │ │ ├── squid-auth.conf │ │ │ ├── squid-passwords.txt │ │ │ ├── squid.conf │ │ │ ├── test_multiple_proxy.py │ │ │ ├── test_noproxy.py │ │ │ ├── test_proxy.py │ │ │ ├── test_proxy_noconnect.py │ │ │ └── test_select_custom_proxy.py │ │ ├── proxy_socks5/ │ │ │ ├── proxies.json-example │ │ │ ├── proxies.json-example-noauth │ │ │ ├── test_socks5_proxy.py │ │ │ └── test_socks5_proxy_sources.py │ │ ├── restock/ │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ └── test_restock.py │ │ ├── smtp/ │ │ │ ├── smtp-test-server.py │ │ │ └── test_notification_smtp.py │ │ ├── test_access_control.py │ │ ├── test_add_replace_remove_filter.py │ │ ├── test_api.py │ │ ├── test_api_notification_urls_validation.py │ │ ├── test_api_notifications.py │ │ ├── test_api_openapi.py │ │ ├── test_api_search.py │ │ ├── test_api_security.py │ │ ├── test_api_tags.py │ │ ├── test_auth.py │ │ ├── test_automatic_follow_ldjson_price.py │ │ ├── test_backend.py │ │ ├── test_backup.py │ │ ├── test_basic_socketio.py │ │ ├── test_block_while_text_present.py │ │ ├── test_clone.py │ │ ├── test_commit_persistence.py │ │ ├── test_conditions.py │ │ ├── test_css_selector.py │ │ ├── test_datastore_isolation.py │ │ ├── test_element_removal.py │ │ ├── test_encoding.py │ │ ├── test_errorhandling.py │ │ ├── test_extract_csv.py │ │ ├── test_extract_regex.py │ │ ├── test_filter_exist_changes.py │ │ ├── test_filter_failure_notification.py │ │ ├── test_group.py │ │ ├── test_history_consistency.py │ │ ├── test_html_to_text.py │ │ ├── test_i18n.py │ │ ├── test_ignore.py │ │ ├── test_ignore_regex_text.py │ │ ├── test_ignore_text.py │ │ ├── test_ignorehyperlinks.py │ │ ├── test_ignorestatuscode.py │ │ ├── test_ignorewhitespace.py │ │ ├── test_import.py │ │ ├── test_jinja2.py │ │ ├── test_jsonpath_jq_selector.py │ │ ├── test_live_preview.py │ │ ├── test_nonrenderable_pages.py │ │ ├── test_notification.py │ │ ├── test_notification_errors.py │ │ ├── test_obfuscations.py │ │ ├── test_pdf.py │ │ ├── 
test_preview_endpoints.py │ │ ├── test_queue_handler.py │ │ ├── test_request.py │ │ ├── test_restock_itemprop.py │ │ ├── test_rss.py │ │ ├── test_rss_group.py │ │ ├── test_rss_reader_mode.py │ │ ├── test_rss_single_watch.py │ │ ├── test_scheduler.py │ │ ├── test_search.py │ │ ├── test_security.py │ │ ├── test_settings_tag_force_reprocess.py │ │ ├── test_share_watch.py │ │ ├── test_source.py │ │ ├── test_trigger.py │ │ ├── test_trigger_regex.py │ │ ├── test_trigger_regex_with_filter.py │ │ ├── test_ui.py │ │ ├── test_unique_lines.py │ │ ├── test_watch_edited_flag.py │ │ ├── test_watch_fields_storage.py │ │ ├── test_xpath_default_namespace.py │ │ ├── test_xpath_selector.py │ │ ├── test_xpath_selector_unit.py │ │ ├── unit/ │ │ │ ├── __init__.py │ │ │ ├── test-content/ │ │ │ │ ├── README.md │ │ │ │ ├── after-2.txt │ │ │ │ ├── after.txt │ │ │ │ └── before.txt │ │ │ ├── test_conditions.py │ │ │ ├── test_html_to_text.py │ │ │ ├── test_jinja2_security.py │ │ │ ├── test_notification_diff.py │ │ │ ├── test_restock_logic.py │ │ │ ├── test_scheduler.py │ │ │ ├── test_semver.py │ │ │ ├── test_time_extension.py │ │ │ ├── test_time_handler.py │ │ │ └── test_watch_model.py │ │ ├── util.py │ │ └── visualselector/ │ │ ├── __init__.py │ │ ├── conftest.py │ │ └── test_fetch_data.py │ ├── time_handler.py │ ├── translations/ │ │ ├── README.md │ │ ├── cs/ │ │ │ └── LC_MESSAGES/ │ │ │ ├── messages.mo │ │ │ └── messages.po │ │ ├── de/ │ │ │ └── LC_MESSAGES/ │ │ │ ├── messages.mo │ │ │ └── messages.po │ │ ├── en_GB/ │ │ │ └── LC_MESSAGES/ │ │ │ ├── messages.mo │ │ │ └── messages.po │ │ ├── en_US/ │ │ │ └── LC_MESSAGES/ │ │ │ ├── messages.mo │ │ │ └── messages.po │ │ ├── es/ │ │ │ └── LC_MESSAGES/ │ │ │ ├── messages.mo │ │ │ └── messages.po │ │ ├── fr/ │ │ │ └── LC_MESSAGES/ │ │ │ ├── messages.mo │ │ │ └── messages.po │ │ ├── it/ │ │ │ └── LC_MESSAGES/ │ │ │ ├── messages.mo │ │ │ └── messages.po │ │ ├── ko/ │ │ │ └── LC_MESSAGES/ │ │ │ ├── messages.mo │ │ │ └── messages.po │ │ ├── 
messages.pot │ │ ├── uk/ │ │ │ └── LC_MESSAGES/ │ │ │ ├── messages.mo │ │ │ └── messages.po │ │ ├── zh/ │ │ │ └── LC_MESSAGES/ │ │ │ ├── messages.mo │ │ │ └── messages.po │ │ └── zh_Hant_TW/ │ │ └── LC_MESSAGES/ │ │ ├── messages.mo │ │ └── messages.po │ ├── validate_url.py │ ├── widgets/ │ │ ├── __init__.py │ │ ├── ternary_boolean.py │ │ └── test_custom_text.py │ ├── worker.py │ └── worker_pool.py ├── docker-compose.yml ├── docker-entrypoint.sh ├── docs/ │ ├── .gitignore │ ├── README.md │ ├── api-spec.yaml │ ├── api_v1/ │ │ └── index.html │ └── package.json ├── requirements.txt ├── runtime.txt ├── setup.cfg └── setup.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ # Git .git/ .gitignore # GitHub .github/ # Byte-compiled / optimized / DLL files **/__pycache__ **/*.py[cod] # Caches .mypy_cache/ .pytest_cache/ .ruff_cache/ # Distribution / packaging build/ dist/ *.egg-info* # Virtual environment .env .venv/ venv/ # IntelliJ IDEA .idea/ # Visual Studio .vscode/ # Test and development files test-datastore/ tests/ *.md !README.md # Temporary and log files *.log *.tmp tmp/ temp/ # Training data and large files train-data/ works-data/ # Container files Dockerfile* docker-compose*.yml .dockerignore # Development certificates and keys *.pem *.key *.crt profile_output.prof # Large binary files that shouldn't be in container *.pdf chrome.json ================================================ FILE: .github/FUNDING.yml ================================================ # These are supported funding model platforms github: dgtlmoon ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a bug report, if you don't follow this template, your report will be DELETED title: '' labels: 
'triage' assignees: 'dgtlmoon' --- **DO NOT USE THIS FORM TO REPORT THAT A PARTICULAR WEBSITE IS NOT SCRAPING/WATCHING AS EXPECTED** This form is only for direct bugs and feature requests to do directly with the software. Please report watched websites (full URL and _any_ settings) that do not work with changedetection.io as expected [**IN THE DISCUSSION FORUMS**](https://github.com/dgtlmoon/changedetection.io/discussions) or your report will be deleted CONSIDER TAKING OUT A SUBSCRIPTION FOR A SMALL PRICE PER MONTH, YOU GET THE BENEFIT OF USING OUR PAID PROXIES AND FURTHERING THE DEVELOPMENT OF CHANGEDETECTION.IO THANK YOU **Describe the bug** A clear and concise description of what the bug is. **Version** *Exact version* in the top right area: 0.... **How did you install?** Docker, Pip, from source directly etc **To Reproduce** Steps to reproduce the behavior: 1. Go to '...' 2. Click on '....' 3. Scroll down to '....' 4. See error ! ALWAYS INCLUDE AN EXAMPLE URL WHERE IT IS POSSIBLE TO RE-CREATE THE ISSUE - USE THE 'SHARE WATCH' FEATURE AND PASTE IN THE SHARE-LINK! **Expected behavior** A clear and concise description of what you expected to happen. **Screenshots** If applicable, add screenshots to help explain your problem. **Desktop (please complete the following information):** - OS: [e.g. iOS] - Browser [e.g. chrome, safari] - Version [e.g. 22] **Smartphone (please complete the following information):** - Device: [e.g. iPhone6] - OS: [e.g. iOS8.1] - Browser [e.g. stock browser, safari] - Version [e.g. 22] **Additional context** Add any other context about the problem here. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '[feature]' labels: 'enhancement' assignees: '' --- **Version and OS** For example, 0.123 on linux/docker **Is your feature request related to a problem? 
Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe the use-case and give concrete real-world examples** Attach any HTML/JSON, give links to sites, screenshots etc, we are not mind readers **Additional context** Add any other context or screenshots about the feature request here. ================================================ FILE: .github/actions/extract-memory-report/action.yml ================================================ name: 'Extract Memory Test Report' description: 'Extracts and displays memory test report from a container' inputs: container-name: description: 'Name of the container to extract logs from' required: true python-version: description: 'Python version for artifact naming' required: true output-dir: description: 'Directory to store output logs' required: false default: 'output-logs' runs: using: "composite" steps: - name: Create output directory shell: bash run: | mkdir -p ${{ inputs.output-dir }} - name: Dump container log shell: bash run: | echo "Disabled for now" # return # docker logs ${{ inputs.container-name }} > ${{ inputs.output-dir }}/${{ inputs.container-name }}-stdout-${{ inputs.python-version }}.txt 2>&1 || echo "Could not get stdout" # docker logs ${{ inputs.container-name }} 2> ${{ inputs.output-dir }}/${{ inputs.container-name }}-stderr-${{ inputs.python-version }}.txt || echo "Could not get stderr" - name: Extract and display memory test report shell: bash run: | echo "Disabled for now" # echo "Extracting test-memory.log from container..." 
# docker cp ${{ inputs.container-name }}:/app/changedetectionio/test-memory.log ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log || echo "test-memory.log not found in container" # # echo "=== Top 10 Highest Peak Memory Tests ===" # if [ -f ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log ]; then # grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log | \ # sed 's/.*Peak memory: //' | \ # paste -d'|' - <(grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log) | \ # sort -t'|' -k1 -nr | \ # cut -d'|' -f2 | \ # head -10 # echo "" # echo "=== Full Memory Test Report ===" # cat ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log # else # echo "No memory log available" # fi ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: github-actions directory: / schedule: interval: "weekly" groups: all: patterns: - "*" - package-ecosystem: pip directory: / schedule: interval: "weekly" ================================================ FILE: .github/nginx-reverse-proxy-test.conf ================================================ server { listen 80; server_name localhost; # Test basic reverse proxy to changedetection.io location / { proxy_pass http://changedet-app:5000; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; # WebSocket support proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection "upgrade"; } # Test subpath deployment with X-Forwarded-Prefix location /changedet-sub/ { proxy_pass http://changedet-app:5000/; proxy_set_header X-Forwarded-Prefix /changedet-sub; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For 
$proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme; # WebSocket support proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection "upgrade"; } } ================================================ FILE: .github/test/Dockerfile-alpine ================================================ # Taken from https://github.com/linuxserver/docker-changedetection.io/blob/main/Dockerfile # Test that we can still build on Alpine (musl modified libc https://musl.libc.org/) # Some packages won't install via PyPI because they don't have a wheel available under this architecture. FROM ghcr.io/linuxserver/baseimage-alpine:3.22 ENV PYTHONUNBUFFERED=1 COPY requirements.txt /requirements.txt ARG TARGETPLATFORM RUN \ apk add --update --no-cache --virtual=build-dependencies \ build-base \ cargo \ git \ jpeg-dev \ libc-dev \ libffi-dev \ libxslt-dev \ openssl-dev \ python3-dev \ file \ zip \ zlib-dev && \ apk add --update --no-cache \ libjpeg \ libxslt \ file \ nodejs \ poppler-utils \ python3 \ glib \ libsm \ libxext \ libxrender && \ case "$TARGETPLATFORM" in \ linux/arm/v7|linux/arm/v8) \ echo "INFO: Skipping py3-opencv on $TARGETPLATFORM (using pixelmatch fallback)" \ ;; \ *) \ apk add --update --no-cache py3-opencv || echo "WARN: py3-opencv install failed, using pixelmatch fallback" \ ;; \ esac && \ echo "**** pip3 install test of changedetection.io ****" && \ python3 -m venv /lsiopy && \ pip install -U pip wheel setuptools && \ pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.22/ -r /requirements.txt && \ apk del --purge \ build-dependencies ================================================ FILE: .github/workflows/codeql-analysis.yml ================================================ # For most projects, this workflow file will not need changing; you simply need # to commit it to your repository. 
# # You may wish to alter this file to override the set of languages analyzed, # or to provide custom queries or build logic. # # ******** NOTE ******** # We have attempted to detect the languages in your repository. Please check # the `language` matrix defined below to confirm you have the correct set of # supported CodeQL languages. # name: "CodeQL" on: schedule: - cron: '27 9 * * 4' jobs: analyze: name: Analyze runs-on: ubuntu-latest strategy: fail-fast: false matrix: language: [ 'javascript', 'python' ] # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] # Learn more: # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed steps: - name: Checkout repository uses: actions/checkout@v6 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v4 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. # queries: ./path/to/local/query, your-org/your-repo/queries@main # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild uses: github/codeql-action/autobuild@v4 # ℹ️ Command-line programs to run using the OS shell. 
# 📚 https://git.io/JvXDl # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines # and modify them (or add more) to build your code if your project # uses a compiled language #- run: | # make bootstrap # make release - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v4 ================================================ FILE: .github/workflows/containers.yml ================================================ name: Build and push containers on: # Automatically triggered by a testing workflow passing, but this is only checked when it lands in the `master`/default branch # workflow_run: # workflows: ["ChangeDetection.io Test"] # branches: [master] # tags: ['0.*'] # types: [completed] # Or a new tagged release release: types: [published, edited] push: branches: - master jobs: metadata: runs-on: ubuntu-latest steps: - name: Show metadata run: | echo SHA ${{ github.sha }} echo github.ref: ${{ github.ref }} echo github_ref: $GITHUB_REF echo Event name: ${{ github.event_name }} echo Ref ${{ github.ref }} echo c: ${{ github.event.workflow_run.conclusion }} echo r: ${{ github.event.workflow_run }} echo tname: "${{ github.event.release.tag_name }}" echo headbranch: -${{ github.event.workflow_run.head_branch }}- set build-push-containers: runs-on: ubuntu-latest # If the testing workflow has a success, then we build to :latest # Or if we are in a tagged release scenario. 
if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != '' steps: - uses: actions/checkout@v6 - name: Set up Python 3.11 uses: actions/setup-python@v6 with: python-version: 3.11 - name: Cache pip packages uses: actions/cache@v5 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} restore-keys: | ${{ runner.os }}-pip- - name: Install dependencies run: | python -m pip install --upgrade pip pip install flake8 pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Create release metadata run: | # COPY'ed by Dockerfile into changedetectionio/ of the image, then read by the server in store.py echo ${{ github.sha }} > changedetectionio/source.txt echo ${{ github.ref }} > changedetectionio/tag.txt - name: Set up QEMU uses: docker/setup-qemu-action@v4 with: image: tonistiigi/binfmt:latest platforms: all - name: Login to GitHub Container Registry uses: docker/login-action@v4 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Login to Docker Hub Container Registry uses: docker/login-action@v4 with: username: ${{ secrets.DOCKER_HUB_USERNAME }} password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }} - name: Set up Docker Buildx id: buildx uses: docker/setup-buildx-action@v4 with: install: true version: latest driver-opts: image=moby/buildkit:master # master branch -> :dev container tag - name: Docker meta :dev if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }} uses: docker/metadata-action@v6 id: meta_dev with: images: | ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io ghcr.io/${{ github.repository }} tags: | type=raw,value=dev labels: | org.opencontainers.image.created=${{ github.event.release.published_at }} org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications. 
org.opencontainers.image.documentation=https://changedetection.io org.opencontainers.image.revision=${{ github.sha }} org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io org.opencontainers.image.title=changedetection.io org.opencontainers.image.url=https://changedetection.io - name: Build and push :dev id: docker_build if: ${{ github.ref == 'refs/heads/master' && github.event_name != 'release' }} uses: docker/build-push-action@v7 with: context: ./ file: ./Dockerfile push: true tags: ${{ steps.meta_dev.outputs.tags }} labels: ${{ steps.meta_dev.outputs.labels }} platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8 cache-from: type=gha cache-to: type=gha,mode=max # Looks like this was disabled # provenance: false # A new tagged release is required, which builds :tag and :latest - name: Debug release info if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.') run: | echo "Release tag: ${{ github.event.release.tag_name }}" echo "Github ref: ${{ github.ref }}" echo "Github ref name: ${{ github.ref_name }}" - name: Docker meta :tag if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.') uses: docker/metadata-action@v6 id: meta with: images: | ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io ghcr.io/dgtlmoon/changedetection.io tags: | type=semver,pattern={{version}},value=${{ github.event.release.tag_name }} type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }} type=semver,pattern={{major}},value=${{ github.event.release.tag_name }} type=raw,value=latest labels: | org.opencontainers.image.created=${{ github.event.release.published_at }} org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications. 
org.opencontainers.image.documentation=https://changedetection.io org.opencontainers.image.revision=${{ github.sha }} org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io org.opencontainers.image.title=changedetection.io org.opencontainers.image.url=https://changedetection.io org.opencontainers.image.version=${{ github.event.release.tag_name }} - name: Build and push :tag id: docker_build_tag_release if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.') uses: docker/build-push-action@v7 with: context: ./ file: ./Dockerfile push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8 cache-from: type=gha cache-to: type=gha,mode=max # Looks like this was disabled # provenance: false - name: Image digest run: echo step SHA ${{ steps.vars.outputs.sha_short }} tag ${{steps.vars.outputs.tag}} branch ${{steps.vars.outputs.branch}} digest ${{ steps.docker_build.outputs.digest }} ================================================ FILE: .github/workflows/pypi-release.yml ================================================ name: Publish Python 🐍distribution 📦 to PyPI and TestPyPI on: push jobs: build: name: Build distribution 📦 runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: python-version: "3.11" - name: Install pypa/build run: >- python3 -m pip install build --user - name: Build a binary wheel and a source tarball run: python3 -m build - name: Store the distribution packages uses: actions/upload-artifact@v7 with: name: python-package-distributions path: dist/ test-pypi-package: name: Test the built package works basically. 
runs-on: ubuntu-latest needs: - build steps: - name: Download all the dists uses: actions/download-artifact@v8 with: name: python-package-distributions path: dist/ - name: Set up Python 3.11 uses: actions/setup-python@v6 with: python-version: '3.11' - name: Test that the basic pip built package runs without error run: | set -ex ls -alR # Install the first wheel found in dist/ WHEEL=$(find dist -type f -name "*.whl" -print -quit) echo Installing $WHEEL python3 -m pip install --upgrade pip python3 -m pip install "$WHEEL" changedetection.io -d /tmp -p 10000 & sleep 3 curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null # --- API test --- # This also means that the docs/api-spec.yml was shipped and could be read test -f /tmp/changedetection.json API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/changedetection.json) echo Test API KEY is $API_KEY curl -X POST "http://127.0.0.1:10000/api/v1/watch" \ -H "x-api-key: ${API_KEY}" \ -H "Content-Type: application/json" \ --show-error --fail \ --retry 6 --retry-delay 1 --retry-connrefused \ -d '{ "url": "https://example.com", "title": "Example Site Monitor", "time_between_check": { "hours": 1 } }' killall changedetection.io publish-to-pypi: name: >- Publish Python 🐍 distribution 📦 to PyPI if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes needs: - test-pypi-package runs-on: ubuntu-latest environment: name: release url: https://pypi.org/p/changedetection.io permissions: id-token: write # IMPORTANT: mandatory for trusted publishing steps: - name: Download all the dists uses: actions/download-artifact@v8 with: name: python-package-distributions path: dist/ - name: Publish distribution 📦 to PyPI uses: pypa/gh-action-pypi-publish@release/v1 ================================================ FILE: .github/workflows/test-container-build.yml ================================================ 
name: ChangeDetection.io Container Build Test # Triggers the workflow on push or pull request events # This line doesn't work, even though it is the documented one #on: [push, pull_request] on: push: paths: - requirements.txt - Dockerfile - .github/workflows/* - .github/test/Dockerfile* pull_request: paths: - requirements.txt - Dockerfile - .github/workflows/* - .github/test/Dockerfile* # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing # @todo: some kind of path filter for requirements.txt and Dockerfile jobs: builder: name: Build ${{ matrix.platform }} (${{ matrix.dockerfile == './Dockerfile' && 'main' || 'alpine' }}) runs-on: ubuntu-latest strategy: matrix: include: # Main Dockerfile platforms - platform: linux/amd64 dockerfile: ./Dockerfile - platform: linux/arm64 dockerfile: ./Dockerfile - platform: linux/arm/v7 dockerfile: ./Dockerfile - platform: linux/arm/v8 dockerfile: ./Dockerfile # Alpine Dockerfile platforms (musl via alpine check) - platform: linux/amd64 dockerfile: ./.github/test/Dockerfile-alpine - platform: linux/arm64 dockerfile: ./.github/test/Dockerfile-alpine steps: - uses: actions/checkout@v6 - name: Set up Python 3.11 uses: actions/setup-python@v6 with: python-version: 3.11 - name: Cache pip packages uses: actions/cache@v5 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} restore-keys: | ${{ runner.os }}-pip- # Just test that the build works, some libraries won't compile on ARM/rPi etc - name: Set up QEMU uses: docker/setup-qemu-action@v4 with: image: tonistiigi/binfmt:latest platforms: all - name: Set up Docker Buildx id: buildx uses: docker/setup-buildx-action@v4 with: install: true version: latest driver-opts: image=moby/buildkit:master - name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }}) id: docker_build uses: docker/build-push-action@v7 # 
https://github.com/docker/build-push-action#customizing with: context: ./ file: ${{ matrix.dockerfile }} platforms: ${{ matrix.platform }} cache-from: type=gha cache-to: type=gha,mode=max ================================================ FILE: .github/workflows/test-only.yml ================================================ name: ChangeDetection.io App Test # Triggers the workflow on push or pull request events on: [push, pull_request] jobs: lint-code: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Lint with Ruff run: | pip install ruff # Check for syntax errors and undefined names ruff check . --select E9,F63,F7,F82 # Complete check with errors treated as warnings ruff check . --exit-zero - name: Validate OpenAPI spec run: | pip install openapi-spec-validator python3 -c "from openapi_spec_validator import validate_spec; import yaml; validate_spec(yaml.safe_load(open('docs/api-spec.yaml')))" test-application-3-10: # Only run on push to master (including PR merges) if: github.event_name == 'push' && github.ref == 'refs/heads/master' needs: lint-code uses: ./.github/workflows/test-stack-reusable-workflow.yml with: python-version: '3.10' test-application-3-11: # Always run needs: lint-code uses: ./.github/workflows/test-stack-reusable-workflow.yml with: python-version: '3.11' test-application-3-12: # Only run on push to master (including PR merges) if: github.event_name == 'push' && github.ref == 'refs/heads/master' needs: lint-code uses: ./.github/workflows/test-stack-reusable-workflow.yml with: python-version: '3.12' skip-pypuppeteer: true test-application-3-13: # Only run on push to master (including PR merges) if: github.event_name == 'push' && github.ref == 'refs/heads/master' needs: lint-code uses: ./.github/workflows/test-stack-reusable-workflow.yml with: python-version: '3.13' skip-pypuppeteer: true test-application-3-14: #if: github.event_name == 'push' && github.ref == 'refs/heads/master' needs: lint-code uses: 
./.github/workflows/test-stack-reusable-workflow.yml with: python-version: '3.14' skip-pypuppeteer: false ================================================ FILE: .github/workflows/test-stack-reusable-workflow.yml ================================================ name: ChangeDetection.io App Test on: workflow_call: inputs: python-version: description: 'Python version to use' required: true type: string default: '3.11' skip-pypuppeteer: description: 'Skip PyPuppeteer (not supported in 3.11/3.12)' required: false type: boolean default: false jobs: # Build the Docker image once and share it with all test jobs build: runs-on: ubuntu-latest env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 - name: Set up Python ${{ env.PYTHON_VERSION }} uses: actions/setup-python@v6 with: python-version: ${{ env.PYTHON_VERSION }} - name: Cache pip packages uses: actions/cache@v5 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt') }} restore-keys: | ${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}- ${{ runner.os }}-pip- - name: Get current date for cache key id: date run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - name: Set up Docker Buildx uses: docker/setup-buildx-action@v4 - name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }} uses: docker/build-push-action@v7 with: context: ./ file: ./Dockerfile build-args: | PYTHON_VERSION=${{ env.PYTHON_VERSION }} LOGGER_LEVEL=TRACE tags: test-changedetectionio load: true cache-from: type=gha,scope=build-${{ github.ref_name }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'Dockerfile') }}-${{ steps.date.outputs.date }} cache-to: type=gha,mode=max,scope=build-${{ github.ref_name }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt', 'Dockerfile') }}-${{ steps.date.outputs.date }} - name: Verify build run: | echo "---- Built for Python ${{ env.PYTHON_VERSION }} -----" docker run 
test-changedetectionio bash -c 'pip list' - name: We should be Python ${{ env.PYTHON_VERSION }} ... run: | docker run test-changedetectionio bash -c 'python3 --version' - name: Save Docker image run: | docker save test-changedetectionio -o /tmp/test-changedetectionio.tar - name: Upload Docker image artifact uses: actions/upload-artifact@v7 with: name: test-changedetectionio-${{ env.PYTHON_VERSION }} path: /tmp/test-changedetectionio.tar retention-days: 1 # Unit tests (lightweight, no ancillary services needed) unit-tests: runs-on: ubuntu-latest needs: build timeout-minutes: 10 env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 - name: Download Docker image artifact uses: actions/download-artifact@v8 with: name: test-changedetectionio-${{ env.PYTHON_VERSION }} path: /tmp - name: Load Docker image run: | docker load -i /tmp/test-changedetectionio.tar - name: Run Unit Tests run: | docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff' docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model' docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security' docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver' docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text' # Basic pytest tests with ancillary services basic-tests: runs-on: ubuntu-latest needs: build timeout-minutes: 25 env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 - name: Download Docker image artifact uses: actions/download-artifact@v8 with: name: test-changedetectionio-${{ env.PYTHON_VERSION }} path: /tmp - name: Load Docker image run: | docker load -i /tmp/test-changedetectionio.tar - name: Test built container with Pytest run: | docker network inspect changedet-network 
>/dev/null 2>&1 || docker network create changedet-network docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh' - name: Test CLI options run: | docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network docker run --name test-cdio-cli-opts --network changedet-network test-changedetectionio bash -c 'changedetectionio/test_cli_opts.sh' &> cli-opts-output.txt echo "=== CLI Options Test Output ===" cat cli-opts-output.txt - name: CLI Memory Test run: | echo "=== Checking CLI batch mode memory usage ===" # Extract RSS memory value from output RSS_MB=$(grep -oP "Memory consumption before worker shutdown: RSS=\K[\d.]+" cli-opts-output.txt | head -1 || echo "0") echo "RSS Memory: ${RSS_MB} MB" # Check if RSS is less than 100MB if [ -n "$RSS_MB" ]; then if (( $(echo "$RSS_MB < 100" | bc -l) )); then echo "✓ Memory usage is acceptable: ${RSS_MB} MB < 100 MB" else echo "✗ Memory usage too high: ${RSS_MB} MB >= 100 MB" exit 1 fi else echo "⚠ Could not extract memory usage, skipping check" fi - name: Extract memory report and logs if: always() uses: ./.github/actions/extract-memory-report with: container-name: test-cdio-basic-tests python-version: ${{ env.PYTHON_VERSION }} - name: Store test artifacts if: always() uses: actions/upload-artifact@v7 with: name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }} path: output-logs - name: Store CLI test output if: always() uses: actions/upload-artifact@v7 with: name: test-cdio-cli-opts-output-py${{ env.PYTHON_VERSION }} path: cli-opts-output.txt # Playwright tests playwright-tests: runs-on: ubuntu-latest needs: build timeout-minutes: 10 env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 - name: Download Docker image artifact uses: actions/download-artifact@v8 with: name: test-changedetectionio-${{ env.PYTHON_VERSION }} path: /tmp - name: Load Docker image run: | 
docker load -i /tmp/test-changedetectionio.tar - name: Spin up ancillary services run: | docker network create changedet-network docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest - name: Playwright - Specific tests in built container run: | docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' - name: Playwright - Headers and requests run: | docker run 
--name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .' - name: Playwright - Restock detection run: | docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' # Pyppeteer tests pyppeteer-tests: runs-on: ubuntu-latest needs: build if: ${{ inputs.skip-pypuppeteer == false }} timeout-minutes: 10 env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 - name: Download Docker image artifact uses: actions/download-artifact@v8 with: name: test-changedetectionio-${{ env.PYTHON_VERSION }} path: /tmp - name: Load Docker image run: | docker load -i /tmp/test-changedetectionio.tar - name: Spin up ancillary services run: | docker network create changedet-network docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest - name: Pyppeteer - Specific tests in built container run: | docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd 
changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' - name: Pyppeteer - Headers and requests checks run: | docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' - name: Pyppeteer - Restock detection run: | docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' # Selenium tests selenium-tests: runs-on: ubuntu-latest needs: build timeout-minutes: 10 env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 - name: Download Docker image artifact uses: actions/download-artifact@v8 with: name: test-changedetectionio-${{ env.PYTHON_VERSION }} path: /tmp - name: Load Docker image run: | docker load -i /tmp/test-changedetectionio.tar - name: Spin up 
ancillary services run: | docker network create changedet-network docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4 sleep 3 - name: Specific tests for headers and requests checks with Selenium run: | docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' - name: Specific tests in built container for Selenium run: | docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py' # SMTP tests smtp-tests: runs-on: ubuntu-latest needs: build timeout-minutes: 10 env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 - name: Download Docker image artifact uses: actions/download-artifact@v8 with: name: test-changedetectionio-${{ env.PYTHON_VERSION }} path: /tmp - name: Load Docker image run: | docker load -i /tmp/test-changedetectionio.tar - name: Spin up SMTP test server run: | docker network create changedet-network docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py' - name: Test SMTP notification mime types run: | docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py' nginx-reverse-proxy: runs-on: ubuntu-latest needs: build timeout-minutes: 10 env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 - name: Download Docker image artifact uses: actions/download-artifact@v8 with: name: 
test-changedetectionio-${{ env.PYTHON_VERSION }} path: /tmp - name: Load Docker image run: | docker load -i /tmp/test-changedetectionio.tar - name: Spin up services run: | docker network create changedet-network # Start changedetection.io container with X-Forwarded headers support docker run --name changedet-app --hostname changedet-app --network changedet-network \ -e USE_X_SETTINGS=true \ -d test-changedetectionio sleep 3 - name: Start nginx reverse proxy run: | # Start nginx with our test configuration docker run --name nginx-proxy --network changedet-network -d -p 8080:80 --rm \ -v ${{ github.workspace }}/.github/nginx-reverse-proxy-test.conf:/etc/nginx/conf.d/default.conf:ro \ nginx:alpine sleep 2 - name: Test reverse proxy - root path run: | echo "=== Testing nginx reverse proxy at root path ===" curl --retry-connrefused --retry 6 -s http://localhost:8080/ > /tmp/nginx-test-root.html # Check for changedetection.io UI elements if grep -q "checkbox-uuid" /tmp/nginx-test-root.html; then echo "✓ Found checkbox-uuid in response" else echo "ERROR: checkbox-uuid not found in response" cat /tmp/nginx-test-root.html exit 1 fi # Check for watchlist content if grep -q -i "watch" /tmp/nginx-test-root.html; then echo "✓ Found watch/watchlist content in response" else echo "ERROR: watchlist content not found" cat /tmp/nginx-test-root.html exit 1 fi echo "✓ Root path reverse proxy working correctly" - name: Test reverse proxy - subpath with X-Forwarded-Prefix run: | echo "=== Testing nginx reverse proxy at subpath /changedet-sub/ ===" curl --retry-connrefused --retry 6 -s http://localhost:8080/changedet-sub/ > /tmp/nginx-test-subpath.html # Check for changedetection.io UI elements if grep -q "checkbox-uuid" /tmp/nginx-test-subpath.html; then echo "✓ Found checkbox-uuid in subpath response" else echo "ERROR: checkbox-uuid not found in subpath response" cat /tmp/nginx-test-subpath.html exit 1 fi echo "✓ Subpath reverse proxy working correctly" - name: Test API through reverse 
proxy subpath run: | echo "=== Testing API endpoints through nginx subpath /changedet-sub/ ===" # Extract API key from the changedetection.io datastore API_KEY=$(docker exec changedet-app cat /datastore/changedetection.json | grep -o '"api_access_token": *"[^"]*"' | cut -d'"' -f4) if [ -z "$API_KEY" ]; then echo "ERROR: Could not extract API key from datastore" docker exec changedet-app cat /datastore/changedetection.json exit 1 fi echo "✓ Extracted API key: ${API_KEY:0:8}..." # Create a watch via API through nginx proxy subpath echo "Creating watch via POST to /changedet-sub/api/v1/watch" RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "http://localhost:8080/changedet-sub/api/v1/watch" \ -H "x-api-key: ${API_KEY}" \ -H "Content-Type: application/json" \ -d '{ "url": "https://example.com/test-nginx-proxy", "tag": "nginx-test" }') HTTP_CODE=$(echo "$RESPONSE" | tail -n1) BODY=$(echo "$RESPONSE" | head -n-1) if [ "$HTTP_CODE" != "201" ]; then echo "ERROR: Expected HTTP 201, got $HTTP_CODE" echo "Response: $BODY" exit 1 fi echo "✓ Watch created successfully (HTTP 201)" # Extract the watch UUID from response WATCH_UUID=$(echo "$BODY" | grep -o '"uuid": *"[^"]*"' | cut -d'"' -f4) echo "✓ Watch UUID: $WATCH_UUID" # Update the watch via PUT through nginx proxy subpath echo "Updating watch via PUT to /changedet-sub/api/v1/watch/${WATCH_UUID}" RESPONSE=$(curl -s -w "\n%{http_code}" -X PUT "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \ -H "x-api-key: ${API_KEY}" \ -H "Content-Type: application/json" \ -d '{ "paused": true }') HTTP_CODE=$(echo "$RESPONSE" | tail -n1) BODY=$(echo "$RESPONSE" | head -n-1) if [ "$HTTP_CODE" != "200" ]; then echo "ERROR: Expected HTTP 200, got $HTTP_CODE" echo "Response: $BODY" exit 1 fi if echo "$BODY" | grep -q 'OK'; then echo "✓ Watch updated successfully (HTTP 200, response: OK)" else echo "ERROR: Expected response 'OK', got: $BODY" echo "Response: $BODY" exit 1 fi # Verify the watch is paused via GET echo "Verifying watch 
is paused via GET" RESPONSE=$(curl -s "http://localhost:8080/changedet-sub/api/v1/watch/${WATCH_UUID}" \ -H "x-api-key: ${API_KEY}") if echo "$RESPONSE" | grep -q '"paused": *true'; then echo "✓ Watch is paused as expected" else echo "ERROR: Watch paused state not confirmed" echo "Response: $RESPONSE" exit 1 fi echo "✓ API tests through nginx subpath completed successfully" - name: Cleanup nginx test if: always() run: | docker logs nginx-proxy || true docker logs changedet-app || true docker stop nginx-proxy changedet-app || true docker rm nginx-proxy changedet-app || true # Proxy tests proxy-tests: runs-on: ubuntu-latest needs: build timeout-minutes: 10 env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 - name: Download Docker image artifact uses: actions/download-artifact@v8 with: name: test-changedetectionio-${{ env.PYTHON_VERSION }} path: /tmp - name: Load Docker image run: | docker load -i /tmp/test-changedetectionio.tar - name: Spin up services run: | docker network create changedet-network docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4 docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest - name: Test proxy Squid style interaction run: | cd changedetectionio ./run_proxy_tests.sh docker ps cd .. - name: Test proxy SOCKS5 style interaction run: | cd changedetectionio ./run_socks_proxy_tests.sh cd .. 
# Custom browser URL tests custom-browser-tests: runs-on: ubuntu-latest needs: build timeout-minutes: 10 env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 - name: Download Docker image artifact uses: actions/download-artifact@v8 with: name: test-changedetectionio-${{ env.PYTHON_VERSION }} path: /tmp - name: Load Docker image run: | docker load -i /tmp/test-changedetectionio.tar - name: Spin up ancillary services run: | docker network create changedet-network docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest - name: Test custom browser URL run: | cd changedetectionio ./run_custom_browser_url_tests.sh processor-plugin-tests: runs-on: ubuntu-latest needs: build timeout-minutes: 20 env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 - name: Download Docker image artifact uses: actions/download-artifact@v8 with: name: test-changedetectionio-${{ env.PYTHON_VERSION }} path: /tmp - name: Load Docker image run: | docker load -i /tmp/test-changedetectionio.tar - name: Basic processor plugin registration and checks run: | docker run -e EXTRA_PACKAGES=changedetection.io-osint-processor test-changedetectionio bash -c 'cd changedetectionio;pytest -vvv -s tests/plugins/test_processor.py::test_check_plugin_processor' # Container startup tests container-tests: runs-on: ubuntu-latest needs: build timeout-minutes: 10 env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 - name: Download Docker image artifact uses: actions/download-artifact@v8 with: name: test-changedetectionio-${{ env.PYTHON_VERSION }} path: /tmp - name: Load Docker image run: 
| docker load -i /tmp/test-changedetectionio.tar - name: Test container starts+runs basically without error run: | docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio sleep 3 curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1 docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1 docker kill test-changedetectionio - name: Test HTTPS SSL mode run: | openssl req -x509 -newkey rsa:4096 -keyout privkey.pem -out cert.pem -days 365 -nodes -subj "/CN=localhost" docker run --name test-changedetectionio-ssl --rm -e SSL_CERT_FILE=cert.pem -e SSL_PRIVKEY_FILE=privkey.pem -p 5000:5000 -v ./cert.pem:/app/cert.pem -v ./privkey.pem:/app/privkey.pem -d test-changedetectionio sleep 3 curl --retry-connrefused --retry 6 -k https://localhost:5000 -v|grep -q checkbox-uuid docker kill test-changedetectionio-ssl - name: Test IPv6 Mode run: | docker run --name test-changedetectionio-ipv6 --rm -p 5000:5000 -e LISTEN_HOST=:: -d test-changedetectionio sleep 3 curl --retry-connrefused --retry 6 http://[::1]:5000 -v|grep -q checkbox-uuid docker kill test-changedetectionio-ipv6 # Signal tests signal-tests: runs-on: ubuntu-latest needs: build timeout-minutes: 10 env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 - name: Download Docker image artifact uses: actions/download-artifact@v8 with: name: test-changedetectionio-${{ env.PYTHON_VERSION }} path: /tmp - name: Load Docker image run: | docker load -i /tmp/test-changedetectionio.tar - name: Test SIGTERM and SIGINT signal shutdown run: | echo SIGINT Shutdown request test docker run --name sig-test -d test-changedetectionio sleep 3 echo ">>> Sending SIGINT to sig-test container" docker kill --signal=SIGINT sig-test sleep 3 docker ps docker logs sig-test 2>&1 | grep 
'Shutdown: Got Signal - SIGINT' || exit 1 test -z "`docker ps|grep sig-test`" if [ $? -ne 0 ]; then echo "Looks like container was running when it shouldnt be" docker ps exit 1 fi docker rm sig-test echo SIGTERM Shutdown request test docker run --name sig-test -d test-changedetectionio sleep 3 echo ">>> Sending SIGTERM to sig-test container" docker kill --signal=SIGTERM sig-test sleep 3 docker ps docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1 test -z "`docker ps|grep sig-test`" if [ $? -ne 0 ]; then echo "Looks like container was running when it shouldnt be" docker ps exit 1 fi docker rm sig-test # Upgrade path test upgrade-path-test: runs-on: ubuntu-latest needs: build timeout-minutes: 25 env: PYTHON_VERSION: ${{ inputs.python-version }} steps: - uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history and tags for upgrade testing - name: Set up Python ${{ env.PYTHON_VERSION }} uses: actions/setup-python@v6 with: python-version: ${{ env.PYTHON_VERSION }} - name: Check upgrade works without error run: | echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ===" sudo apt-get update && sudo apt-get install -y --no-install-recommends \ g++ \ gcc \ libc-dev \ libffi-dev \ libjpeg-dev \ libssl-dev \ libxslt-dev \ make \ patch \ pkg-config \ zlib1g-dev # Checkout old version and create datastore git checkout 0.49.1 python3 -m venv .venv source .venv/bin/activate pip install -r requirements.txt pip install 'pyOpenSSL>=23.2.0' echo "=== Running version 0.49.1 to create datastore ===" ALLOW_IANA_RESTRICTED_ADDRESSES=true python3 ./changedetection.py -C -d /tmp/data & APP_PID=$! # Wait for app to be ready echo "Waiting for 0.49.1 to be ready..." sleep 6 # Extract API key from datastore (0.49.1 uses url-watches.json) API_KEY=$(jq -r '.settings.application.api_access_token // empty' /tmp/data/url-watches.json) echo "API Key: ${API_KEY:0:8}..." 
# Create a watch with tag "github-group-test" via API echo "Creating test watch with tag via API..." curl -X POST "http://127.0.0.1:5000/api/v1/watch" \ -H "x-api-key: ${API_KEY}" \ -H "Content-Type: application/json" \ --show-error --fail \ --retry 6 --retry-delay 1 --retry-connrefused \ -d '{ "url": "https://example.com/upgrade-test", "tag": "github-group-test" }' echo "✓ Created watch with tag 'github-group-test'" # Create a specific test URL watch echo "Creating test URL watch via API..." curl -X POST "http://127.0.0.1:5000/api/v1/watch" \ -H "x-api-key: ${API_KEY}" \ -H "Content-Type: application/json" \ --show-error --fail \ -d '{ "url": "http://localhost/test.txt" }' echo "✓ Created watch for 'http://localhost/test.txt' in version 0.49.1" # Stop the old version gracefully kill $APP_PID wait $APP_PID || true echo "✓ Version 0.49.1 stopped" # Upgrade to current version (use commit SHA since we're in detached HEAD) echo "Upgrading to commit ${{ github.sha }}" git checkout ${{ github.sha }} pip install -r requirements.txt echo "=== Running current version (commit ${{ github.sha }}) with old datastore (testing mode) ===" ALLOW_IANA_RESTRICTED_ADDRESSES=true TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD=1 python3 ./changedetection.py -d /tmp/data > /tmp/upgrade-test.log 2>&1 echo "=== Upgrade test output ===" cat /tmp/upgrade-test.log echo "✓ Datastore upgraded successfully" # Now start the current version normally to verify the tag survived echo "=== Starting current version to verify tag exists after upgrade ===" ALLOW_IANA_RESTRICTED_ADDRESSES=true timeout 20 python3 ./changedetection.py -d /tmp/data > /tmp/ui-test.log 2>&1 & APP_PID=$! # Wait for app to be ready and fetch UI echo "Waiting for current version to be ready..." 
sleep 5 curl --retry 6 --retry-delay 1 --retry-connrefused --silent http://127.0.0.1:5000 > /tmp/ui-output.html # Verify tag exists in UI if grep -q "github-group-test" /tmp/ui-output.html; then echo "✓ Tag 'github-group-test' found in UI after upgrade" else echo "ERROR: Tag 'github-group-test' not found in UI after upgrade" echo "=== UI Output ===" cat /tmp/ui-output.html echo "=== App Log ===" cat /tmp/ui-test.log kill $APP_PID || true exit 1 fi # Verify test URL exists in UI if grep -q "http://localhost/test.txt" /tmp/ui-output.html; then echo "✓ Watch URL 'http://localhost/test.txt' found in UI after upgrade" else echo "ERROR: Watch URL 'http://localhost/test.txt' not found in UI after upgrade" echo "=== UI Output ===" cat /tmp/ui-output.html echo "=== App Log ===" cat /tmp/ui-test.log kill $APP_PID || true exit 1 fi # Cleanup kill $APP_PID || true wait $APP_PID || true echo "" echo "✓✓✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }} ✓✓✓" echo " - Commit: ${{ github.sha }}" echo " - Datastore migrated successfully" echo " - Tag 'github-group-test' survived upgrade" echo " - Watch URL 'http://localhost/test.txt' survived upgrade" echo "✓ Upgrade test passed: 0.49.1 → ${{ github.ref_name }}" - name: Upload upgrade test logs if: always() uses: actions/upload-artifact@v7 with: name: upgrade-test-logs-py${{ env.PYTHON_VERSION }} path: /tmp/upgrade-test.log ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files **/__pycache__ **/*.py[cod] # Caches .mypy_cache/ .pytest_cache/ .ruff_cache/ # Distribution / packaging build/ dist/ *.egg-info* # Virtual environment .env .venv/ venv/ .python-version # IDEs .idea .vscode/settings.json *~ # Datastore files datastore/ test-datastore/ # Memory consumption log test-memory.log tests/logs/ ================================================ FILE: .pre-commit-config.yaml ================================================ repos: - 
repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.11.2 hooks: # Lint (and apply safe fixes) - id: ruff args: [--fix] # Format - id: ruff-format ================================================ FILE: .ruff.toml ================================================ # Minimum supported version target-version = "py310" # Formatting options line-length = 100 indent-width = 4 exclude = [ "__pycache__", ".eggs", ".git", ".tox", ".venv", "*.egg-info", "*.pyc", ] [lint] # https://docs.astral.sh/ruff/rules/ select = [ "B", # flake8-bugbear "B9", "C", "E", # pycodestyle "F", # Pyflakes "I", # isort "N", # pep8-naming "UP", # pyupgrade "W", # pycodestyle ] ignore = [ "B007", # unused-loop-control-variable "B909", # loop-iterator-mutation "E203", # whitespace-before-punctuation "E266", # multiple-leading-hashes-for-block-comment "E501", # line-too-long "F403", # undefined-local-with-import-star "N802", # invalid-function-name "N806", # non-lowercase-variable-in-function "N815", # mixed-case-variable-in-class-scope ] [lint.mccabe] max-complexity = 12 [format] indent-style = "space" quote-style = "preserve" ================================================ FILE: COMMERCIAL_LICENCE.md ================================================ # Generally In any commercial activity involving 'Hosting' (as defined herein), whether in part or in full, this license must be executed and adhered to. # Commercial License Agreement This Commercial License Agreement ("Agreement") is entered into by and between Web Technologies s.r.o. here-in ("Licensor") and (your company or personal name) _____________ ("Licensee"). This Agreement sets forth the terms and conditions under which Licensor provides its software ("Software") and services to Licensee for the purpose of reselling the software either in part or full, as part of any commercial activity where the activity involves a third party. 
### Definition of Hosting For the purposes of this Agreement, "hosting" means making the functionality of the Program or modified version available to third parties as a service. This includes, without limitation: - Enabling third parties to interact with the functionality of the Program or modified version remotely through a computer network. - Offering a service the value of which entirely or primarily derives from the value of the Program or modified version. - Offering a service that accomplishes for users the primary purpose of the Program or modified version. ## 1. Grant of License Subject to the terms and conditions of this Agreement, Licensor grants Licensee a non-exclusive, non-transferable license to install, use, and resell the Software. Licensee may: - Resell the Software as part of a service offering or as a standalone product. - Host the Software on a server and provide it as a hosted service (e.g., Software as a Service - SaaS). - Integrate the Software into a larger product or service that is then sold or provided for commercial purposes, where the software is used either in part or full. ## 2. License Fees Licensee agrees to pay Licensor the license fees specified in the ordering document. License fees are due and payable as specified in the ordering document. The fees may include initial licensing costs and recurring fees based on the number of end users, instances of the Software resold, or revenue generated from the resale activities. ## 3. Resale Conditions Licensee must comply with the following conditions when reselling the Software, whether the software is resold in part or full: - Provide end users with access to the source code under the same open-source license conditions as provided by Licensor. - Clearly state in all marketing and sales materials that the Software is provided under a commercial license from Licensor, and provide a link back to https://changedetection.io. 
- Ensure end users are aware of and agree to the terms of the commercial license prior to resale. - Do not sublicense or transfer the Software to third parties except as part of an authorized resale activity. ## 4. Hosting and Provision of Services Licensee may host the Software (either in part or full) on its servers and provide it as a hosted service to end users. The following conditions apply: - Licensee must ensure that all hosted versions of the Software comply with the terms of this Agreement. - Licensee must provide Licensor with regular reports detailing the number of end users and instances of the hosted service. - Any modifications to the Software made by Licensee for hosting purposes must be made available to end users under the same open-source license conditions, unless agreed otherwise. ## 5. Services Licensor will provide support and maintenance services as described in the support policy referenced in the ordering document should such an agreement be signed by all parties. Additional fees may apply for support services provided to end users resold by Licensee. ## 6. Reporting and Audits Licensee agrees to provide Licensor with regular reports detailing the number of instances, end users, and revenue generated from the resale of the Software. Licensor reserves the right to audit Licensee’s records to ensure compliance with this Agreement. ## 7. Term and Termination This Agreement shall commence on the effective date and continue for the period set forth in the ordering document unless terminated earlier in accordance with this Agreement. Either party may terminate this Agreement if the other party breaches any material term and fails to cure such breach within thirty (30) days after receipt of written notice. ## 8. Limitation of Liability and Disclaimer of Warranty Executing this commercial license does not waive the Limitation of Liability or Disclaimer of Warranty as stated in the open-source LICENSE provided with the Software. 
The Software is provided "as is," without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and noninfringement. In no event shall the authors or copyright holders be liable for any claim, damages, or other liability, whether in an action of contract, tort, or otherwise, arising from, out of, or in connection with the Software or the use or other dealings in the Software. ## 9. Governing Law This Agreement shall be governed by and construed in accordance with the laws of the Czech Republic. ## Contact Information For commercial licensing inquiries, please contact contact@changedetection.io and dgtlmoon@gmail.com. ================================================ FILE: CONTRIBUTING.md ================================================ Contributing is always welcome! I am no professional flask developer, if you know a better way that something can be done, please let me know! Otherwise, it's always best to PR into the `master` branch. Please be sure that all new functionality has a matching test! 
Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notification.py` for example ================================================ FILE: Dockerfile ================================================ # pip dependencies install stage ARG PYTHON_VERSION=3.11 FROM python:${PYTHON_VERSION}-slim-bookworm AS builder # See `cryptography` pin comment in requirements.txt RUN apt-get update && apt-get install -y --no-install-recommends \ g++ \ gcc \ libc-dev \ libffi-dev \ libjpeg-dev \ libssl-dev \ libxslt-dev \ make \ patch \ pkg-config \ zlib1g-dev RUN mkdir /install WORKDIR /install COPY requirements.txt /requirements.txt # Use cache mounts and multiple wheel sources for faster ARM builds ENV PIP_CACHE_DIR=/tmp/pip-cache # Help Rust find OpenSSL for cryptography package compilation on ARM ENV PKG_CONFIG_PATH="/usr/lib/pkgconfig:/usr/lib/arm-linux-gnueabihf/pkgconfig:/usr/lib/aarch64-linux-gnu/pkgconfig" ENV PKG_CONFIG_ALLOW_SYSTEM_CFLAGS=1 ENV OPENSSL_DIR="/usr" ENV OPENSSL_LIB_DIR="/usr/lib/arm-linux-gnueabihf" ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl" # Additional environment variables for cryptography Rust build ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1 RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \ pip install \ --prefer-binary \ --extra-index-url https://www.piwheels.org/simple \ --extra-index-url https://pypi.anaconda.org/ARM-software/simple \ --cache-dir=/tmp/pip-cache \ --target=/dependencies \ -r /requirements.txt # Playwright is an alternative to Selenium # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing # https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported) RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \ pip install \ --prefer-binary \ --cache-dir=/tmp/pip-cache \ --target=/dependencies \ playwright~=1.56.0 \ || echo "WARN: Failed to install Playwright. 
The application can still run, but the Playwright option will be disabled." # OpenCV is optional for fast image comparison (pixelmatch is the fallback) # Skip on arm/v7 and arm/v8 where builds take weeks - excluded from requirements.txt # The requirement is quoted so the shell does not parse '>=' as an output redirection ARG TARGETPLATFORM RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \ case "$TARGETPLATFORM" in \ linux/arm/v7|linux/arm/v8) \ echo "INFO: Skipping OpenCV on $TARGETPLATFORM (build takes too long), using pixelmatch fallback" \ ;; \ *) \ pip install \ --prefer-binary \ --extra-index-url https://www.piwheels.org/simple \ --cache-dir=/tmp/pip-cache \ --target=/dependencies \ "opencv-python-headless>=4.8.0.76" \ || echo "WARN: OpenCV install failed, will use pixelmatch fallback" \ ;; \ esac # Final image stage FROM python:${PYTHON_VERSION}-slim-bookworm LABEL org.opencontainers.image.source="https://github.com/dgtlmoon/changedetection.io" LABEL org.opencontainers.image.url="https://changedetection.io" LABEL org.opencontainers.image.documentation="https://changedetection.io/tutorials" LABEL org.opencontainers.image.title="changedetection.io" LABEL org.opencontainers.image.description="Self-hosted web page change monitoring and notification service" LABEL org.opencontainers.image.licenses="Apache-2.0" LABEL org.opencontainers.image.vendor="changedetection.io" RUN apt-get update && apt-get install -y --no-install-recommends \ libxslt1.1 \ # For presenting price amounts correctly in the restock/price detection overview locales \ # For pdftohtml poppler-utils \ # favicon type detection and other uses file \ zlib1g \ # OpenCV dependencies for image processing libglib2.0-0 \ libsm6 \ libxext6 \ libxrender-dev \ && apt-get clean && rm -rf /var/lib/apt/lists/* # https://stackoverflow.com/questions/58701233/docker-logs-erroneously-appears-empty-until-container-stops ENV PYTHONUNBUFFERED=1 RUN [ !
-d "/datastore" ] && mkdir /datastore # Re #80, sets SECLEVEL=1 in openssl.conf to allow monitoring sites with weak/old cipher suites RUN sed -i 's/^CipherString = .*/CipherString = DEFAULT@SECLEVEL=1/' /etc/ssl/openssl.cnf # Copy modules over to the final image and add their dir to PYTHONPATH COPY --from=builder /dependencies /usr/local ENV PYTHONPATH=/usr/local EXPOSE 5000 # The actual flask app module COPY changedetectionio /app/changedetectionio # Compile translation files for i18n support RUN pybabel compile -d /app/changedetectionio/translations # Also for OpenAPI validation wrapper - needs the YML RUN [ ! -d "/app/docs" ] && mkdir /app/docs COPY docs/api-spec.yaml /app/docs/api-spec.yaml # Starting wrapper COPY changedetection.py /app/changedetection.py # Github Action test purpose(test-only.yml). # On production, it is effectively LOGGER_LEVEL=''. ARG LOGGER_LEVEL='' ENV LOGGER_LEVEL="$LOGGER_LEVEL" # Default ENV LC_ALL=en_US.UTF-8 WORKDIR /app # Copy and set up entrypoint script for installing extra packages COPY docker-entrypoint.sh /docker-entrypoint.sh RUN chmod +x /docker-entrypoint.sh # Set entrypoint to handle EXTRA_PACKAGES env var ENTRYPOINT ["/docker-entrypoint.sh"] # Default command (can be overridden in docker-compose.yml) CMD ["python", "./changedetection.py", "-d", "/datastore"] ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2025 Web Technologies s.r.o. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: MANIFEST.in ================================================ recursive-include changedetectionio/api * include docs/api-spec.yaml recursive-include changedetectionio/blueprint * recursive-include changedetectionio/conditions * recursive-include changedetectionio/content_fetchers * recursive-include changedetectionio/jinja2_custom * recursive-include changedetectionio/model * recursive-include changedetectionio/notification * recursive-include changedetectionio/processors * recursive-include changedetectionio/realtime * recursive-include changedetectionio/static * recursive-include changedetectionio/store * recursive-include changedetectionio/templates * recursive-include changedetectionio/tests * recursive-include changedetectionio/translations * recursive-include changedetectionio/widgets * prune changedetectionio/static/package-lock.json prune changedetectionio/static/styles/node_modules prune changedetectionio/static/styles/package-lock.json include changedetectionio/favicon_utils.py include changedetection.py include requirements.txt include README-pip.md global-exclude *.pyc global-exclude node_modules global-exclude venv global-exclude test-datastore global-exclude changedetection.io*dist-info global-exclude changedetectionio/tests/proxy_socks5/test-datastore ================================================ FILE: README-pip.md ================================================ # Monitor website 
changes Detect WebPage Changes Automatically — Monitor Web Page Changes in Real Time Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more. Detect web page content changes and get instant alerts. [Changedetection.io is the best tool to monitor web-pages for changes](https://changedetection.io) Track website content changes and receive notifications via Discord, Email, Slack, Telegram and 90+ more Ideal for monitoring price changes, content edits, conditional changes and more. [Self-hosted web page change monitoring, list of websites with changes](https://changedetection.io) [**Don't have time? Try our extremely affordable subscription, use our proxies and support!**](https://changedetection.io) ### Target specific parts of the webpage using the Visual Selector tool. Available when connected to a playwright content fetcher (included as part of our subscription service) [Select parts and elements of a web page to monitor for changes](https://changedetection.io?src=pip) ### Easily see what changed, examine by word, line, or individual character. [Self-hosted web page change monitoring context difference ](https://changedetection.io?src=pip) ### Perform interactive browser steps Fill in text boxes, click buttons and more, setup your changedetection scenario. Using the **Browser Steps** configuration, add basic steps before performing change detection, such as logging into websites, adding a product to a cart, accept cookie logins, entering dates and refining searches. [Website change detection with interactive browser steps, detect changes behind login and password, search queries and more](https://changedetection.io?src=pip) After **Browser Steps** have been run, then visit the **Visual Selector** tab to refine the content you're interested in. Requires Playwright to be enabled.
### Example use cases - Products and services have a change in pricing - _Out of stock notification_ and _Back In stock notification_ - Monitor and track PDF file changes, know when a PDF file has text changes. - Governmental department updates (changes are often only on their websites) - New software releases, security advisories when you're not on their mailing list. - Festivals with changes - Discogs restock alerts and monitoring - Realestate listing changes - Know when your favourite whiskey is on sale, or other special deals are announced before anyone else - COVID related news from government websites - University/organisation news from their website - Detect and monitor changes in JSON API responses - JSON API monitoring and alerting - Changes in legal and other documents - Trigger API calls via notifications when text appears on a website - Glue together APIs using the JSON filter and JSON notifications - Create RSS feeds based on changes in web content - Monitor HTML source code for unexpected changes, strengthen your PCI compliance - You have a very sensitive list of URLs to watch and you do _not_ want to use the paid alternatives. (Remember, _you_ are the product) - Get notified when certain keywords appear in Twitter search results - Proactively search for jobs, get notified when companies update their careers page, search job portals for keywords. - Get alerts when new job positions are open on Bamboo HR and other job platforms - Website defacement monitoring - Pokémon Card Restock Tracker / Pokémon TCG Tracker - RegTech - stay ahead of regulatory changes, regulatory compliance _Need an actual Chrome runner with Javascript support? We support fetching via WebDriver and Playwright!_ #### Key Features - Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions! 
- Target elements with xPath(1.0) and CSS Selectors, Easily monitor complex JSON with JSONPath or jq - Switch between fast non-JS and Chrome JS based "fetchers" - Track changes in PDF files (Monitor text changed in the PDF, Also monitor PDF filesize and checksums) - Easily specify how often a site should be checked - Execute JS before extracting text (Good for logging in, see examples in the UI!) - Override Request Headers, Specify `POST` or `GET` and other methods - Use the "Visual Selector" to help target specific elements - Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration) - Send a screenshot with the notification when a change is detected in the web page We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link. [Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residential, ISP, Rotating and many other proxy types to suit your project. Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/ ```bash $ pip3 install changedetection.io ``` Specify a target for the *datastore path* with `-d` (required) and a *listening port* with `-p` (defaults to `5000`) ```bash $ changedetection.io -d /path/to/empty/data/dir -p 5000 ``` Then visit http://127.0.0.1:5000 , You should now be able to access the UI. See https://changedetection.io for more information. ================================================ FILE: README.md ================================================ # Detect Website Changes Automatically — Monitor Web Page Changes in Real Time Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more. **Detect web page content changes and get instant alerts.** Ideal for monitoring price changes, content edits, conditional changes and more. 
[Web site page change monitoring](https://changedetection.io?src=github) [![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md) ![changedetection.io](https://github.com/dgtlmoon/changedetection.io/actions/workflows/test-only.yml/badge.svg?branch=master) [**Get started with website page change monitoring straight away. Don't have time? Try our $8.99/month subscription, use our proxies and support!**](https://changedetection.io) , _half the price of other website change monitoring services!_ - Chrome browser included. - Nothing to install, access via browser login after signup. - Super fast, no registration needed setup. - Get started watching and receiving website change notifications straight away. - See our [tutorials and how-to page for more inspiration](https://changedetection.io/tutorials) ### Target specific parts of the webpage using the Visual Selector tool. Available when connected to a playwright content fetcher (included as part of our subscription service) [Select parts and elements of a web page to monitor for changes](https://changedetection.io?src=github) ### Easily see what changed, examine by word, line, or individual character. [Self-hosted web page change monitoring context difference ](https://changedetection.io?src=github) ### Perform interactive browser steps Fill in text boxes, click buttons and more, setup your changedetection scenario. Using the **Browser Steps** configuration, add basic steps before performing change detection, such as logging into websites, adding a product to a cart, accept cookie logins, entering dates and refining searches. [Website change detection with interactive browser steps, detect changes behind login and password, search queries and more](https://changedetection.io?src=github) After **Browser Steps** have been run, then visit the **Visual Selector** tab to refine the content you're interested in. Requires Playwright to be enabled. 
### Awesome restock and price change notifications Enable the _"Re-stock & Price detection for single product pages"_ option to activate the best way to monitor product pricing, this will extract any meta-data in the HTML page and give you many options to follow the pricing of the product. Easily organise and monitor prices for products from the dashboard, get alerts and notifications when the price of a product changes or comes back in stock again! [Easily keep an eye on product price changes directly from the UI](https://changedetection.io?src=github) Set price change notification parameters, upper and lower price, price change percentage and more. Always know when a product for sale drops in price. [Set upper lower and percentage price change notification values](https://changedetection.io?src=github) ### Example use cases - Products and services have a change in pricing - _Out of stock notification_ and _Back In stock notification_ - Monitor and track PDF file changes, know when a PDF file has text changes. - Governmental department updates (changes are often only on their websites) - New software releases, security advisories when you're not on their mailing list. - Festivals with changes - Discogs restock alerts and monitoring - Realestate listing changes - Know when your favourite whiskey is on sale, or other special deals are announced before anyone else - COVID related news from government websites - University/organisation news from their website - Detect and monitor changes in JSON API responses - JSON API monitoring and alerting - Changes in legal and other documents - Trigger API calls via notifications when text appears on a website - Glue together APIs using the JSON filter and JSON notifications - Create RSS feeds based on changes in web content - Monitor HTML source code for unexpected changes, strengthen your PCI compliance - You have a very sensitive list of URLs to watch and you do _not_ want to use the paid alternatives. 
(Remember, _you_ are the product) - Get notified when certain keywords appear in Twitter search results - Proactively search for jobs, get notified when companies update their careers page, search job portals for keywords. - Get alerts when new job positions are open on Bamboo HR and other job platforms - Website defacement monitoring - Pokémon Card Restock Tracker / Pokémon TCG Tracker - RegTech - stay ahead of regulatory changes, regulatory compliance _Need an actual Chrome runner with Javascript support? We support fetching via WebDriver and Playwright!_ #### Key Features - Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions! - Target elements with xPath 1 and xPath 2, CSS Selectors, Easily monitor complex JSON with JSONPath or jq - Switch between fast non-JS and Chrome JS based "fetchers" - Track changes in PDF files (Monitor text changed in the PDF, Also monitor PDF filesize and checksums) - Easily specify how often a site should be checked - Execute JS before extracting text (Good for logging in, see examples in the UI!) - Override Request Headers, Specify `POST` or `GET` and other methods - Use the "Visual Selector" to help target specific elements - Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration) - Send a screenshot with the notification when a change is detected in the web page We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $150 using our signup link. Please :star: star :star: this project and help it grow! 
https://github.com/dgtlmoon/changedetection.io/ ### Conditional web page changes Easily [configure conditional actions](https://changedetection.io/tutorial/conditional-actions-web-page-changes), for example, only trigger when a price is above or below a preset amount, or [when a web page includes (or does not include) a keyword](https://changedetection.io/tutorial/how-monitor-keywords-any-website) Conditional web page changes ### Schedule web page watches in any timezone, limit by day of week and time. Easily set a re-check schedule, for example you could limit the web page change detection to only operate during business hours. Or perhaps base it on a foreign timezone (for example, you want to check for the latest news headlines in a foreign country at 09:00). How to monitor web page changes according to a schedule Includes quick short-cut buttons to set up a schedule for **business hours only**, or **weekends**. ### We have a Chrome extension! Easily add the current web page to your changedetection.io tool, simply install the extension and click "Sync" to connect it to your existing changedetection.io install. [Chrome Extension to easily add the current web-page to detect a change.](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop) [Go to the Chrome Web Store to download the extension.](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop) ( Or check out the [GitHub repo](https://github.com/dgtlmoon/changedetection.io-browser-extension) ) ## Installation ### Docker With Docker Compose, just clone this repository and run: ```bash $ docker compose up -d ``` Docker standalone ```bash $ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io dgtlmoon/changedetection.io ``` `:latest` tag is our latest stable release, `:dev` tag is our bleeding edge `master` branch. 
Alternative docker repository over at ghcr - [ghcr.io/dgtlmoon/changedetection.io](https://ghcr.io/dgtlmoon/changedetection.io) ### Windows See the install instructions at the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows ### Python Pip Check out our pypi page https://pypi.org/project/changedetection.io/ ```bash $ pip3 install changedetection.io $ changedetection.io -d /path/to/empty/data/dir -p 5000 ``` Then visit http://127.0.0.1:5000 , You should now be able to access the UI. _Now with per-site configurable support for using a fast built in HTTP fetcher or use a Chrome based fetcher for monitoring of JavaScript websites!_ ## Updating changedetection.io ### Docker ``` docker pull dgtlmoon/changedetection.io docker kill $(docker ps -a -f name=changedetection.io -q) docker rm $(docker ps -a -f name=changedetection.io -q) docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io dgtlmoon/changedetection.io ``` ### docker compose ```bash docker compose pull && docker compose up -d ``` See the wiki for more information https://github.com/dgtlmoon/changedetection.io/wiki ## Different browser viewport sizes (mobile, desktop etc) If you are using the recommended `sockpuppetbrowser` (which is in the docker-compose.yml as a setting to be uncommented) you can easily set different viewport sizes for your web page change detection, [see more information here about setting up different viewport sizes](https://github.com/dgtlmoon/sockpuppetbrowser?tab=readme-ov-file#setting-viewport-size). ## Filters XPath(1.0), JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools. (We support LXML `re:test`, `re:match` and `re:replace`.) 
## Notifications ChangeDetection.io supports a huge number of notification services (including email, office365, custom APIs, etc) when a web-page has a change detected, thanks to the apprise library. Simply set one or more notification URLs in the _[edit]_ tab of that watch. Just some examples: discord://webhook_id/webhook_token flock://app_token/g:channel_id gitter://token/room gchat://workspace/key/token msteams://TokenA/TokenB/TokenC/ o365://TenantID:AccountEmail/ClientID/ClientSecret/TargetEmail rocket://user:password@hostname/#Channel mailto://user:pass@example.com?to=receivingAddress@example.com json://someserver.com/custom-api syslog:// And everything else in this list! Self-hosted web page change monitoring notifications Now you can also customise your notification content and use Jinja2 templating for their title and body! ## JSON API Monitoring Detect changes and monitor data in JSON APIs by using either JSONPath or jq to filter, parse, and restructure JSON as needed. ![image](https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/json-filter-field-example.png) This will re-parse the JSON and apply formatting to the text, making it super easy to monitor and detect changes in JSON API results. ![image](https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/json-diff-example.png) ### JSONPath or jq? For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more specific information on jq. One big advantage of `jq` is that you can use logic in your JSON filter, such as filters to only show items that have a value greater than/less than etc. See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/JSON-Selector-Filter-help for more information and examples. ### Parse JSON embedded in HTML! 
When you enable a `json:` or `jq:` filter, you can even automatically extract and parse embedded JSON inside an HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites. ``` ... ``` `json:$..price` or `jq:..price` would give `3949.99`, or you can extract the whole structure (use a JSONPath test website to validate with). The application also supports notifying you that it can follow this information automatically. ## Proxy Configuration See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration, we also support using [Bright Data proxy services where possible](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support) and [Oxylabs](https://oxylabs.go2cloud.org/SH2d) proxy services. ## Raspberry Pi support? Raspberry Pi and linux/arm/v6 linux/arm/v7 arm64 devices are supported! See the wiki for [details](https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver) ## Import support Easily [import your list of websites to watch for changes in Excel .xlsx file format](https://changedetection.io/tutorial/how-import-your-website-change-detection-lists-excel), or paste in lists of website URLs as plaintext. Excel import is recommended - that way you can better organise tags/groups of websites and other features. ## API Support Full REST API for programmatic management of watches, tags, notifications and more. - **[Interactive API Documentation](https://changedetection.io/docs/api_v1/index.html)** - Complete API reference with live testing - **[OpenAPI Specification](docs/api-spec.yaml)** - Generate SDKs for any programming language ## Support us Do you use changedetection.io to make money? Does it save you time or money? Does it make your life easier? Less stressful? Remember, we write this software when we should be doing actual paid work, we have to buy food and pay rent just like you. 
Consider taking out an officially supported [website change detection subscription](https://changedetection.io?src=github) , even if you don't use it, you still get the warm fuzzy feeling of helping out the project. (And who knows, you might just use it!) ## Commercial Support I offer commercial support, this software is depended on by network security, aerospace , data-science and data-journalist professionals just to name a few, please reach out at dgtlmoon@gmail.com for any enquiries, I am more than glad to work with your organisation to further the possibilities of what can be done with changedetection.io [release-shield]: https://img.shields.io:/github/v/release/dgtlmoon/changedetection.io?style=for-the-badge [docker-pulls]: https://img.shields.io/docker/pulls/dgtlmoon/changedetection.io?style=for-the-badge [test-shield]: https://github.com/dgtlmoon/changedetection.io/actions/workflows/test-only.yml/badge.svg?branch=master [license-shield]: https://img.shields.io/github/license/dgtlmoon/changedetection.io.svg?style=for-the-badge [release-link]: https://github.com/dgtlmoon/changedetection.io/releases [docker-link]: https://hub.docker.com/r/dgtlmoon/changedetection.io ## Commercial Licencing If you are reselling this software either in part or full as part of any commercial arrangement, you must abide by our COMMERCIAL_LICENCE.md found in our code repository, please contact dgtlmoon@gmail.com and contact@changedetection.io . 
## Third-party licenses changedetectionio.html_tools.elementpath_tostring: Copyright (c), 2018-2021, SISSA (Scuola Internazionale Superiore di Studi Avanzati), Licensed under [MIT license](https://github.com/sissaschool/elementpath/blob/master/LICENSE) ## Contributors Recognition of fantastic contributors to the project - Constantin Hong https://github.com/Constantin1489 ================================================ FILE: babel.cfg ================================================ [python: **.py] keywords = _:1,_l:1,gettext:1 [jinja2: **/templates/**.html] encoding = utf-8 ================================================ FILE: changedetection.py ================================================ #!/usr/bin/env python3 # Only exists for direct CLI usage import changedetectionio if __name__ == '__main__': changedetectionio.main() ================================================ FILE: changedetectionio/.gitignore ================================================ test-datastore package-lock.json ================================================ FILE: changedetectionio/PLUGIN_README.md ================================================ # Creating Plugins for changedetection.io This document describes how to create plugins for changedetection.io. Plugins can be used to extend the functionality of the application in various ways. ## Plugin Types ### UI Stats Tab Plugins These plugins can add content to the Stats tab in the Edit page. This is useful for adding custom statistics or visualizations about a watch. #### Creating a UI Stats Tab Plugin 1. Create a Python file in a directory that will be loaded by the plugin system. 2. 
Use the `global_hookimpl` decorator to implement the `ui_edit_stats_extras` hook: ```python import pluggy from loguru import logger global_hookimpl = pluggy.HookimplMarker("changedetectionio") @global_hookimpl def ui_edit_stats_extras(watch): """Add custom content to the stats tab""" # Calculate or retrieve your stats my_stat = calculate_something(watch) # Return HTML content as a string html = f"""

My Plugin Statistics

My statistic: {my_stat}

""" return html ``` 3. The HTML you return will be included in the Stats tab. ## Plugin Loading Plugins can be loaded from: 1. Built-in plugin directories in the codebase 2. External packages using setuptools entry points To add a new plugin directory, modify the `plugin_dirs` dictionary in `pluggy_interface.py`. ## Example Plugin Here's a simple example of a plugin that adds a word count statistic to the Stats tab: ```python import pluggy from loguru import logger global_hookimpl = pluggy.HookimplMarker("changedetectionio") def count_words_in_history(watch): """Count words in the latest snapshot""" try: if not watch.history.keys(): return 0 latest_key = list(watch.history.keys())[-1] latest_content = watch.get_history_snapshot(timestamp=latest_key) return len(latest_content.split()) except Exception as e: logger.error(f"Error counting words: {str(e)}") return 0 @global_hookimpl def ui_edit_stats_extras(watch): """Add word count to the Stats tab""" word_count = count_words_in_history(watch) html = f"""

Content Analysis

Word count (latest snapshot) {word_count}
""" return html ``` ## Testing Your Plugin 1. Place your plugin in one of the directories scanned by the plugin system 2. Restart changedetection.io 3. Go to the Edit page of a watch and check the Stats tab to see your content ================================================ FILE: changedetectionio/__init__.py ================================================ #!/usr/bin/env python3 # Read more https://github.com/dgtlmoon/changedetection.io/wiki # Semver means never use .01, or 00. Should be .1. __version__ = '0.54.6' from changedetectionio.strtobool import strtobool from json.decoder import JSONDecodeError from loguru import logger import getopt import logging import os import platform import signal import threading import time # Eventlet completely removed - using threading mode for SocketIO # This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts # Note: store and changedetection_app are imported inside main() to avoid # initialization before argument parsing (allows --help to work without loading everything) # ============================================================================== # Multiprocessing Configuration - CRITICAL for Thread Safety # ============================================================================== # # PROBLEM: Python 3.12+ warns about fork() with multi-threaded processes: # "This process is multi-threaded, use of fork() may lead to deadlocks" # # WHY IT'S DANGEROUS: # 1. This Flask app has multiple threads (HTTP handlers, workers, SocketIO) # 2. fork() copies ONLY the calling thread to the child process # 3. BUT fork() also copies all locks/mutexes in their current state # 4. If another thread held a lock during fork() → child has locked lock with no owner # 5. 
Result: PERMANENT DEADLOCK if child tries to acquire that lock # # SOLUTION: Use 'spawn' instead of 'fork' # - spawn starts a fresh Python interpreter (no inherited threads or locks) # - Slower (~200ms vs ~1ms) but safe with multi-threaded parent # - Consistent across all platforms (Windows already uses spawn by default) # # IMPLEMENTATION: # 1. Explicit contexts everywhere (primary protection): # - playwright.py: ctx = multiprocessing.get_context('spawn') # - puppeteer.py: ctx = multiprocessing.get_context('spawn') # - isolated_opencv.py: ctx = multiprocessing.get_context('spawn') # - isolated_libvips.py: ctx = multiprocessing.get_context('spawn') # # 2. Global default (defense-in-depth, below): # - Safety net if future code forgets explicit context # - Protects against third-party libraries using Process() # - Costs nothing (explicit contexts always override it) # # WHY BOTH? # - Explicit contexts: Clear, self-documenting, always works # - Global default: Safety net for forgotten contexts or library code # - If someone writes "Process()" instead of "ctx.Process()", still safe! # # See: https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods # ============================================================================== import multiprocessing import os import sys # Limit glibc malloc arena count to prevent RSS growth from concurrent requests. # Default: glibc creates up to 8×CPU_cores arenas. Each concurrent thread/connection # can trigger a new arena, and freed memory stays mapped in those arenas as RSS forever. # With MALLOC_ARENA_MAX=2, at most 2 arenas are used; freed pages return to the OS faster. # Must be set before worker threads start; env var is read lazily by glibc on first arena creation. 
if 'MALLOC_ARENA_MAX' not in os.environ:
    os.environ['MALLOC_ARENA_MAX'] = '2'
    # NOTE(review): nesting of this try-block under the check above is reconstructed
    # from a whitespace-collapsed dump - confirm against the repository.
    try:
        import ctypes as _ctypes
        _ctypes.CDLL('libc.so.6').mallopt(-8, 2)  # M_ARENA_MAX = -8
    except Exception:
        # Best effort only: libc.so.6 / mallopt may not be available on this platform
        pass

# Set spawn as global default (safety net - all our code uses explicit contexts anyway)
# Skip in tests to avoid breaking pytest-flask's LiveServer fixture (uses unpicklable local functions)
if 'pytest' not in sys.modules:
    try:
        if multiprocessing.get_start_method(allow_none=True) is None:
            multiprocessing.set_start_method('spawn', force=False)
            logger.debug("Set multiprocessing default to 'spawn' for thread safety (explicit contexts used everywhere)")
    except RuntimeError:
        logger.debug(f"Multiprocessing start method already set: {multiprocessing.get_start_method()}")

# Only global so we can access it in the signal handler
app = None
datastore = None


def get_version():
    """Return the package version string held in ``__version__``."""
    return __version__


# Parent wrapper or OS sends us a SIGTERM/SIGINT, do everything required for a clean shutdown
def sigshutdown_handler(_signo, _stack_frame):
    """Signal handler: stop loops, shut down workers/queues/Socket.IO, then exit.

    Every shutdown step is best-effort and logged on failure so that the handler
    always reaches ``sys.exit()``.

    Args:
        _signo: Number of the received signal (used for logging only).
        _stack_frame: Current stack frame supplied by the ``signal`` module (unused).

    Raises:
        SystemExit: Always, via ``sys.exit()`` once the shutdown steps have run.
    """
    name = signal.Signals(_signo).name
    logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated')

    # Set exit flag immediately to stop all loops.
    # Both globals start out as None at module level; guard them so that a signal
    # arriving before they are assigned still results in a clean exit.
    if app is not None:
        app.config.exit.set()
    if datastore is not None:
        datastore.stop_thread = True

    # Log memory consumption before shutting down workers (cross-platform)
    try:
        import psutil
        process = psutil.Process()
        mem_info = process.memory_info()
        rss_mb = mem_info.rss / 1024 / 1024
        vms_mb = mem_info.vms / 1024 / 1024
        logger.info(f"Memory consumption before worker shutdown: RSS={rss_mb:,.2f} MB, VMS={vms_mb:,.2f} MB")
    except Exception as e:
        logger.warning(f"Could not retrieve memory stats: {str(e)}")

    # Shutdown workers and queues immediately
    try:
        from changedetectionio import worker_pool
        worker_pool.shutdown_workers()
    except Exception as e:
        logger.error(f"Error shutting down workers: {str(e)}")

    # Close janus queues properly
    try:
        from changedetectionio.flask_app import update_q, notification_q
        update_q.close()
        notification_q.close()
        logger.debug("Queues closed successfully")
    except Exception as e:
        logger.critical(f"CRITICAL: Failed to close queues: {e}")

    # Shutdown socketio server fast.
    # The import lives inside the try (like the other steps) so that an import
    # failure is logged instead of aborting the handler before sys.exit().
    try:
        from changedetectionio.flask_app import socketio_server
        if socketio_server and hasattr(socketio_server, 'shutdown'):
            socketio_server.shutdown()
    except Exception as e:
        logger.error(f"Error shutting down Socket.IO server: {str(e)}")

    # With immediate persistence, all data is already saved
    logger.success('All data already persisted (immediate commits enabled).')
    sys.exit()


def print_help():
    """Print help text for command line options"""
    # NOTE(review): '-c' is advertised below, but the getopt option string used in
    # main() ("6Csd:h:p:l:P:") does not contain 'c' - confirm whether -c is still supported.
    print('Usage: changedetection.py [options]')
    print('')
    print('Standard options:')
    print(' -s SSL enable')
    print(' -h HOST Listen host (default: 0.0.0.0)')
    print(' -p PORT Listen port (default: 5000)')
    print(' -d PATH Datastore path')
    print(' -l LEVEL Log level (TRACE, DEBUG, INFO, SUCCESS, WARNING, ERROR, CRITICAL)')
    print(' -c Cleanup unused snapshots')
    print(' -C Create datastore directory if it doesn\'t exist')
    print(' -P true/false Set all watches paused (true) or active (false)')
    print('')
    print('Add URLs on startup:')
    print(' -u URL Add URL to watch (can be used multiple times)')
    print(' -u0 \'JSON\' Set options for first -u URL (e.g. \'{"processor":"text_json_diff"}\')')
    print(' -u1 \'JSON\' Set options for second -u URL (0-indexed)')
    print(' -u2 \'JSON\' Set options for third -u URL, etc.')
    print(' Available options: processor, fetch_backend, headers, method, etc.')
    print(' See model/Watch.py for all available options')
    print('')
    print('Recheck on startup:')
    print(' -r all Queue all watches for recheck on startup')
    print(' -r UUID,... Queue specific watches (comma-separated UUIDs)')
    print(' -r all N Queue all watches, wait for completion, repeat N times')
    print(' -r UUID,... N Queue specific watches, wait for completion, repeat N times')
    print('')
    print('Batch mode:')
    print(' -b Run in batch mode (process queue then exit)')
    print(' Useful for CI/CD, cron jobs, or one-time checks')
    print(' NOTE: Batch mode checks if Flask is running and aborts if port is in use')
    print(' Use -p PORT to specify a different port if needed')
    print('')


def main():
    global datastore
    global app

    # Early help/version check before any initialization
    if '--help' in sys.argv or '-help' in sys.argv:
        print_help()
        sys.exit(0)
    if '--version' in sys.argv or '-v' in sys.argv:
        print(f'changedetection.io {__version__}')
        sys.exit(0)

    # Import heavy modules after help/version checks to keep startup fast for those flags
    from changedetectionio import store
    from changedetectionio.flask_app import changedetection_app

    datastore_path = None
    # Set a default logger level
    logger_level = 'DEBUG'
    include_default_watches = True
    all_paused = None  # None means don't change, True/False to set
    host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip()
    port = int(os.environ.get('PORT', 5000))
    ssl_mode = False

    # Lists for multiple URLs and their options
    urls_to_add = []
    url_options = {}  # Key: index (0-based), Value: dict of options
    recheck_watches = None  # None, 'all', or list of UUIDs
    recheck_repeat_count = 1  # Number of times to repeat recheck cycle
    batch_mode = False  # Run once then exit when queue is empty

    # On Windows, create and use a default path.
    if os.name == 'nt':
        datastore_path = os.path.expandvars(r'%APPDATA%\changedetection.io')
        os.makedirs(datastore_path, exist_ok=True)
    else:
        # Must be absolute so that send_from_directory doesnt try to make it relative to backend/
        datastore_path = os.path.join(os.getcwd(), "../datastore")

    # Pre-process arguments to extract -u, -u, and -r options before getopt
    # This allows unlimited -u0, -u1, -u2, ...
options without predefining them cleaned_argv = ['changedetection.py'] # Start with program name i = 1 while i < len(sys.argv): arg = sys.argv[i] # Handle -u (add URL) if arg == '-u' and i + 1 < len(sys.argv): urls_to_add.append(sys.argv[i + 1]) i += 2 continue # Handle -u (set options for URL at index N) if arg.startswith('-u') and len(arg) > 2 and arg[2:].isdigit(): idx = int(arg[2:]) if i + 1 < len(sys.argv): try: import json url_options[idx] = json.loads(sys.argv[i + 1]) except json.JSONDecodeError as e: print(f'Error: Invalid JSON for {arg}: {sys.argv[i + 1]}') print(f'JSON decode error: {e}') sys.exit(2) i += 2 continue # Handle -r (recheck watches) if arg == '-r' and i + 1 < len(sys.argv): recheck_arg = sys.argv[i + 1] if recheck_arg.lower() == 'all': recheck_watches = 'all' else: # Parse comma-separated list of UUIDs recheck_watches = [uuid.strip() for uuid in recheck_arg.split(',') if uuid.strip()] # Check for optional repeat count as third argument if i + 2 < len(sys.argv) and sys.argv[i + 2].isdigit(): recheck_repeat_count = int(sys.argv[i + 2]) if recheck_repeat_count < 1: print(f'Error: Repeat count must be at least 1, got {recheck_repeat_count}') sys.exit(2) i += 3 else: i += 2 continue # Handle -b (batch mode - run once and exit) if arg == '-b': batch_mode = True i += 1 continue # Keep other arguments for getopt cleaned_argv.append(arg) i += 1 try: opts, args = getopt.getopt(cleaned_argv[1:], "6Csd:h:p:l:P:", "port") except getopt.GetoptError as e: print_help() print(f'Error: {e}') sys.exit(2) create_datastore_dir = False # Set a logger level via shell env variable # Used: Dockerfile for CICD # To set logger level for pytest, see the app function in tests/conftest.py if os.getenv("LOGGER_LEVEL"): level = os.getenv("LOGGER_LEVEL") logger_level = int(level) if level.isdigit() else level.upper() for opt, arg in opts: if opt == '-s': ssl_mode = True if opt == '-h': host = arg if opt == '-p': port = int(arg) if opt == '-d': datastore_path = arg # Create 
the datadir if it doesnt exist if opt == '-C': create_datastore_dir = True if opt == '-l': logger_level = int(arg) if arg.isdigit() else arg.upper() if opt == '-P': try: all_paused = bool(strtobool(arg)) except ValueError: print(f'Error: Invalid value for -P option: {arg}') print('Expected: true, false, yes, no, 1, or 0') sys.exit(2) # If URLs are provided, don't include default watches if urls_to_add: include_default_watches = False logger.success(f"changedetection.io version {get_version()} starting.") # Launch using SocketIO run method for proper integration (if enabled) ssl_cert_file = os.getenv("SSL_CERT_FILE", 'cert.pem') ssl_privkey_file = os.getenv("SSL_PRIVKEY_FILE", 'privkey.pem') if os.getenv("SSL_CERT_FILE") and os.getenv("SSL_PRIVKEY_FILE"): ssl_mode = True # SSL mode could have been set by -s too, therefor fallback to default values if ssl_mode: if not os.path.isfile(ssl_cert_file) or not os.path.isfile(ssl_privkey_file): logger.critical(f"Cannot start SSL/HTTPS mode, Please be sure that {ssl_cert_file}' and '{ssl_privkey_file}' exist in in {os.getcwd()}") os._exit(2) # Without this, a logger will be duplicated logger.remove() try: log_level_for_stdout = { 'TRACE', 'DEBUG', 'INFO', 'SUCCESS' } logger.configure(handlers=[ {"sink": sys.stdout, "level": logger_level, "filter" : lambda record: record['level'].name in log_level_for_stdout}, {"sink": sys.stderr, "level": logger_level, "filter": lambda record: record['level'].name not in log_level_for_stdout}, ]) # Catch negative number or wrong log level name except ValueError: print("Available log level names: TRACE, DEBUG(default), INFO, SUCCESS," " WARNING, ERROR, CRITICAL") sys.exit(2) # Disable verbose pyppeteer logging to prevent memory leaks from large CDP messages # Set both parent and child loggers since pyppeteer hardcodes DEBUG level logging.getLogger('pyppeteer.connection').setLevel(logging.WARNING) logging.getLogger('pyppeteer.connection.Connection').setLevel(logging.WARNING) # isnt there some 
@thingy to attach to each route to tell it, that this route needs a datastore app_config = { 'datastore_path': datastore_path, 'batch_mode': batch_mode, 'recheck_watches': recheck_watches, 'recheck_repeat_count': recheck_repeat_count } if not os.path.isdir(app_config['datastore_path']): if create_datastore_dir: os.makedirs(app_config['datastore_path'], exist_ok=True) else: logger.critical( f"ERROR: Directory path for the datastore '{app_config['datastore_path']}'" f" does not exist, cannot start, please make sure the" f" directory exists or specify a directory with the -d option.\n" f"Or use the -C parameter to create the directory.") sys.exit(2) try: datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__, include_default_watches=include_default_watches) except JSONDecodeError as e: # Dont' start if the JSON DB looks corrupt logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.") logger.critical(str(e)) sys.exit(1) # Testing mode: Exit cleanly after datastore initialization (for CI/CD upgrade tests) if os.environ.get('TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD'): logger.success(f"TESTING MODE: Datastore loaded successfully from {app_config['datastore_path']}") logger.success(f"TESTING MODE: Schema version: {datastore.data['settings']['application'].get('schema_version', 'unknown')}") logger.success(f"TESTING MODE: Loaded {len(datastore.data['watching'])} watches") logger.success("TESTING MODE: Exiting cleanly (TESTING_SHUTDOWN_AFTER_DATASTORE_LOAD is set)") sys.exit(0) # Apply all_paused setting if specified via CLI if all_paused is not None: datastore.data['settings']['application']['all_paused'] = all_paused logger.info(f"Setting all watches paused: {all_paused}") # Inject datastore into plugins that need access to settings from changedetectionio.pluggy_interface import inject_datastore_into_plugins inject_datastore_into_plugins(datastore) # Step 1: 
Add URLs with their options (if provided via -u flags) added_watch_uuids = [] if urls_to_add: logger.info(f"Adding {len(urls_to_add)} URL(s) from command line") for idx, url in enumerate(urls_to_add): extras = url_options.get(idx, {}) if extras: logger.debug(f"Adding watch {idx}: {url} with options: {extras}") else: logger.debug(f"Adding watch {idx}: {url}") new_uuid = datastore.add_watch(url=url, extras=extras) if new_uuid: added_watch_uuids.append(new_uuid) logger.success(f"Added watch: {url} (UUID: {new_uuid})") else: logger.error(f"Failed to add watch: {url}") app = changedetection_app(app_config, datastore) # Step 2: Queue newly added watches (if -u was provided in batch mode) # This must happen AFTER app initialization so update_q is available if batch_mode and added_watch_uuids: from changedetectionio.flask_app import update_q from changedetectionio import queuedWatchMetaData, worker_pool logger.info(f"Batch mode: Queuing {len(added_watch_uuids)} newly added watches") for watch_uuid in added_watch_uuids: try: worker_pool.queue_item_async_safe( update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}) ) logger.debug(f"Queued newly added watch: {watch_uuid}") except Exception as e: logger.error(f"Failed to queue watch {watch_uuid}: {e}") # Step 3: Queue watches for recheck (if -r was provided) # This must happen AFTER app initialization so update_q is available if recheck_watches is not None: from changedetectionio.flask_app import update_q from changedetectionio import queuedWatchMetaData, worker_pool watches_to_queue = [] if recheck_watches == 'all': # Queue all watches, excluding those already queued in batch mode all_watches = list(datastore.data['watching'].keys()) if batch_mode and added_watch_uuids: # Exclude newly added watches that were already queued in batch mode watches_to_queue = [uuid for uuid in all_watches if uuid not in added_watch_uuids] logger.info(f"Queuing {len(watches_to_queue)} existing watches for recheck 
({len(added_watch_uuids)} newly added watches already queued)") else: watches_to_queue = all_watches logger.info(f"Queuing all {len(watches_to_queue)} watches for recheck") else: # Queue specific UUIDs watches_to_queue = recheck_watches logger.info(f"Queuing {len(watches_to_queue)} specific watches for recheck") queued_count = 0 for watch_uuid in watches_to_queue: if watch_uuid in datastore.data['watching']: try: worker_pool.queue_item_async_safe( update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}) ) queued_count += 1 logger.debug(f"Queued watch for recheck: {watch_uuid}") except Exception as e: logger.error(f"Failed to queue watch {watch_uuid}: {e}") else: logger.warning(f"Watch UUID not found in datastore: {watch_uuid}") logger.success(f"Successfully queued {queued_count} watches for recheck") # Step 4: Setup batch mode monitor (if -b was provided) if batch_mode: from changedetectionio.flask_app import update_q # Safety check: Ensure Flask app is not already running on this port # Batch mode should never run alongside the web server import socket test_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) try: # Try to bind to the configured host:port (no SO_REUSEADDR - strict check) test_socket.bind((host, port)) test_socket.close() logger.debug(f"Batch mode: Port {port} is available (Flask app not running)") except OSError as e: test_socket.close() # errno 98 = EADDRINUSE (Linux) # errno 48 = EADDRINUSE (macOS) # errno 10048 = WSAEADDRINUSE (Windows) if e.errno in (48, 98, 10048) or "Address already in use" in str(e) or "already in use" in str(e).lower(): logger.critical(f"ERROR: Batch mode cannot run - port {port} is already in use") logger.critical(f"The Flask web server appears to be running on {host}:{port}") logger.critical(f"Batch mode is designed for standalone operation (CI/CD, cron jobs, etc.)") logger.critical(f"Please either stop the Flask web server, or use a different port with -p PORT") sys.exit(1) else: # Some 
def batch_mode_monitor():
    """Wait for the batch queue to drain, optionally repeat, then shut down.

    Runs in a background thread. Each iteration waits (up to a fixed
    timeout) for all queued checks to finish. When more iterations remain
    (``recheck_repeat_count`` > 1 together with ``recheck_watches``), the
    watches are re-queued via ``queue_watches_for_recheck``; after the last
    iteration the process sends itself SIGTERM so the regular shutdown
    handler runs. Any unexpected error also triggers a SIGTERM so a batch
    run never hangs forever.
    """
    import os
    import signal
    import time

    # Imported here instead of relying on the closure: the enclosing scope
    # only imports worker_pool inside conditional blocks (newly added URLs in
    # batch mode, or an explicit recheck request), so a plain batch run could
    # otherwise reference an unbound name.
    from changedetectionio import worker_pool

    wait_timeout = 300  # seconds allowed for one iteration to drain the queue

    # Track iterations if repeat mode is enabled
    current_iteration = 1
    total_iterations = recheck_repeat_count if recheck_watches and recheck_repeat_count > 1 else 1

    if total_iterations > 1:
        logger.info(f"Batch mode: Will repeat recheck {total_iterations} times")
    else:
        logger.info("Batch mode: Waiting for all queued items to complete...")

    # Wait a bit for workers to start processing
    time.sleep(3)

    try:
        while current_iteration <= total_iterations:
            logger.info(f"Batch mode: Waiting for iteration {current_iteration}/{total_iterations} to complete...")

            # Use the shared wait_for_all_checks function
            completed = worker_pool.wait_for_all_checks(update_q, timeout=wait_timeout)

            # Report a timeout as a timeout; previously it was followed by a
            # misleading "completed" success message.
            if completed:
                logger.success(f"Batch mode: Iteration {current_iteration}/{total_iterations} completed")
            else:
                logger.warning(f"Batch mode: Iteration {current_iteration} timed out after {wait_timeout} seconds")

            if current_iteration >= total_iterations:
                # All iterations done (or timed out) - trigger shutdown
                logger.success(f"Batch mode: All {total_iterations} iterations completed, initiating shutdown")
                os.kill(os.getpid(), signal.SIGTERM)
                return

            # More iterations requested: re-queue and go around again
            logger.info(f"Batch mode: Starting iteration {current_iteration + 1}...")
            current_iteration += 1
            queue_watches_for_recheck(datastore, current_iteration)

            # Brief pause before continuing
            time.sleep(2)

    except Exception as e:
        # Top-level boundary of the monitor thread: log and shut down rather
        # than leave the batch process running with nobody watching the queue.
        logger.error(f"Batch mode monitor error: {e}")
        logger.error("Initiating emergency shutdown")
        os.kill(os.getpid(), signal.SIGTERM)
@app.after_request
def hide_referrer(response):
    """Optionally restrict the Referer sent to monitored websites.

    When the HIDE_REFERER environment variable parses as true, every response
    gets a ``Referrer-Policy: same-origin`` header, so monitored websites do
    not receive a Referer header when a user clicks on an outgoing link.
    The response object is always returned.
    """
    hide_enabled = strtobool(os.getenv("HIDE_REFERER", 'false'))
    if not hide_enabled:
        return response

    response.headers["Referrer-Policy"] = "same-origin"
    return response
def convert_query_param_to_type(value, schema_property):
    """
    Turn a raw query-string value into the Python type its schema asks for.

    Args:
        value: String value taken from the query parameters
        schema_property: Schema property definition carrying a 'type' or
            'anyOf' entry

    Returns:
        The converted value; anything without a recognised type is returned
        unchanged as a string.

    Raises:
        ValueError: when an 'object' value is not valid JSON, or when the
            integer/number conversion rejects the value.

    Both OpenAPI 3.1 ways of spelling a nullable type are understood:
    - type: [string, 'null']  (array format)
    - anyOf: [{type: string}, {type: null}]  (anyOf format)
    """
    declared = schema_property.get('type')

    if isinstance(declared, list):
        # Array form: the first entry that is not 'null' wins
        declared = next((name for name in declared if name != 'null'), None)
    elif 'anyOf' in schema_property:
        # anyOf form: the first alternative with a real (non-null) type wins
        alternatives = (alt.get('type') for alt in schema_property['anyOf'])
        declared = next((name for name in alternatives if name and name != 'null'), None)

    if declared == 'array':
        # Accept a JSON array, falling back to a comma-separated list
        if value.startswith('['):
            try:
                return json.loads(value)
            except json.JSONDecodeError:
                pass  # not valid JSON after all - treat as comma-separated
        return [piece.strip() for piece in value.split(',')]

    if declared == 'object':
        # e.g. time_between_check, headers
        try:
            return json.loads(value)
        except json.JSONDecodeError:
            raise ValueError(f"Invalid JSON object for field: {value}")

    if declared == 'boolean':
        return strtobool(value)

    if declared == 'integer':
        return int(value)

    if declared == 'number':
        return float(value)

    # No conversion known for this type: hand the string back untouched
    return value
import get_watch_schema_properties # Special parameters that are NOT watch configuration special_params = {'tag', 'tag_uuids', 'dedupe', 'proxy'} extras = {} # Handle special 'proxy' parameter if request.args.get('proxy'): plist = self.datastore.proxy_list if not request.args.get('proxy') in plist: proxy_list_str = ', '.join(plist) if plist else 'none configured' return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400 else: extras['proxy'] = request.args.get('proxy') # Handle special 'dedupe' parameter dedupe = strtobool(request.args.get('dedupe', 'true')) # Handle special 'tag' and 'tag_uuids' parameters tags = request.args.get('tag') tag_uuids = request.args.get('tag_uuids') if tag_uuids: tag_uuids = tag_uuids.split(',') # Extract ALL other query parameters as watch configuration # Get schema from OpenAPI spec (replaces old schema_create_watch) schema_properties = get_watch_schema_properties() for param_name, param_value in request.args.items(): # Skip special parameters if param_name in special_params: continue # Skip if not in schema (unknown parameter) if param_name not in schema_properties: return f"Unknown watch configuration parameter: {param_name}", 400 # Convert to appropriate type based on schema try: converted_value = convert_query_param_to_type(param_value, schema_properties[param_name]) extras[param_name] = converted_value except (ValueError, json.JSONDecodeError) as e: return f"Invalid value for parameter '{param_name}': {str(e)}", 400 # Validate processor if provided if 'processor' in extras: from changedetectionio.processors import available_processors available = [p[0] for p in available_processors()] if extras['processor'] not in available: return f"Invalid processor '{extras['processor']}'. 
Available processors: {', '.join(available)}", 400 # Validate fetch_backend if provided if 'fetch_backend' in extras: from changedetectionio.content_fetchers import available_fetchers available = [f[0] for f in available_fetchers()] # Also allow 'system' and extra_browser_* patterns is_valid = ( extras['fetch_backend'] == 'system' or extras['fetch_backend'] in available or extras['fetch_backend'].startswith('extra_browser_') ) if not is_valid: return f"Invalid fetch_backend '{extras['fetch_backend']}'. Available: system, {', '.join(available)}", 400 # Validate notification_urls if provided if 'notification_urls' in extras: from wtforms import ValidationError from changedetectionio.api.Notifications import validate_notification_urls try: validate_notification_urls(extras['notification_urls']) except ValidationError as e: return f"Invalid notification_urls: {str(e)}", 400 urls = request.get_data().decode('utf8').splitlines() # Clean and validate URLs upfront urls_to_import = [] for url in urls: url = url.strip() if not len(url): continue # Validate URL if not is_safe_valid_url(url): return f"Invalid or unsupported URL - {url}", 400 # Check for duplicates if dedupe is enabled if dedupe and self.datastore.url_exists(url): continue urls_to_import.append(url) # For small imports, process synchronously for immediate feedback if len(urls_to_import) < IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD: added = [] for url in urls_to_import: new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids) added.append(new_uuid) return added, 200 # For large imports (>= 20), process in background thread else: import threading from loguru import logger def import_watches_background(): """Background thread to import watches - discarded after completion.""" try: added_count = 0 for url in urls_to_import: try: self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids) added_count += 1 except Exception as e: logger.error(f"Error importing URL 
class Notifications(Resource):
    """API resource for the system-wide list of notification URLs."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    @auth.check_token
    @validate_openapi_request('getNotifications')
    def get(self):
        """Return Notification URL List."""
        notification_urls = self.datastore.data.get('settings', {}).get('application', {}).get('notification_urls', [])
        return {
            'notification_urls': notification_urls,
        }, 200

    @auth.check_token
    @validate_openapi_request('addNotifications')
    def post(self):
        """Create Notification URLs.

        Returns 201 with the URLs the datastore accepted, or 400 when the
        payload is not a list, fails validation, or nothing was added.
        """
        from wtforms import ValidationError

        json_data = request.get_json()
        notification_urls = json_data.get("notification_urls", [])

        # Check the shape before the validator or the loop iterate the value
        # (same guard and message as put()).
        if not isinstance(notification_urls, list):
            return "Invalid input format", 400

        try:
            validate_notification_urls(notification_urls)
        except ValidationError as e:
            return str(e), 400

        added_urls = []
        for url in notification_urls:
            # Non-string entries cannot be stripped/stored; skip them the way put() does
            if not isinstance(url, str):
                continue
            added_url = self.datastore.add_notification_url(url.strip())
            if added_url:
                added_urls.append(added_url)

        if not added_urls:
            return "No valid notification URLs were added", 400

        return {'notification_urls': added_urls}, 201

    @auth.check_token
    @validate_openapi_request('replaceNotifications')
    def put(self):
        """Replace Notification URLs.

        Returns 200 with the stored list, or 400 when the payload is not a
        list or fails validation.
        """
        from wtforms import ValidationError

        json_data = request.get_json()
        notification_urls = json_data.get("notification_urls", [])

        # This guard used to run only after validation had already consumed
        # the value, so it could never protect the validator.
        if not isinstance(notification_urls, list):
            return "Invalid input format", 400

        try:
            validate_notification_urls(notification_urls)
        except ValidationError as e:
            return str(e), 400

        clean_urls = [url.strip() for url in notification_urls if isinstance(url, str)]
        self.datastore.data['settings']['application']['notification_urls'] = clean_urls
        self.datastore.commit()

        return {'notification_urls': clean_urls}, 200

    @auth.check_token
    @validate_openapi_request('deleteNotifications')
    def delete(self):
        """Delete Notification URLs.

        Aborts with 400 when the payload is not a list or none of the given
        URLs are currently stored; otherwise commits and returns 204.
        """
        json_data = request.get_json()
        urls_to_delete = json_data.get("notification_urls", [])
        if not isinstance(urls_to_delete, list):
            abort(400, message="Expected a list of notification URLs.")

        notification_urls = self.datastore.data['settings']['application'].get('notification_urls', [])
        deleted = []

        for url in urls_to_delete:
            # A non-string entry can never match a stored URL
            if not isinstance(url, str):
                continue
            clean_url = url.strip()
            if clean_url in notification_urls:
                notification_urls.remove(clean_url)
                deleted.append(clean_url)

        if not deleted:
            abort(400, message="No matching notification URLs found.")

        self.datastore.data['settings']['application']['notification_urls'] = notification_urls
        self.datastore.commit()

        return 'OK', 204
class Search(Resource):
    """API resource for searching watches by URL or title text."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    @auth.check_token
    @validate_openapi_request('searchWatches')
    def get(self):
        """Search for watches by URL or title text.

        Query parameters:
            q: search text (required)
            tag: optional tag to limit the search to
            partial: optional boolean-like flag passed through to the datastore search

        Returns a dict keyed by watch UUID with a summary of each match and
        HTTP 200; aborts with 400 when 'q' is missing or 'partial' cannot be
        parsed.
        """
        from changedetectionio.strtobool import strtobool

        query = request.args.get('q', '').strip()
        tag_limit = request.args.get('tag', '').strip()

        partial = False
        if 'partial' in request.args:
            try:
                partial = bool(strtobool(request.args.get('partial', '0')))
            except ValueError:
                # An unparseable flag is a client error, not a server error
                abort(400, message="Invalid value for 'partial' parameter, expected a boolean")

        # Require a search query
        if not query:
            abort(400, message="Search query 'q' parameter is required")

        # Use the search function from the datastore
        matching_uuids = self.datastore.search_watches_for_url(query=query, tag_limit=tag_limit, partial=partial)

        # Build the response with watch details
        watching = self.datastore.data['watching']
        results = {}
        for uuid in matching_uuids:
            watch = watching.get(uuid)
            if watch is None:
                # The lookup uses .get(), so a UUID that is no longer present
                # must be skipped instead of dereferencing None.
                continue
            results[uuid] = {
                'last_changed': watch.last_changed,
                'last_checked': watch['last_checked'],
                'last_error': watch['last_error'],
                'title': watch['title'],
                'url': watch['url'],
                'viewed': watch.viewed
            }

        return results, 200
class SystemInfo(Resource):
    """API resource reporting queue size, overdue watches, uptime, watch count and version."""

    def __init__(self, **kwargs):
        # Both dependencies are handed in by whoever registers the resource
        self.datastore = kwargs['datastore']
        self.update_q = kwargs['update_q']

    @auth.check_token
    @validate_openapi_request('getSystemInfo')
    def get(self):
        """Return system info."""
        import time

        # Grace period before a watch counts as overdue
        grace_seconds = 5 * 60
        overdue_watches = []

        # Report every watch whose last check is older than its interval plus
        # the grace period. Not super accurate (the watch may just have been
        # edited) but better than nothing.
        for watch_uuid, watch in self.datastore.data.get('watching', {}).items():
            # Per-watch interval, falling back to the system wide default
            interval = watch.threshold_seconds() or self.datastore.threshold_seconds
            elapsed = time.time() - watch.get('last_checked')
            if elapsed - grace_seconds > interval:
                overdue_watches.append(watch_uuid)

        from changedetectionio import __version__ as main_version

        info = {
            'queue_size': self.update_q.qsize(),
            'overdue_watches': overdue_watches,
            'uptime': round(time.time() - self.datastore.start_time, 2),
            'watch_count': len(self.datastore.data.get('watching', {})),
            'version': main_version
        }
        return info, 200
class Tag(Resource):
    """API resource for a single tag/group: read, recheck, mute, update, create and delete."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']
        # Queue that recheck requests are pushed onto (see get())
        self.update_q = kwargs['update_q']

    # Get information about a single tag
    # curl http://localhost:5000/api/v1/tag/
    @auth.check_token
    @validate_openapi_request('getTag')
    def get(self, uuid):
        """Get data for a single tag/group, toggle notification muting, or recheck all.

        Behaviour depends on the query string:
        - ``recheck`` set: queue every non-paused watch carrying this tag and
          return a status dict (200 when queued inline, 202 when handed to a
          background thread).
        - ``muted=muted`` / ``muted=unmuted``: set the tag's
          ``notification_muted`` flag, commit, and return "OK".
        - otherwise: return the tag's data with watch-only runtime fields removed.

        Aborts with 404 when no tag has the given UUID.
        """
        tag = self.datastore.data['settings']['application']['tags'].get(uuid)
        if not tag:
            abort(404, message=f'No tag exists with the UUID of {uuid}')

        if request.args.get('recheck'):
            # Recheck all watches with this tag, including muted
            # First collect watches to queue, ordered by 'last_checked' ascending
            watches_to_queue = []
            for k in sorted(self.datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
                watch_uuid = k[0]
                watch = k[1]
                if not watch['paused'] and tag['uuid'] in watch['tags']:
                    watches_to_queue.append(watch_uuid)

            # If less than 20 watches, queue synchronously for immediate feedback
            if len(watches_to_queue) < 20:
                for watch_uuid in watches_to_queue:
                    worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
                return {'status': f'OK, queued {len(watches_to_queue)} watches for rechecking'}, 200
            else:
                # 20+ watches - queue in background thread to avoid blocking API response
                def queue_watches_background():
                    """Background thread to queue watches - discarded after completion."""
                    try:
                        for watch_uuid in watches_to_queue:
                            worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
                        logger.info(f"Background queueing complete for tag {tag['uuid']}: {len(watches_to_queue)} watches queued")
                    except Exception as e:
                        # Errors are only logged; the 202 response has already been sent
                        logger.error(f"Error in background queueing for tag {tag['uuid']}: {e}")

                # Start background thread and return immediately
                thread = threading.Thread(target=queue_watches_background, daemon=True, name=f"QueueTag-{tag['uuid'][:8]}")
                thread.start()
                return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202

        if request.args.get('muted', '') == 'muted':
            tag['notification_muted'] = True
            tag.commit()
            return "OK", 200
        elif request.args.get('muted', '') == 'unmuted':
            tag['notification_muted'] = False
            tag.commit()
            return "OK", 200

        # Filter out Watch-specific runtime fields that don't apply to Tags (yet)
        # TODO: Future enhancement - aggregate these values from all Watches that have this tag:
        #   - check_count: sum of all watches' check_count
        #   - last_checked: most recent last_checked from all watches
        #   - last_changed: most recent last_changed from all watches
        #   - consecutive_filter_failures: count of watches with failures
        #   - etc.
        # These come from watch_base inheritance but currently have no meaningful value for Tags
        watch_only_fields = {
            'browser_steps_last_error_step',
            'check_count',
            'consecutive_filter_failures',
            'content-type',
            'fetch_time',
            'last_changed',
            'last_checked',
            'last_error',
            'last_notification_error',
            'last_viewed',
            'notification_alert_count',
            'page_title',
            'previous_md5',
            'remote_server_reply'
        }

        # Create clean tag dict without Watch-specific fields
        clean_tag = {k: v for k, v in tag.items() if k not in watch_only_fields}
        # Note: returned without an explicit status code, unlike the other branches
        return clean_tag

    @auth.check_token
    @validate_openapi_request('deleteTag')
    def delete(self, uuid):
        """Delete a tag/group and remove it from all watches.

        Returns 'OK' with 204 on success; aborts with 400 when the tag does not exist.
        """
        # NOTE(review): a missing tag yields 400 here, while get() and put()
        # use 404 for the same condition - confirm whether that is intended.
        if not self.datastore.data['settings']['application']['tags'].get(uuid):
            abort(400, message='No tag exists with the UUID of {}'.format(uuid))

        # Delete the tag, and any tag reference
        del self.datastore.data['settings']['application']['tags'][uuid]

        # Remove tag from all watches, committing each watch that changed
        for watch_uuid, watch in self.datastore.data['watching'].items():
            if watch.get('tags') and uuid in watch['tags']:
                watch['tags'].remove(uuid)
                watch.commit()

        return 'OK', 204

    @auth.check_token
    @validate_openapi_request('updateTag')
    def put(self, uuid):
        """Update tag information.

        Validates ``notification_urls`` when present, silently drops readOnly
        fields, rejects unknown fields with 400, then applies and commits the
        update and clears the checksums of watches using the tag.
        Aborts with 404 when the tag does not exist.
        """
        tag = self.datastore.data['settings']['application']['tags'].get(uuid)
        if not tag:
            abort(404, message='No tag exists with the UUID of {}'.format(uuid))

        # Make a mutable copy of request.json for modification
        json_data = dict(request.json)

        # Validate notification_urls if provided
        if 'notification_urls' in json_data:
            from wtforms import ValidationError
            from changedetectionio.api.Notifications import validate_notification_urls
            try:
                notification_urls = json_data.get('notification_urls', [])
                validate_notification_urls(notification_urls)
            except ValidationError as e:
                return str(e), 400

        # Filter out readOnly fields (extracted from OpenAPI spec Tag schema)
        # These are system-managed fields that should never be user-settable
        from . import get_readonly_tag_fields
        readonly_fields = get_readonly_tag_fields()

        # Tag model inherits from watch_base but has no @property attributes of its own
        # So we only need to filter readOnly fields
        for field in readonly_fields:
            json_data.pop(field, None)

        # Validate remaining fields - reject truly unknown fields
        # Get valid fields from Tag schema
        from . import get_tag_schema_properties
        valid_fields = set(get_tag_schema_properties().keys())

        # Check for unknown fields
        unknown_fields = set(json_data.keys()) - valid_fields
        if unknown_fields:
            return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400

        tag.update(json_data)
        tag.commit()

        # Clear checksums for all watches using this tag to force reprocessing
        # Tag changes affect inherited configuration
        cleared_count = self.datastore.clear_checksums_for_tag(uuid)
        logger.info(f"Tag {uuid} updated via API, cleared {cleared_count} watch checksums")

        return "OK", 200

    @auth.check_token
    @validate_openapi_request('createTag')
    def post(self):
        """Create a single tag/group.

        Rejects unknown fields with 400. On success returns ``{'uuid': ...}``
        with 201; returns 400 when the datastore does not hand back a UUID.
        """
        json_data = request.get_json()
        title = json_data.get("title",'').strip()

        # Validate that only valid fields are provided
        # Get valid fields from Tag schema
        from . import get_tag_schema_properties
        valid_fields = set(get_tag_schema_properties().keys())

        # Check for unknown fields
        unknown_fields = set(json_data.keys()) - valid_fields
        if unknown_fields:
            return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400

        new_uuid = self.datastore.add_tag(title=title)
        if new_uuid:
            # Apply any extra fields (e.g. processor_config_restock_diff) beyond just title
            extra = {k: v for k, v in json_data.items() if k != 'title'}
            if extra:
                tag = self.datastore.data['settings']['application']['tags'].get(new_uuid)
                if tag:
                    tag.update(extra)
                    tag.commit()
            return {'uuid': new_uuid}, 201
        else:
            return "Invalid or unsupported tag", 400
def validate_time_between_check_required(json_data):
    """
    Check that a watch specifies its own recheck interval when it opts out of the global one.

    ``time_between_check_use_default`` defaults to True when absent, in which
    case nothing further is checked. Otherwise ``time_between_check`` must
    carry at least one unit greater than zero.

    Returns None if valid, or an error message string if invalid.
    """
    error_message = (
        "At least one time interval (weeks, days, hours, minutes, or seconds) "
        "must be specified when not using global settings."
    )

    # Using the global settings: no per-watch interval is required
    if json_data.get('time_between_check_use_default', True):
        return None

    intervals = json_data.get('time_between_check')
    if not intervals:
        # Missing/empty time_between_check while not using defaults
        return error_message

    # Evaluate every unit up front (no short-circuit); missing or None counts as 0
    units = ('weeks', 'days', 'hours', 'minutes', 'seconds')
    positive = [(intervals.get(unit) or 0) > 0 for unit in units]

    return None if any(positive) else error_message
class Watch(Resource):
    """REST resource for a single watch: read (with recheck/pause/mute actions), update and delete."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']
        self.update_q = kwargs['update_q']

    # Get information about a single watch, excluding the history list (can be large)
    # curl http://localhost:5000/api/v1/watch/<uuid>
    # @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK"
    # ?recheck=true
    @auth.check_token
    @validate_openapi_request('getWatch')
    def get(self, uuid):
        """Get information about a single watch, recheck, pause, or mute.

        With ``?recheck``, ``?paused=paused|unpaused`` or ``?muted=muted|unmuted``
        the action is performed and ``("OK", 200)`` is returned. Otherwise the
        watch is returned as a dict with a few computed properties added.
        Aborts with 404 when the watch does not exist.
        """
        # Get watch reference first (for pause/mute operations)
        watch_obj = self.datastore.data['watching'].get(uuid)
        if not watch_obj:
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))

        # Create a dict copy for JSON response (with lock for thread safety).
        # WARNING: dict() is a SHALLOW copy - nested dicts are shared with the original!
        # That is only safe because the code below ADDS top-level keys and never
        # modifies nested dicts. If nested values must change, deepcopy first.
        with self.datastore.lock:
            watch = dict(watch_obj)

        if request.args.get('recheck'):
            worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
            return "OK", 200

        paused_arg = request.args.get('paused', '')
        if paused_arg == 'paused':
            watch_obj.pause()
            watch_obj.commit()
            return "OK", 200
        elif paused_arg == 'unpaused':
            watch_obj.unpause()
            watch_obj.commit()
            return "OK", 200

        muted_arg = request.args.get('muted', '')
        if muted_arg == 'muted':
            watch_obj.mute()
            watch_obj.commit()
            return "OK", 200
        elif muted_arg == 'unmuted':
            watch_obj.unmute()
            watch_obj.commit()
            return "OK", 200

        # Return without history, get that via another API call
        # Properties are not returned as a JSON, so add the required props manually
        watch['history_n'] = watch_obj.history_n
        # attr .last_changed will check for the last written text snapshot on change
        watch['last_changed'] = watch_obj.last_changed
        watch['viewed'] = watch_obj.viewed
        # No trailing comma here: it previously wrapped the link in a 1-tuple,
        # which was serialised as a JSON list instead of a string.
        watch['link'] = watch_obj.link

        return watch

    @auth.check_token
    @validate_openapi_request('deleteWatch')
    def delete(self, uuid):
        """Delete a watch and related history; aborts with 400 if it does not exist."""
        if not self.datastore.data['watching'].get(uuid):
            abort(400, message='No watch exists with the UUID of {}'.format(uuid))

        self.datastore.delete(uuid)
        return 'OK', 204

    @auth.check_token
    @validate_openapi_request('updateWatch')
    def put(self, uuid):
        """Update watch information.

        Returns ``("OK", 200)`` on success or a ``(message, 400)`` tuple when
        the proxy, time interval, notification URLs, URL or field names are
        invalid. Aborts with 404 when the watch does not exist.
        """
        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))

        if request.json.get('proxy'):
            plist = self.datastore.proxy_list
            if not plist or request.json.get('proxy') not in plist:
                proxy_list_str = ', '.join(plist) if plist else 'none configured'
                return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400

        # Validate time_between_check when not using defaults
        validation_error = validate_time_between_check_required(request.json)
        if validation_error:
            return validation_error, 400

        # Validate notification_urls if provided
        if 'notification_urls' in request.json:
            from wtforms import ValidationError
            from changedetectionio.api.Notifications import validate_notification_urls
            try:
                notification_urls = request.json.get('notification_urls', [])
                validate_notification_urls(notification_urls)
            except ValidationError as e:
                return str(e), 400

        # XSS etc protection - validate URL if it's being updated
        if 'url' in request.json:
            new_url = request.json.get('url')

            # URL must be a non-empty string
            if new_url is None:
                return "URL cannot be null", 400
            if not isinstance(new_url, str):
                return "URL must be a string", 400
            if not new_url.strip():
                return "URL cannot be empty or whitespace only", 400
            if not is_safe_valid_url(new_url.strip()):
                return "Invalid or unsupported URL format. URL must use http://, https://, or ftp:// protocol", 400

        # Handle processor-config-* fields separately (save to JSON, not datastore)
        from changedetectionio import processors

        # Make a mutable copy of request.json for modification
        json_data = dict(request.json)

        # Extract and remove processor config fields from json_data
        processor_config_data = processors.extract_processor_config_from_form_data(json_data)

        # readOnly fields come from the OpenAPI Watch schema and are system-managed;
        # @property names are computed by the model and not stored. Both are
        # silently dropped from the update rather than rejected.
        from changedetectionio.model.Watch import model as WatchModel
        fields_to_ignore = get_readonly_watch_fields() | WatchModel.get_property_names()
        for field in fields_to_ignore:
            json_data.pop(field, None)

        # Validate remaining fields - reject truly unknown fields
        from . import get_watch_schema_properties
        valid_fields = set(get_watch_schema_properties().keys())
        # Also allow last_viewed (explicitly defined in UpdateWatch schema)
        valid_fields.add('last_viewed')

        unknown_fields = set(json_data.keys()) - valid_fields
        if unknown_fields:
            return f"Unknown field(s): {', '.join(sorted(unknown_fields))}", 400

        # Update watch with regular (non-processor-config) fields
        watch.update(json_data)
        watch.commit()

        # Save processor config to JSON file
        processors.save_processor_config(self.datastore, uuid, processor_config_data)

        return "OK", 200


class WatchHistory(Resource):
    """REST resource listing the snapshots recorded for one watch."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    # Get a list of available history for a watch by UUID
    # curl http://localhost:5000/api/v1/watch/<uuid>/history
    @auth.check_token
    @validate_openapi_request('getWatchHistory')
    def get(self, uuid):
        """Get a list of all historical snapshots available for a watch."""
        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
        return watch.history, 200
class WatchSingleHistory(Resource):
    """REST resource returning one stored snapshot of a watch."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    @auth.check_token
    @validate_openapi_request('getWatchSnapshot')
    def get(self, uuid, timestamp):
        """Get single snapshot from watch.

        ``timestamp`` may be the literal ``'latest'``. With ``?html`` the
        fetched HTML is returned instead of the text snapshot. Aborts with 404
        for an unknown watch, an empty history or an unknown timestamp.
        """
        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            abort(404, message=f"No watch exists with the UUID of {uuid}")

        if not len(watch.history):
            abort(404, message=f"Watch found but no history exists for the UUID {uuid}")

        if timestamp == 'latest':
            timestamp = list(watch.history.keys())[-1]

        # Validate that the timestamp exists in history
        if timestamp not in watch.history:
            abort(404, message=f"No history snapshot found for timestamp '{timestamp}'")

        if request.args.get('html'):
            content = watch.get_fetched_html(timestamp)
            if content:
                response = make_response(content, 200)
                response.mimetype = "text/html"
            else:
                response = make_response("No content found", 404)
                response.mimetype = "text/plain"
        else:
            content = watch.get_history_snapshot(timestamp=timestamp)
            response = make_response(content, 200)
            response.mimetype = "text/plain"

        return response


class WatchHistoryDiff(Resource):
    """
    Generate diff between two historical snapshots.

    Note: This API endpoint currently returns text-based diffs and works best
    with the text_json_diff processor. Future processor types (like image_diff,
    restock_diff) may want to implement their own specialized API endpoints for
    returning processor-specific data (e.g., price charts, image comparisons).

    The web UI diff page (/diff/<uuid>) is processor-aware and delegates rendering
    to processors/{type}/difference.py::render() for processor-specific visualizations.
    """

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    @auth.check_token
    @validate_openapi_request('getWatchHistoryDiff')
    def get(self, uuid, from_timestamp, to_timestamp):
        """Generate diff between two historical snapshots.

        ``to_timestamp`` accepts ``'latest'`` and ``from_timestamp`` accepts
        ``'previous'`` (the second-most-recent snapshot). Aborts with 404 for
        an unknown watch/timestamp or insufficient history, and with 400 for
        an unsupported ``format`` query parameter.
        """
        from changedetectionio import diff
        from changedetectionio.notification.handler import apply_service_tweaks

        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            abort(404, message=f"No watch exists with the UUID of {uuid}")

        if not len(watch.history):
            abort(404, message=f"Watch found but no history exists for the UUID {uuid}")

        history_keys = list(watch.history.keys())

        # Handle 'latest' keyword for to_timestamp
        if to_timestamp == 'latest':
            to_timestamp = history_keys[-1]

        # Handle 'previous' keyword for from_timestamp (second-most-recent)
        if from_timestamp == 'previous':
            if len(history_keys) < 2:
                abort(404, message=f"Not enough history entries. Need at least 2 snapshots for 'previous'")
            from_timestamp = history_keys[-2]

        # Validate timestamps exist
        if from_timestamp not in watch.history:
            abort(404, message=f"From timestamp {from_timestamp} not found in watch history")
        if to_timestamp not in watch.history:
            abort(404, message=f"To timestamp {to_timestamp} not found in watch history")

        # Get the format parameter (default to 'text') and validate it
        output_format = request.args.get('format', 'text').lower()
        if output_format not in valid_notification_formats.keys():
            abort(400, message=f"Invalid format. Must be one of: {', '.join(valid_notification_formats.keys())}")

        # word_diff defaults to False (line-level mode); no_markup defaults to False
        word_diff = strtobool(request.args.get('word_diff', 'false'))
        no_markup = strtobool(request.args.get('no_markup', 'false'))

        # Retrieve snapshot contents
        from_version_file_contents = watch.get_history_snapshot(from_timestamp)
        to_version_file_contents = watch.get_history_snapshot(to_timestamp)

        # Support both 'type' (UI parameter) and 'word_diff' (API parameter)
        # for backward compatibility
        diff_type = request.args.get('type', 'diffLines')
        if diff_type == 'diffWords':
            word_diff = True

        # Boolean diff preferences, matching the UI's DIFF_PREFERENCES_CONFIG defaults
        changes_only = strtobool(request.args.get('changesOnly', 'false'))
        ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
        include_removed = strtobool(request.args.get('removed', 'true'))
        include_added = strtobool(request.args.get('added', 'true'))
        include_replaced = strtobool(request.args.get('replaced', 'true'))

        # Generate the diff with all preferences
        content = diff.render_diff(
            previous_version_file_contents=from_version_file_contents,
            newest_version_file_contents=to_version_file_contents,
            ignore_junk=ignore_whitespace,
            include_equal=not changes_only,
            include_removed=include_removed,
            include_added=include_added,
            include_replaced=include_replaced,
            word_diff=word_diff,
        )

        # Skip formatting if no_markup is set
        if no_markup:
            mimetype = "text/plain"
        else:
            # Apply formatting based on the requested format
            if output_format == 'htmlcolor':
                from changedetectionio.notification.handler import apply_html_color_to_body
                content = apply_html_color_to_body(n_body=content)
                mimetype = "text/html"
            else:
                # Apply service tweaks for text/html formats
                # Pass empty URL and title as they're not used for the placeholder replacement we need
                _, content, _ = apply_service_tweaks(
                    url='',
                    n_body=content,
                    n_title='',
                    requested_output_format=output_format
                )
                mimetype = "text/html" if output_format == 'html' else "text/plain"

            if 'html' in output_format:
                # NOTE(review): the '<br>' tag is restored here on the assumption that
                # it was stripped from the literal in transit - confirm against the
                # canonical file. HTML output needs an explicit line-break element.
                content = newline_re.sub('<br>\r\n', content)

        response = make_response(content, 200)
        response.mimetype = mimetype
        return response


class WatchFavicon(Resource):
    """REST resource serving the stored favicon of a watch."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    @auth.check_token
    @validate_openapi_request('getWatchFavicon')
    def get(self, uuid):
        """Get favicon for a watch; aborts with 404 if the watch or its favicon is missing."""
        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            abort(404, message=f"No watch exists with the UUID of {uuid}")

        favicon_filename = watch.get_favicon_filename()
        if favicon_filename:
            # Use cached MIME type detection
            filepath = os.path.join(watch.data_dir, favicon_filename)
            mime = get_favicon_mime_type(filepath)

            response = make_response(send_from_directory(watch.data_dir, favicon_filename))
            response.headers['Content-type'] = mime
            response.headers['Cache-Control'] = 'max-age=300, must-revalidate'  # Cache for 5 minutes, then revalidate
            return response

        abort(404, message=f'No Favicon available for {uuid}')
class CreateWatch(Resource):
    """REST resource for creating a watch (POST) and listing / mass-rechecking watches (GET)."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']
        self.update_q = kwargs['update_q']

    @auth.check_token
    @validate_openapi_request('createWatch')
    def post(self):
        """Create a single watch.

        Returns ``({'uuid': ...}, 201)`` on success, ``(message, 400)`` for
        invalid input, or ``(message, 429)`` when ``PAGE_WATCH_LIMIT`` is reached.
        """
        json_data = request.get_json()
        url = json_data['url'].strip()

        if not is_safe_valid_url(url):
            return "Invalid or unsupported URL", 400

        if json_data.get('proxy'):
            plist = self.datastore.proxy_list
            if not plist or json_data.get('proxy') not in plist:
                proxy_list_str = ', '.join(plist) if plist else 'none configured'
                return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400

        # Validate time_between_check when not using defaults
        validation_error = validate_time_between_check_required(json_data)
        if validation_error:
            return validation_error, 400

        # Validate notification_urls if provided
        if 'notification_urls' in json_data:
            from wtforms import ValidationError
            from changedetectionio.api.Notifications import validate_notification_urls
            try:
                notification_urls = json_data.get('notification_urls', [])
                validate_notification_urls(notification_urls)
            except ValidationError as e:
                return str(e), 400

        # Handle processor-config-* fields separately (save to JSON, not watch)
        from changedetectionio import processors

        extras = copy.deepcopy(json_data)

        # Extract and remove processor config fields from extras
        processor_config_data = processors.extract_processor_config_from_form_data(extras)

        # Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API).
        # pop() also removes an empty 'tag' value so it never leaks into the stored extras.
        tags = extras.pop('tag', None) or None

        del extras['url']

        new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)

        if new_uuid:
            # Save processor config to separate JSON file
            if processor_config_data:
                processors.save_processor_config(self.datastore, new_uuid, processor_config_data)
            # Dont queue because the scheduler will check that it hasnt been checked before anyway
            return {'uuid': new_uuid}, 201

        # Creation failed - check if it was a limit issue
        page_watch_limit = os.getenv('PAGE_WATCH_LIMIT')
        if page_watch_limit:
            try:
                page_watch_limit = int(page_watch_limit)
                current_watch_count = len(self.datastore.data['watching'])
                if current_watch_count >= page_watch_limit:
                    return f"Watch limit reached ({current_watch_count}/{page_watch_limit} watches). Cannot add more watches.", 429
            except ValueError:
                # A non-numeric limit cannot be evaluated; fall through to the generic error
                pass

        return "Invalid or unsupported URL", 400

    @auth.check_token
    @validate_openapi_request('listWatches')
    def get(self):
        """List watches, or queue every watch for recheck with ``?recheck_all``.

        ``?tag=<title>`` restricts the listing to watches carrying a tag with
        that title (case-insensitive). With ``?recheck_all`` no listing is
        built; a status dict is returned with 200 (queued synchronously) or
        202 (queued by a background thread).
        """
        if request.args.get('recheck_all'):
            # Handled first so a large installation does not pay for building
            # a listing that would be discarded anyway.
            return self._queue_all_for_recheck()

        tag_limit = request.args.get('tag', '').lower()
        watch_list = {}
        for uuid, watch in self.datastore.data['watching'].items():
            # Watch tags by name (replace the other calls?)
            tags = self.datastore.get_all_tags_for_watch(uuid=uuid)
            # A tag without a title can never match the filter (and must not crash it)
            if tag_limit and not any((tag.get('title') or '').lower() == tag_limit for tag in tags.values()):
                continue

            watch_list[uuid] = {
                'last_changed': watch.last_changed,
                'last_checked': watch['last_checked'],
                'last_error': watch['last_error'],
                'link': watch.link,
                'page_title': watch['page_title'],
                'tags': list(tags),
                'title': watch['title'],
                'url': watch['url'],
                'viewed': watch.viewed
            }

        return watch_list, 200

    def _queue_all_for_recheck(self):
        """Queue every watch that is not already queued or running; return the API response tuple."""
        # Snapshot the UUIDs: a live dict view would raise RuntimeError in the
        # background thread if watches were added or removed while it iterates.
        watches_to_queue = list(self.datastore.data['watching'].keys())

        # Queued/running state is captured once, up front
        queued_uuids = set(self.update_q.get_queued_uuids())
        running_uuids = set(worker_pool.get_running_uuids())

        def enqueue(uuid):
            worker_pool.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

        # If less than 20 watches, queue synchronously for immediate feedback
        if len(watches_to_queue) < 20:
            watches_to_queue_filtered = [
                uuid for uuid in watches_to_queue
                if uuid not in queued_uuids and uuid not in running_uuids
            ]
            for uuid in watches_to_queue_filtered:
                enqueue(uuid)

            # Provide feedback about skipped watches
            skipped_count = len(watches_to_queue) - len(watches_to_queue_filtered)
            if skipped_count > 0:
                return {'status': f'OK, queued {len(watches_to_queue_filtered)} watches for rechecking ({skipped_count} already queued or running)'}, 200
            return {'status': f'OK, queued {len(watches_to_queue_filtered)} watches for rechecking'}, 200

        # 20+ watches - queue in background thread to avoid blocking API response
        def queue_all_watches_background():
            """Background thread to queue all watches - discarded after completion."""
            try:
                queued_count = 0
                skipped_count = 0
                for uuid in watches_to_queue:
                    # Check if already queued or running (state captured at start)
                    if uuid not in queued_uuids and uuid not in running_uuids:
                        enqueue(uuid)
                        queued_count += 1
                    else:
                        skipped_count += 1

                logger.info(f"Background queueing complete: {queued_count} watches queued, {skipped_count} skipped (already queued/running)")
            except Exception as e:
                logger.error(f"Error in background queueing all watches: {e}")

        # Start background thread and return immediately
        thread = threading.Thread(target=queue_all_watches_background, daemon=True, name="QueueAllWatches-Background")
        thread.start()
        return {'status': f'OK, queueing {len(watches_to_queue)} watches in background'}, 202
@functools.cache
def build_merged_spec_dict():
    """
    Load the base OpenAPI spec and merge in any per-processor api.yaml extensions.

    Each processor can provide an api.yaml file alongside its __init__.py that defines
    additional schemas (e.g., processor_config_restock_diff). These are merged into
    WatchBase.properties so the spec accurately reflects what the API accepts.

    Plugin processors (via pluggy) are also supported - they just need an api.yaml
    next to their processor module.

    A processor whose api.yaml is empty or malformed is logged and skipped; it
    does not prevent the remaining processors from being merged.

    Returns the merged dict (cached - do not mutate the returned value).
    """
    import os
    import yaml

    here = os.path.dirname(__file__)
    spec_path = os.path.join(here, '../../docs/api-spec.yaml')
    if not os.path.exists(spec_path):
        spec_path = os.path.join(here, '../docs/api-spec.yaml')

    with open(spec_path, 'r', encoding='utf-8') as f:
        spec_dict = yaml.safe_load(f)

    try:
        from changedetectionio.processors import find_processors, get_parent_module
        discovered = list(find_processors())
    except Exception as e:
        logger.warning(f"Failed to merge processor API specs: {e}")
        return spec_dict

    for module, proc_name in discovered:
        try:
            parent = get_parent_module(module)
            if not parent or not hasattr(parent, '__file__'):
                continue

            api_yaml_path = os.path.join(os.path.dirname(parent.__file__), 'api.yaml')
            if not os.path.exists(api_yaml_path):
                continue

            with open(api_yaml_path, 'r', encoding='utf-8') as f:
                # safe_load() returns None for an empty document
                proc_spec = yaml.safe_load(f) or {}

            # Merge schemas
            proc_schemas = proc_spec.get('components', {}).get('schemas', {})
            spec_dict['components']['schemas'].update(proc_schemas)

            # Inject processor_config_{name} into WatchBase if the schema is defined
            schema_key = f'processor_config_{proc_name}'
            if schema_key in proc_schemas:
                spec_dict['components']['schemas']['WatchBase']['properties'][schema_key] = {
                    '$ref': f'#/components/schemas/{schema_key}'
                }

            # Append x-code-samples from processor paths into existing path operations
            base_paths = spec_dict.get('paths', {})
            for path, path_item in proc_spec.get('paths', {}).items():
                if path not in base_paths:
                    continue
                for method, operation in path_item.items():
                    if method not in base_paths[path]:
                        continue
                    if 'x-code-samples' in operation:
                        existing = base_paths[path][method].get('x-code-samples', [])
                        base_paths[path][method]['x-code-samples'] = existing + operation['x-code-samples']
        except Exception as e:
            logger.warning(f"Failed to merge processor API specs: {e} (processor '{proc_name}')")

    return spec_dict


@functools.cache
def get_openapi_spec():
    """Lazy load OpenAPI spec and dependencies only when validation is needed."""
    from openapi_core import OpenAPI  # Lazy import - saves ~10.7 MB on startup
    return OpenAPI.from_dict(build_merged_spec_dict())


@functools.cache
def get_openapi_schema_dict():
    """
    Get the raw OpenAPI spec dictionary for schema access.

    Used by Import endpoint to validate and convert query parameters.
    Returns the merged YAML dict (not the OpenAPI object).
    """
    return build_merged_spec_dict()


@functools.cache
def _resolve_schema_properties(schema_name):
    """
    Generic helper to resolve schema properties, including allOf inheritance.

    Args:
        schema_name: Name of the schema (e.g., 'WatchBase', 'Watch', 'Tag')

    Returns:
        dict: All properties including inherited ones from $ref schemas. The
        outer dict is a fresh object, never an alias into the cached spec, but
        the result itself is cached and shared between callers.
    """
    schemas = get_openapi_schema_dict()['components']['schemas']
    schema = schemas.get(schema_name, {})

    if 'allOf' not in schema:
        # Direct properties (no inheritance)
        return dict(schema.get('properties', {}))

    # Handle allOf (schema inheritance); later entries override earlier ones
    properties = {}
    for item in schema['allOf']:
        # Resolve $ref to parent schema
        if '$ref' in item:
            ref_name = item['$ref'].split('/')[-1]
            properties.update(schemas.get(ref_name, {}).get('properties', {}))
        # Add schema-specific properties
        if 'properties' in item:
            properties.update(item['properties'])

    return properties


@functools.cache
def get_watch_schema_properties():
    """
    Extract watch schema properties from OpenAPI spec for Import endpoint.

    Returns WatchBase properties (all writable Watch fields).
    """
    return _resolve_schema_properties('WatchBase')


# Import readonly field utilities from shared module (avoids circular dependencies with model layer)
from changedetectionio.model.schema_utils import get_readonly_watch_fields, get_readonly_tag_fields
@functools.cache
def get_tag_schema_properties():
    """
    Extract Tag schema properties from OpenAPI spec.

    Returns WatchBase properties + Tag-specific properties (overrides_watch).
    """
    return _resolve_schema_properties('Tag')


def validate_openapi_request(operation_id):
    """Decorator to validate incoming requests against OpenAPI spec.

    GET requests are not validated. For other methods, schema errors raise
    ``BadRequest``; any other failure during validation is logged with
    ``operation_id`` and answered with ``abort(500)``.
    """
    def decorator(f):
        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            from werkzeug.exceptions import BadRequest

            try:
                # Skip OpenAPI validation for GET requests since they don't have request bodies
                if request.method.upper() != 'GET':
                    # Lazy import - only loaded when actually validating a request
                    from openapi_core.contrib.flask import FlaskOpenAPIRequest
                    from openapi_core.templating.paths.exceptions import ServerNotFound, PathNotFound, PathError

                    spec = get_openapi_spec()
                    result = spec.unmarshal_request(FlaskOpenAPIRequest(request))

                    if result.errors:
                        error_details = []
                        for error in result.errors:
                            # Skip path/server validation errors for reverse proxy compatibility.
                            # Flask routing already validates that endpoints exist (returns 404 if not);
                            # validation here is primarily for the request body schema. Behind a
                            # reverse proxy, path prefixes may not match the OpenAPI server
                            # definitions, causing false positives.
                            if isinstance(error, PathError):
                                logger.debug(f"API Call - Skipping path/server validation (delegated to Flask): {error}")
                                continue

                            error_str = str(error)
                            cause = getattr(error, '__cause__', None)

                            if not hasattr(cause, 'schema_errors'):
                                error_details.append(error_str)
                                continue

                            # Extract detailed schema errors from __cause__
                            for schema_error in cause.schema_errors:
                                if schema_error.path:
                                    field = '.'.join(str(p) for p in schema_error.path)
                                else:
                                    field = 'body'
                                if hasattr(schema_error, 'message'):
                                    msg = schema_error.message
                                else:
                                    msg = str(schema_error)
                                error_details.append(f"{field}: {msg}")

                        # Only raise if we have actual validation errors (not path/server issues)
                        if error_details:
                            logger.error(f"API Call - Validation failed: {'; '.join(error_details)}")
                            raise BadRequest(f"Validation failed: {'; '.join(error_details)}")
            except BadRequest:
                # Re-raise BadRequest exceptions (validation failures)
                raise
            except Exception as e:
                # Anything else (e.g. the spec failing to load) is a server-side fault:
                # log it and answer with 500 instead of calling the handler.
                logger.critical(f"OpenAPI validation warning for {operation_id}: {e}")
                abort(500)

            return f(*args, **kwargs)
        return wrapper
    return decorator
def check_token(f):
    """Decorator for API resource methods that enforces the configured API key.

    The decorated callable must be a method: its first positional argument
    (``self``) has to expose a ``datastore`` attribute, from which the
    ``api_access_token_enabled`` and ``api_access_token`` application settings
    are read.

    When token checking is enabled the ``x-api-key`` request header must match
    the configured token, otherwise a 403 JSON response is returned instead of
    calling the wrapped method. The comparison is constant-time, and the check
    fails closed: a missing header, or an enabled check with no token
    configured, is always rejected (a plain ``!=`` treated "no header" and
    "no token" as equal and let the request through).
    """
    @wraps(f)
    def decorated(*args, **kwargs):
        import hmac

        app_settings = args[0].datastore.data['settings']['application']

        # config_api_token_enabled - a UI option in settings if access should obey the key or not
        if app_settings.get('api_access_token_enabled'):
            expected = app_settings.get('api_access_token')
            supplied = request.headers.get('x-api-key')

            token_ok = (
                isinstance(expected, str) and bool(expected)
                and isinstance(supplied, str)
                # Compare as bytes: compare_digest() rejects non-ASCII str input
                and hmac.compare_digest(supplied.encode('utf-8'), expected.encode('utf-8'))
            )
            if not token_ok:
                return make_response(
                    jsonify("Invalid access - API key invalid."), 403
                )

        return f(*args, **kwargs)

    return decorated
def login_optionally_required(func):
    """
    Decorator for routes that require a logged-in user only when a password is configured.

    The request is let through without an authentication check when it targets
    a 'diff_history_page' endpoint while 'shared_diff_access' is enabled, when
    its method is one of flask_login's EXEMPT_METHODS, or when LOGIN_DISABLED is
    set in the app config. Otherwise, if a password is configured (application
    settings or the SALTED_PASS environment variable) and the current user is
    not authenticated, the login manager's unauthorized() response is returned.

    Blueprint-friendly: everything is resolved through current_app.
    """
    @wraps(func)
    def decorated_view(*args, **kwargs):
        from flask import current_app
        import flask_login
        from flask_login import current_user

        # Access datastore through the app config
        app_settings = current_app.config['DATASTORE'].data['settings']['application']
        password_configured = app_settings.get('password') or os.getenv("SALTED_PASS", False)

        endpoint = request.endpoint
        shared_diff_request = (
            endpoint
            and 'diff_history_page' in endpoint
            and app_settings.get('shared_diff_access')
        )
        exempt = (
            shared_diff_request
            or request.method in flask_login.config.EXEMPT_METHODS
            or current_app.config.get('LOGIN_DISABLED')
        )

        if not exempt and password_configured and not current_user.is_authenticated:
            return current_app.login_manager.unauthorized()

        return func(*args, **kwargs)

    return decorated_view
def create_backup(datastore_path, watches: dict, tags: dict = None):
    """Write a timestamped zip backup of the datastore into *datastore_path*.

    The archive contains the settings file(s) found in the datastore
    (``changedetection.json`` and/or legacy ``url-watches.json``), the direct
    children of every tag's and watch's ``data_dir`` stored as
    ``<dir name>/<file name>``, and two plain-text URL lists
    (``url-list.txt`` and ``url-list-with-tags.txt``), which are also left in
    the datastore directory.

    The zip is built under a ``.tmp`` name and only moved to its final ``.zip``
    name once complete, so a half-written archive is never listed as a backup.

    Args:
        datastore_path: Directory holding the datastore; the backup is written here.
        watches: Mapping of watch UUID -> watch object (needs ``data_dir`` and ``get``).
        tags: Optional mapping of tag UUID -> tag object (needs ``data_dir`` and ``get``).

    Raises:
        OSError / zipfile errors from building the archive; the temporary file
        is removed before the exception propagates.
    """
    import zipfile
    from pathlib import Path

    logger.debug("Creating backup...")

    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    backup_filepath = os.path.join(datastore_path, BACKUP_FILENAME_FORMAT.format(timestamp))
    # Swap only the file extension. A blanket str.replace('.zip', '.tmp') would
    # also rewrite a '.zip'/'.tmp' that happens to be part of the datastore path.
    tmp_filepath = os.path.splitext(backup_filepath)[0] + '.tmp'

    # This runs in a background thread while the app keeps serving requests:
    # snapshot the mappings so a concurrent add/remove cannot break iteration.
    watch_items = list((watches or {}).items())
    tag_items = list((tags or {}).items())

    def add_data_dir(zip_obj, data_dir):
        for f in Path(data_dir).glob('*'):
            # Use the full path to access the file, but make the file 'relative' in the Zip.
            zip_obj.write(f,
                          arcname=os.path.join(f.parts[-2], f.parts[-1]),
                          compress_type=zipfile.ZIP_DEFLATED,
                          compresslevel=8)

    try:
        with zipfile.ZipFile(tmp_filepath, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=8) as zipObj:
            # Add the settings file (supports both formats)
            # New format: changedetection.json
            changedetection_json = os.path.join(datastore_path, "changedetection.json")
            if os.path.isfile(changedetection_json):
                zipObj.write(changedetection_json, arcname="changedetection.json")
                logger.debug("Added changedetection.json to backup")

            # Legacy format: url-watches.json (for backward compatibility)
            url_watches_json = os.path.join(datastore_path, "url-watches.json")
            if os.path.isfile(url_watches_json):
                zipObj.write(url_watches_json, arcname="url-watches.json")
                logger.debug("Added url-watches.json to backup")

            # Add tag data directories (each tag has its own {uuid}/tag.json)
            for uuid, tag in tag_items:
                add_data_dir(zipObj, tag.data_dir)
                logger.debug(f"Added tag '{tag.get('title')}' ({uuid}) to backup")

            # Add any data in the watch data directory.
            for uuid, w in watch_items:
                add_data_dir(zipObj, w.data_dir)

            # Create a list file with just the URLs, so it's easier to port somewhere else in the future
            list_file = "url-list.txt"
            with open(os.path.join(datastore_path, list_file), "w", encoding="utf-8") as f:
                for _uuid, w in watch_items:
                    f.write("{}\r\n".format(w.get('url')))

            list_with_tags_file = "url-list-with-tags.txt"
            with open(os.path.join(datastore_path, list_with_tags_file), "w", encoding="utf-8") as f:
                for _uuid, w in watch_items:
                    f.write("{} {}\r\n".format(w.get('url'), w.get('tags', {})))

            # Add them to the Zip
            for name in (list_file, list_with_tags_file):
                zipObj.write(
                    os.path.join(datastore_path, name),
                    arcname=name,
                    compress_type=zipfile.ZIP_DEFLATED,
                    compresslevel=8,
                )
    except Exception:
        # Don't leave a partial archive behind; cleanup is best-effort only.
        try:
            os.unlink(tmp_filepath)
        except OSError:
            pass
        raise

    # Now it's done, rename it so it shows up finally and its completed being written.
    os.replace(tmp_filepath, backup_filepath)
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the 'backups' blueprint: request, list, download and remove backup zips.

    Also mounts the restore blueprint underneath it.

    Note on decorator order: ``route()`` registers whatever function it is
    handed, so it must be the outermost (top) decorator. With the login
    decorator on top, the *unprotected* function is what ends up registered.

    Args:
        datastore: Application datastore; ``datastore_path`` and ``data`` are used.

    Returns:
        The configured flask Blueprint.
    """
    from .restore import construct_restore_blueprint

    backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
    backups_blueprint.register_blueprint(construct_restore_blueprint(datastore))
    backup_threads = []

    def find_backups():
        """Return info dicts for every backup zip in the datastore, newest first."""
        backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
        backup_info = []
        for backup in glob.glob(backup_filepath):
            size = os.path.getsize(backup) / (1024 * 1024)
            backup_info.append({
                'filename': os.path.basename(backup),
                'filesize': f"{size:.2f}",
                'creation_time': os.path.getctime(backup)
            })
        backup_info.sort(key=lambda x: x['creation_time'], reverse=True)
        return backup_info

    @backups_blueprint.route("/request-backup", methods=['GET'])
    @login_optionally_required
    def request_backup():
        # Forget finished threads so the list does not grow with every backup
        backup_threads[:] = [t for t in backup_threads if t.is_alive()]
        if backup_threads:
            flash(gettext("A backup is already running, check back in a few minutes"), "error")
            return redirect(url_for('backups.create'))

        # '>=' so that MAX_NUMBER_BACKUPS really is the maximum ('>' allowed one extra)
        if len(find_backups()) >= int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
            flash(gettext("Maximum number of backups reached, please remove some"), "error")
            return redirect(url_for('backups.create'))

        # With immediate persistence, all data is already saved
        zip_thread = threading.Thread(
            target=create_backup,
            args=(datastore.datastore_path, datastore.data.get("watching")),
            kwargs={'tags': datastore.data['settings']['application'].get('tags', {})},
            daemon=True,
            name="BackupCreator"
        )
        zip_thread.start()
        backup_threads.append(zip_thread)
        flash(gettext("Backup building in background, check back in a few minutes."))

        return redirect(url_for('backups.create'))

    # The view takes `filename`, so the rule must declare it as a URL variable
    @backups_blueprint.route("/download/<string:filename>", methods=['GET'])
    @login_optionally_required
    def download_backup(filename):
        import re
        filename = filename.strip()

        # Resolve 'latest' before any validation so checks run against the real filename.
        if filename == 'latest':
            backups = find_backups()
            if not backups:
                abort(404)
            filename = backups[0]['filename']

        # Escape the literal parts of the format so '.' only matches a dot
        prefix, _, suffix = BACKUP_FILENAME_FORMAT.partition('{}')
        if not re.fullmatch(re.escape(prefix) + r"\d+" + re.escape(suffix), filename):
            abort(400)  # Bad Request if the filename doesn't match the pattern

        datastore_dir = os.path.abspath(datastore.datastore_path)
        full_path = os.path.join(datastore_dir, filename)
        if not full_path.startswith(datastore_dir + os.sep):
            abort(404)

        logger.debug(f"Backup download request for '{full_path}'")
        return send_from_directory(datastore_dir, filename, as_attachment=True)

    @backups_blueprint.route("/", methods=['GET'])
    @backups_blueprint.route("/create", methods=['GET'])
    @login_optionally_required
    def create():
        return render_template("backup_create.html",
                               available_backups=find_backups(),
                               backup_running=any(thread.is_alive() for thread in backup_threads)
                               )

    @backups_blueprint.route("/remove-backups", methods=['GET'])
    @login_optionally_required
    def remove_backups():
        backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
        for backup in glob.glob(backup_filepath):
            os.unlink(backup)

        flash(gettext("Backups were deleted."))

        return redirect(url_for('backups.create'))

    return backups_blueprint
def _read_backup_json(path, kind, uuid):
    """Return the JSON object stored at *path*, or None (after logging) when it is unusable."""
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (ValueError, OSError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
        logger.error(f"Restore: failed to read {kind}.json for {uuid}: {e}")
        return None

    if not isinstance(data, dict):
        # Valid JSON but not an object (e.g. a list): the caller needs .get()/item assignment
        logger.error(f"Restore: failed to read {kind}.json for {uuid}: expected a JSON object, got {type(data).__name__}")
        return None
    return data


def _replace_entity_dir(src_dir, datastore_path, uuid):
    """Copy an extracted UUID directory into the datastore, replacing any existing one."""
    dst_dir = os.path.join(datastore_path, uuid)
    if os.path.exists(dst_dir):
        shutil.rmtree(dst_dir)
    shutil.copytree(src_dir, dst_dir)


def import_from_zip(zip_stream, datastore, include_groups, include_groups_replace,
                    include_watches, include_watches_replace):
    """
    Extract and import watches and groups from a backup zip stream.

    The archive is extracted to a temporary directory (after a declared-size
    limit check and a path-traversal check on every member). Each top-level
    directory whose name is a UUID is then imported:
      - UUID dirs with tag.json   -> Tag.model + tag_obj.commit()
      - UUID dirs with watch.json -> datastore.rehydrate_entity + watch_obj.commit()
    A directory holding both files is treated as a group when groups are included.
    Entries whose JSON file cannot be read, or is not a JSON object, are logged
    and left out of all counts.

    Args:
        zip_stream: File-like object (or path) accepted by zipfile.ZipFile.
        datastore: Application datastore; its tag/watch dicts are updated in place.
        include_groups / include_watches: Whether to import that entity kind at all.
        include_groups_replace / include_watches_replace: Whether an entity whose
            UUID already exists is overwritten (True) or skipped (False).

    Returns a dict with counts: restored_groups, skipped_groups, restored_watches, skipped_watches.

    Raises zipfile.BadZipFile if the stream is not a valid zip, and ValueError if
    the archive exceeds the decompressed-size limit or contains a path-traversal entry.
    """
    from changedetectionio.model import Tag

    restored_groups = 0
    skipped_groups = 0
    restored_watches = 0
    skipped_watches = 0

    current_tags = datastore.data['settings']['application'].get('tags', {})
    current_watches = datastore.data['watching']

    with tempfile.TemporaryDirectory() as tmpdir:
        logger.debug(f"Restore: extracting zip to {tmpdir}")
        with zipfile.ZipFile(zip_stream, 'r') as zf:
            members = zf.infolist()

            # Zip-bomb guard, based on the sizes declared in the archive headers
            total_uncompressed = sum(m.file_size for m in members)
            if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
                raise ValueError(
                    f"Backup archive decompressed size ({total_uncompressed // (1024 * 1024)} MB) "
                    f"exceeds the {_MAX_DECOMPRESSED_BYTES // (1024 * 1024)} MB limit"
                )

            resolved_dest = os.path.realpath(tmpdir)
            for member in members:
                # Every entry must resolve to a location inside the extraction directory
                member_dest = os.path.realpath(os.path.join(resolved_dest, member.filename))
                if not member_dest.startswith(resolved_dest + os.sep) and member_dest != resolved_dest:
                    raise ValueError(f"Zip Slip path traversal detected in backup archive: {member.filename!r}")
                zf.extract(member, tmpdir)
        logger.debug("Restore: zip extracted, scanning UUID directories")

        for entry in os.scandir(tmpdir):
            if not entry.is_dir():
                continue

            uuid = entry.name
            if not _UUID_RE.match(uuid):
                logger.warning(f"Restore: skipping non-UUID directory {uuid!r}")
                continue

            tag_json_path = os.path.join(entry.path, 'tag.json')
            watch_json_path = os.path.join(entry.path, 'watch.json')

            # --- Tags (groups) ---
            if include_groups and os.path.exists(tag_json_path):
                if uuid in current_tags and not include_groups_replace:
                    logger.debug(f"Restore: skipping existing group {uuid} (replace not requested)")
                    skipped_groups += 1
                    continue

                tag_data = _read_backup_json(tag_json_path, 'tag', uuid)
                if tag_data is None:
                    continue

                title = tag_data.get('title', uuid)
                logger.debug(f"Restore: importing group '{title}' ({uuid})")

                # Mirror _load_tags: set uuid and force processor
                tag_data['uuid'] = uuid
                tag_data['processor'] = 'restock_diff'

                # Copy the UUID directory so data_dir exists for commit()
                _replace_entity_dir(entry.path, datastore.datastore_path, uuid)

                tag_obj = Tag.model(
                    datastore_path=datastore.datastore_path,
                    __datastore=datastore.data,
                    default=tag_data
                )
                current_tags[uuid] = tag_obj
                tag_obj.commit()
                restored_groups += 1
                logger.success(f"Restore: group '{title}' ({uuid}) restored")

            # --- Watches ---
            elif include_watches and os.path.exists(watch_json_path):
                if uuid in current_watches and not include_watches_replace:
                    logger.debug(f"Restore: skipping existing watch {uuid} (replace not requested)")
                    skipped_watches += 1
                    continue

                watch_data = _read_backup_json(watch_json_path, 'watch', uuid)
                if watch_data is None:
                    continue

                url = watch_data.get('url', uuid)
                logger.debug(f"Restore: importing watch '{url}' ({uuid})")

                # Copy UUID directory first so data_dir and history files exist
                _replace_entity_dir(entry.path, datastore.datastore_path, uuid)

                # Mirror _load_watches / rehydrate_entity
                watch_data['uuid'] = uuid
                watch_obj = datastore.rehydrate_entity(uuid, watch_data)
                current_watches[uuid] = watch_obj
                watch_obj.commit()
                restored_watches += 1
                logger.success(f"Restore: watch '{url}' ({uuid}) restored")

        logger.debug(f"Restore: scan complete - groups {restored_groups} restored / {skipped_groups} skipped, "
                     f"watches {restored_watches} restored / {skipped_watches} skipped")

        # Persist changedetection.json (includes the updated tags dict)
        logger.debug("Restore: committing datastore settings")
        datastore.commit()

    return {
        'restored_groups': restored_groups,
        'skipped_groups': skipped_groups,
        'restored_watches': restored_watches,
        'skipped_watches': skipped_watches,
    }
def construct_restore_blueprint(datastore):
    """Build the 'restore' blueprint: the upload form and the background restore job.

    ``route()`` is kept as the outermost decorator on each view so that the
    function Flask registers is the login-protected one.

    Args:
        datastore: Application datastore, handed on to import_from_zip().

    Returns:
        The configured flask Blueprint.
    """
    restore_blueprint = Blueprint('restore', __name__, template_folder="templates")
    restore_threads = []

    def back_to_form():
        return redirect(url_for('backups.restore.restore'))

    @restore_blueprint.route("/restore", methods=['GET'])
    @login_optionally_required
    def restore():
        form = RestoreForm()
        return render_template("backup_restore.html",
                               form=form,
                               restore_running=any(t.is_alive() for t in restore_threads),
                               max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
                               max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))

    @restore_blueprint.route("/restore/start", methods=['POST'])
    @login_optionally_required
    def backups_restore_start():
        if any(t.is_alive() for t in restore_threads):
            flash(gettext("A restore is already running, check back in a few minutes"), "error")
            return back_to_form()

        zip_file = request.files.get('zip_file')
        if not zip_file or not zip_file.filename:
            flash(gettext("No file uploaded"), "error")
            return back_to_form()

        if not zip_file.filename.lower().endswith('.zip'):
            flash(gettext("File must be a .zip backup file"), "error")
            return back_to_form()

        # Reject oversized uploads before reading the stream into memory.
        content_length = request.content_length
        if content_length and content_length > _MAX_UPLOAD_BYTES:
            flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
            return back_to_form()

        # Read into memory now — the request stream is gone once we return.
        # Read one byte beyond the limit so an oversized body is detected even
        # when Content-Length was missing or wrong.
        raw = zip_file.read(_MAX_UPLOAD_BYTES + 1)
        if len(raw) > _MAX_UPLOAD_BYTES:
            flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
            return back_to_form()

        zip_bytes = io.BytesIO(raw)
        try:
            with zipfile.ZipFile(zip_bytes):  # quick validity check before spawning
                pass
        except zipfile.BadZipFile:
            flash(gettext("Invalid or corrupted zip file"), "error")
            return back_to_form()
        zip_bytes.seek(0)

        # Checkbox fields arrive as 'y' when ticked and are absent otherwise
        include_groups = request.form.get('include_groups') == 'y'
        include_groups_replace = request.form.get('include_groups_replace_existing') == 'y'
        include_watches = request.form.get('include_watches') == 'y'
        include_watches_replace = request.form.get('include_watches_replace_existing') == 'y'

        restore_thread = threading.Thread(
            target=import_from_zip,
            kwargs={
                'zip_stream': zip_bytes,
                'datastore': datastore,
                'include_groups': include_groups,
                'include_groups_replace': include_groups_replace,
                'include_watches': include_watches,
                'include_watches_replace': include_watches_replace,
            },
            daemon=True,
            name="BackupRestore"
        )
        restore_thread.start()
        # Drop finished threads so the list only ever holds live ones
        restore_threads[:] = [t for t in restore_threads if t.is_alive()]
        restore_threads.append(restore_thread)
        flash(gettext("Restore started in background, check back in a few minutes."))
        return back_to_form()

    return restore_blueprint
{% if backup_running %}

 {{ _('A backup is running!') }}

{% endif %}

{{ _('Here you can download and request a new backup, when a backup is completed you will see it listed below.') }}


{% if available_backups %} {% else %}

{{ _('No backups found.') }}

{% endif %} {{ _('Create backup') }} {% if available_backups %} {{ _('Remove backups') }} {% endif %}
{% endblock %} ================================================ FILE: changedetectionio/blueprint/backups/templates/backup_restore.html ================================================ {% extends 'base.html' %} {% block content %} {% from '_helpers.html' import render_field, render_checkbox_field %}
{% if restore_running %}

 {{ _('A restore is running!') }}

{% endif %}

{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}

{{ _('Note: This does not override the main application settings, only watches and groups.') }}

{{ _('Max upload size: %(upload)s MB  ·  Max decompressed size: %(decomp)s MB', upload=max_upload_mb, decomp=max_decompressed_mb) }}

{{ render_checkbox_field(form.include_groups) }} {{ _('Include all groups found in backup?') }}
{{ render_checkbox_field(form.include_groups_replace_existing) }} {{ _('Replace any existing groups of the same UUID?') }}
{{ render_checkbox_field(form.include_watches) }} {{ _('Include all watches found in backup?') }}
{{ render_checkbox_field(form.include_watches_replace_existing) }} {{ _('Replace any existing watches of the same UUID?') }}
{{ render_field(form.zip_file) }}
{% endblock %} ================================================ FILE: changedetectionio/blueprint/browser_steps/TODO.txt ================================================ - This needs an abstraction to directly handle the puppeteer connection methods - Then remove the playwright stuff - Remove hack redirect at line 65 changedetectionio/processors/__init__.py The screenshots are base64 encoded/decoded which is very CPU intensive for large screenshots (in playwright) but not in the direct puppeteer connection (they are binary end to end) ================================================ FILE: changedetectionio/blueprint/browser_steps/__init__.py ================================================ # HORRIBLE HACK BUT WORKS :-) PR anyone? # # Why? # `browsersteps_playwright_browser_interface.chromium.connect_over_cdp()` will only run once without async() # - this flask app is not async() # - A single timeout/keepalive which applies to the session made at .connect_over_cdp() # # So it means that we must unfortunately for now just keep a single timer since .connect_over_cdp() was run # and know when that reaches timeout/keepalive :( when that time is up, restart the connection and tell the user # that their time is up, insert another coin. 
def _start_browser_steps_loop():
    """Thread target: create the dedicated browser-steps event loop and run it until stopped.

    The new loop is published in the module-level ``_browser_steps_loop`` so
    other threads can schedule coroutines on it. When the loop stops, any
    remaining tasks are cancelled, the loop is closed, and the global is
    cleared again so a closed loop is never left published.
    """
    global _browser_steps_loop

    # Create and set the event loop for this thread
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    _browser_steps_loop = loop

    logger.debug("Browser steps event loop started")

    try:
        # Run the loop forever - handles all browsersteps sessions
        loop.run_forever()
    except Exception as e:
        logger.error(f"Browser steps event loop error: {e}")
    finally:
        try:
            # Cancel all remaining tasks
            pending = asyncio.all_tasks(loop)
            for task in pending:
                task.cancel()

            # Wait for tasks to finish cancellation
            if pending:
                loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
        except Exception as e:
            logger.debug(f"Error during browser steps loop cleanup: {e}")
        finally:
            loop.close()
            # Un-publish the loop (only if it is still ours) so a restart waits
            # for a fresh loop instead of picking up this closed one.
            if _browser_steps_loop is loop:
                _browser_steps_loop = None
            logger.debug("Browser steps event loop closed")
def _ensure_browser_steps_loop():
    """Ensure the browser steps event loop thread is running, starting it if needed.

    Raises:
        RuntimeError: The thread was started but did not publish its loop within 5 seconds.
    """
    global _browser_steps_loop, _browser_steps_thread

    with _browser_steps_loop_lock:
        if _browser_steps_thread is None or not _browser_steps_thread.is_alive():
            logger.debug("Starting browser steps event loop thread")
            # A previous (dead) thread may have left its closed loop behind;
            # clear it so the wait below really waits for the new loop.
            _browser_steps_loop = None
            _browser_steps_thread = threading.Thread(
                target=_start_browser_steps_loop,
                daemon=True,
                name="BrowserStepsEventLoop"
            )
            _browser_steps_thread.start()

            # Wait for the loop to be ready
            timeout = 5.0
            start_time = time.time()
            while _browser_steps_loop is None:
                if time.time() - start_time > timeout:
                    raise RuntimeError("Browser steps event loop failed to start")
                time.sleep(0.01)

            logger.debug("Browser steps event loop thread started and ready")


def run_async_in_browser_loop(coro):
    """Run *coro* on the dedicated browser steps event loop and block until it finishes.

    Args:
        coro: Coroutine object to schedule.

    Returns:
        Whatever the coroutine returns.

    Raises:
        RuntimeError: The event loop could not be started or is closed.
        Any exception raised by the coroutine itself.
    """
    try:
        _ensure_browser_steps_loop()
        loop = _browser_steps_loop
        if not loop or loop.is_closed():
            raise RuntimeError("Browser steps event loop is not available")
    except Exception:
        # The coroutine will never be scheduled; close it so Python does not
        # emit a "coroutine ... was never awaited" warning.
        close = getattr(coro, 'close', None)
        if callable(close):
            close()
        raise

    logger.debug("Browser steps using dedicated event loop")
    future = asyncio.run_coroutine_threadsafe(coro, loop)
    return future.result()
async def _close_session_resources(session_data, label=''):
    """Close all browser resources for a session in the correct order.

    Order: browserstepper.cleanup(), then browser.close() (bounded to 5
    seconds), then playwright_context.stop(). Each step is attempted even if an
    earlier one fails; failures are logged, never raised.

    Args:
        session_data: Session dict; the 'browserstepper', 'browser' and
            'playwright_context' entries are used when present and truthy.
        label: Suffix appended to log messages to identify the session.
    """
    browserstepper = session_data.get('browserstepper')
    if browserstepper:
        try:
            await browserstepper.cleanup()
        except Exception as e:
            logger.error(f"Error cleaning up browserstepper{label}: {e}")

    browser = session_data.get('browser')
    if browser:
        try:
            await asyncio.wait_for(browser.close(), timeout=5.0)
        except Exception as e:
            logger.warning(f"Error closing browser{label}: {e}")

    playwright_context = session_data.get('playwright_context')
    if playwright_context:
        try:
            await playwright_context.stop()
        except Exception as e:
            logger.warning(f"Error stopping playwright context{label}: {e}")


def cleanup_expired_sessions():
    """Remove expired browsersteps sessions and cleanup their resources."""
    # Iterate over a snapshot: these module-level dicts are shared between
    # request threads, and a live view breaks if another request adds or
    # removes a session meanwhile.
    expired_session_ids = [
        session_id
        for session_id, session_data in list(browsersteps_sessions.items())
        if session_data.get('browserstepper') and session_data['browserstepper'].has_expired
    ]

    for session_id in expired_session_ids:
        logger.debug(f"Cleaning up expired browsersteps session {session_id}")
        # pop() instead of index + del: the session may already have been
        # removed by another request since the scan above.
        session_data = browsersteps_sessions.pop(session_id, None)
        if session_data is not None:
            try:
                run_async_in_browser_loop(_close_session_resources(session_data, label=f" for session {session_id}"))
            except Exception as e:
                logger.error(f"Error cleaning up session {session_id}: {e}")

        # Remove from watch mapping
        for watch_uuid, mapped_session_id in list(browsersteps_watch_to_session.items()):
            if mapped_session_id == session_id:
                browsersteps_watch_to_session.pop(watch_uuid, None)

    if expired_session_ids:
        logger.info(f"Cleaned up {len(expired_session_ids)} expired browsersteps session(s)")
def cleanup_session_for_watch(watch_uuid):
    """Cleanup a specific browsersteps session for a watch UUID.

    Closes the session's browser resources (errors are logged, not raised),
    removes the session and its watch mapping, then also cleans up any other
    expired sessions. Does nothing when the watch has no mapped session.
    """
    session_id = browsersteps_watch_to_session.get(watch_uuid)
    if not session_id:
        logger.debug(f"No browsersteps session found for watch {watch_uuid}")
        return

    logger.debug(f"Cleaning up browsersteps session {session_id} for watch {watch_uuid}")

    # pop() rather than get() + del: a mapping may point at a session that was
    # already removed, and taking the session out first means no other request
    # can pick it up while it is being closed.
    session_data = browsersteps_sessions.pop(session_id, None)
    if session_data:
        try:
            run_async_in_browser_loop(_close_session_resources(session_data, label=f" for watch {watch_uuid}"))
        except Exception as e:
            logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")

    # Remove from watch mapping
    browsersteps_watch_to_session.pop(watch_uuid, None)

    logger.debug(f"Cleaned up session for watch {watch_uuid}")

    # Opportunistically cleanup any other expired sessions
    cleanup_expired_sessions()
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the 'browser_steps' blueprint (live Browser Steps editor backend).

    Routes:
      GET  /browsersteps_start_session - open a browser session for a watch
      GET  /browsersteps_image         - serve a saved step screenshot
      POST /browsersteps_update        - run one step and return screenshot + element data

    ``route()`` is kept as the outermost decorator on each view so that the
    function Flask registers is the login-protected one.

    Args:
        datastore: Application datastore (watches, settings, proxy list).

    Returns:
        The configured flask Blueprint.
    """
    browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")

    async def start_browsersteps_session(watch_uuid):
        """Open a browser for *watch_uuid* and return the new session dict.

        If start-up fails part-way, whatever was already opened is closed again
        before the exception is re-raised.
        """
        from changedetectionio.browser_steps import browser_steps
        import time
        from playwright.async_api import async_playwright

        keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
        keepalive_ms = ((keepalive_seconds + 3) * 1000)
        session = {'start_time': time.time()}

        # Build proxy dict first — needed by both the CDP path and fetcher-specific launchers
        proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
        proxy = None
        if proxy_id:
            proxy_url = datastore.proxy_list.get(proxy_id, {}).get('url')
            if proxy_url:
                from urllib.parse import urlparse
                parsed = urlparse(proxy_url)
                proxy = {'server': proxy_url}
                if parsed.username:
                    proxy['username'] = parsed.username
                if parsed.password:
                    proxy['password'] = parsed.password
                logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")

        # Resolve the fetcher class for this watch so we can ask it to launch its own browser
        # if it supports that (e.g. CloakBrowser, which runs locally rather than via CDP)
        watch = datastore.data['watching'][watch_uuid]
        from changedetectionio import content_fetchers
        fetcher_name = watch.get_fetch_backend or 'system'
        if fetcher_name == 'system':
            fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
        fetcher_class = getattr(content_fetchers, fetcher_name, None)

        browser = None
        playwright_context = None

        try:
            # If the fetcher has its own browser launch for the live steps UI, use it.
            # get_browsersteps_browser(proxy, keepalive_ms) returns (browser, playwright_context_or_None)
            # or None to fall back to the default CDP path.
            if fetcher_class and hasattr(fetcher_class, 'get_browsersteps_browser'):
                result = await fetcher_class.get_browsersteps_browser(proxy=proxy, keepalive_ms=keepalive_ms)
                if result is not None:
                    browser, playwright_context = result
                    session['browser'] = browser
                    session['playwright_context'] = playwright_context
                    logger.debug(f"Browser Steps: using fetcher-specific browser for '{fetcher_name}'")

            # Default: connect to the remote Playwright/sockpuppetbrowser via CDP
            if browser is None:
                playwright_context = await async_playwright().start()
                # Record it straight away so it is stopped if the connect below fails
                session['playwright_context'] = playwright_context

                base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
                a = "?" if '?' not in base_url else '&'
                base_url += a + f"timeout={keepalive_ms}"
                browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
                logger.debug(f"Browser Steps: using CDP connection to {base_url}")

            session['browser'] = browser
            session['playwright_context'] = playwright_context

            browserstepper = browser_steps.browsersteps_live_ui(
                playwright_browser=browser,
                proxy=proxy,
                start_url=watch.link,
                headers=watch.get('headers')
            )
            session['browserstepper'] = browserstepper
            await browserstepper.connect(proxy=proxy)
        except Exception:
            # Don't leak a half-opened browser / playwright instance
            await _close_session_resources(session, label=f" for watch {watch_uuid} (failed start)")
            raise

        return session

    @browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
    @login_optionally_required
    def browsersteps_start_session():
        # A new session was requested, return sessionID
        import uuid
        browsersteps_session_id = str(uuid.uuid4())
        watch_uuid = request.args.get('uuid')

        if not watch_uuid:
            return make_response('No Watch UUID specified', 500)

        # Cleanup any existing session for this watch
        cleanup_session_for_watch(watch_uuid)

        logger.debug("Starting connection with playwright")
        logger.debug("browser_steps.py connecting")

        try:
            # Run the async function in the dedicated browser steps event loop
            browsersteps_sessions[browsersteps_session_id] = run_async_in_browser_loop(
                start_browsersteps_session(watch_uuid)
            )

            # Store the mapping of watch_uuid -> browsersteps_session_id
            browsersteps_watch_to_session[watch_uuid] = browsersteps_session_id

        except Exception as e:
            if 'ECONNREFUSED' in str(e):
                return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401)
            else:
                # Other errors, bad URL syntax, bad reply etc
                return make_response(str(e), 401)

        logger.debug("Starting connection with playwright - done")
        return {'browsersteps_session_id': browsersteps_session_id}

    @browser_steps_blueprint.route("/browsersteps_image", methods=['GET'])
    @login_optionally_required
    def browser_steps_fetch_screenshot_image():
        from flask import (
            make_response,
            request,
            send_from_directory,
        )
        uuid = request.args.get('uuid')
        # type=int yields None for a missing or non-numeric value instead of raising
        step_n = request.args.get('step_n', type=int)

        watch = datastore.data['watching'].get(uuid)
        filename = f"step_before-{step_n}.jpeg" if request.args.get('type', '') == 'before' else f"step_{step_n}.jpeg"

        if step_n and watch and os.path.isfile(os.path.join(watch.data_dir, filename)):
            response = make_response(send_from_directory(directory=watch.data_dir, path=filename))
            response.headers['Content-type'] = 'image/jpeg'
            response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
            response.headers['Pragma'] = 'no-cache'
            response.headers['Expires'] = '0'
            return response

        else:
            return make_response('Unable to fetch image, is the URL correct? does the watch exist? does the step_type-n.jpeg exist?', 401)

    # A request for an action was received
    @browser_steps_blueprint.route("/browsersteps_update", methods=['POST'])
    @login_optionally_required
    def browsersteps_ui_update():
        import base64

        # NOTE(review): never updated below, so 'browser_time_remaining' is always 0
        remaining = 0
        uuid = request.args.get('uuid')
        goto_website_url_first_step = request.args.get('goto_website_url_first_step')

        browsersteps_session_id = request.args.get('browsersteps_session_id')

        if not browsersteps_session_id:
            return make_response('No browsersteps_session_id specified', 500)

        session = browsersteps_sessions.get(browsersteps_session_id)
        if not session:
            return make_response('No session exists under that ID', 500)
        browserstepper = session['browserstepper']

        is_last_step = False
        # @todo - should always be an existing session
        if goto_website_url_first_step:
            logger.debug("Going to site (requested automatically before stepping)..")
            step_operation = "Goto site"
            step_selector = None
            step_optional_value = None
        else:
            step_operation = request.form.get('operation')
            step_selector = request.form.get('selector')
            step_optional_value = request.form.get('optional_value')
            # Only parse the flag when it was actually sent; a missing field means "not the last step"
            raw_is_last_step = request.form.get('is_last_step')
            is_last_step = strtobool(raw_is_last_step) if raw_is_last_step else False

        try:
            # Run the async call_action method in the dedicated browser steps event loop
            run_async_in_browser_loop(
                browserstepper.call_action(
                    action_name=step_operation,
                    selector=step_selector,
                    optional_value=step_optional_value
                )
            )

        except Exception as e:
            logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
            # Try to find something of value to give back to the user;
            # fall back to the exception type when the message is empty.
            message_lines = str(e).splitlines()
            return make_response(message_lines[0] if message_lines else type(e).__name__, 401)

        # Screenshots and other info only needed on requesting a step (POST)
        try:
            # Run the async get_current_state method in the dedicated browser steps event loop
            (screenshot, xpath_data) = run_async_in_browser_loop(
                browserstepper.get_current_state()
            )

            if is_last_step:
                watch = datastore.data['watching'].get(uuid)
                u = browserstepper.page.url
                if watch and u:
                    watch.save_screenshot(screenshot=screenshot)
                    watch.save_xpath_data(data=xpath_data)

        except Exception as e:
            return make_response(f"Error fetching screenshot and element data - {str(e)}", 401)

        # SEND THIS BACK TO THE BROWSER
        output = {
            "screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}",
            "xpath_data": xpath_data,
            "session_age_start": browserstepper.age_start,
            "browser_time_remaining": round(remaining)
        }
        json_data = json.dumps(output)

        # Generate an ETag (hash of the response body)
        etag_hash = hashlib.md5(json_data.encode('utf-8')).hexdigest()

        # Create the response with ETag
        response = Response(json_data, mimetype="application/json; charset=UTF-8")
        response.set_etag(etag_hash)

        return response

    return browser_steps_blueprint
def threadpool(f, executor=None):
    """Decorator that runs ``f`` in a thread pool and returns its ``Future``.

    Args:
        f: The callable to run in the background.
        executor: Optional executor to submit to; the module-level
            ``_DEFAULT_POOL`` is used when it is omitted.
    """
    @wraps(f)
    def wrap(*args, **kwargs):
        return (executor or _DEFAULT_POOL).submit(f, *args, **kwargs)

    return wrap


def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the blueprint that test-fetches a watch through every configured proxy.

    Args:
        datastore: The application datastore; ``proxy_list`` is read from it and
            it is handed to the processor that performs the fetch.

    Returns:
        The configured ``check_proxies`` Flask blueprint.
    """
    check_proxies_blueprint = Blueprint('check_proxies', __name__)
    # {watch uuid: {proxy key: Future returned by long_task}}
    checks_in_progress = {}

    @threadpool
    def long_task(uuid, preferred_proxy):
        """Fetch the watch via one proxy and summarise the outcome.

        Returns a dict with 'status', 'length', 'text' (HTML-escaped) and 'time'.
        """
        import time
        from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
        from changedetectionio.jinja2_custom import render as jinja_render

        status = {'status': '', 'length': 0, 'text': ''}
        # NOTE(review): `contents` is never filled in below, so every reported 'length' is 0.
        contents = ''
        now = time.time()
        try:
            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
            update_handler = processor_module.perform_site_check(datastore=datastore,
                                                                 watch_uuid=uuid
                                                                 )

            update_handler.call_browser(preferred_proxy_id=preferred_proxy)
        # title, size is len contents not len xfer
        except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
            if e.status_code == 404:
                status.update({'status': 'OK', 'length': len(contents), 'text': "OK but 404 (page not found)"})
            elif e.status_code == 403 or e.status_code == 401:
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"{e.status_code} - Access denied"})
            else:
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"})
        except FilterNotFoundInResponse:
            status.update({'status': 'OK', 'length': len(contents), 'text': "OK but CSS/xPath filter not found (page changed layout?)"})
        except content_fetcher_exceptions.EmptyReply as e:
            if e.status_code == 403 or e.status_code == 401:
                status.update({'status': 'ERROR OTHER', 'length': len(contents), 'text': f"Got empty reply with code {e.status_code} - Access denied"})
            else:
                status.update({'status': 'ERROR OTHER', 'length': len(contents) if contents else 0, 'text': f"Empty reply with code {e.status_code}, needs chrome?"})
        except content_fetcher_exceptions.ReplyWithContentButNoText as e:
            txt = f"Got reply but with no content - Status code {e.status_code} - It's possible that the filters were found, but contained no usable text (or contained only an image)."
            status.update({'status': 'ERROR', 'text': txt})
        except Exception as e:
            status.update({'status': 'ERROR OTHER', 'length': len(contents) if contents else 0, 'text': 'Error: '+type(e).__name__+str(e)})
        else:
            status.update({'status': 'OK', 'length': len(contents), 'text': ''})

        if status.get('text'):
            # parse 'text' as text for safety
            v = {'text': status['text']}
            status['text'] = jinja_render(template_str='{{text|e}}', **v)

        status['time'] = "{:.2f}s".format(time.time() - now)

        return status

    def _recalc_check_status(uuid):
        """Return {proxy key: status dict} for every check started for ``uuid``."""
        from concurrent.futures import TimeoutError as FutureTimeoutError

        results = {}
        for proxy_key, future in checks_in_progress.get(uuid, {}).items():
            try:
                results[proxy_key] = future.result(timeout=0.05)
            except FutureTimeoutError:
                # Not finished within the short poll window
                results[proxy_key] = {'status': 'RUNNING'}
            except Exception as e:
                # The task itself blew up; reporting it as RUNNING would leave
                # the caller polling forever.
                results[proxy_key] = {'status': 'ERROR OTHER', 'length': 0, 'text': f"Error: {type(e).__name__}"}

        return results

    # route() must be the outermost decorator, otherwise Flask registers the
    # function without the login check. The URL carries the watch uuid because
    # the view takes it as an argument.
    @check_proxies_blueprint.route("/<string:uuid>/status", methods=['GET'])
    @login_required
    def get_recheck_status(uuid):
        """Return the current per-proxy results for ``uuid``."""
        results = _recalc_check_status(uuid=uuid)
        return results

    @check_proxies_blueprint.route("/<string:uuid>/start", methods=['GET'])
    @login_required
    def start_check(uuid):
        """Start a check of ``uuid`` through every proxy that has no check yet."""
        if not datastore.proxy_list:
            # A view may not return None; an empty result set is valid JSON
            return {}

        if checks_in_progress.get(uuid):
            state = _recalc_check_status(uuid=uuid)
            # Something is still running for this watch: just report progress
            if any(v.get('status') == 'RUNNING' for v in state.values()):
                return state
        else:
            checks_in_progress[uuid] = {}

        for proxy_key, _proxy in datastore.proxy_list.items():
            if not checks_in_progress[uuid].get(proxy_key):
                checks_in_progress[uuid][proxy_key] = long_task(uuid=uuid, preferred_proxy=proxy_key)

        results = _recalc_check_status(uuid=uuid)
        return results

    return check_proxies_blueprint
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
    """Build the 'imports' blueprint serving the watch import page.

    Args:
        datastore: The application datastore that new watches are added to.
        update_q: Unused here; imported watches are picked up by the scheduler.
        queuedWatchMetaData: Unused here, kept for the shared constructor signature.

    Returns:
        The configured Flask blueprint.
    """
    import_blueprint = Blueprint('imports', __name__, template_folder="templates")

    @import_blueprint.route("/import", methods=['GET', 'POST'])
    @login_optionally_required
    def import_page():
        """Handle URL-list, Distill.io JSON and XLSX imports and render the form."""
        from changedetectionio import forms
        from changedetectionio.blueprint.imports.importer import (
            import_url_list,
            import_distill_io_json,
            import_xlsx_wachete,
            import_xlsx_custom
        )

        remaining_urls = []

        # None of the importers push to the update queue: the scheduler sees
        # that the new watches were never checked and queues them itself.

        # URL List import
        url_list = request.values.get('urls')
        if url_list and url_list.strip():
            from changedetectionio import processors
            importer_handler = import_url_list()
            importer_handler.run(data=url_list, flash=flash, datastore=datastore,
                                 processor=request.values.get('processor', processors.get_default_processor()))
            logger.debug(f"Imported {len(importer_handler.new_uuids)} new UUIDs")

            if len(importer_handler.remaining_data) == 0:
                return redirect(url_for('watchlist.index'))
            # Lines that failed validation go back into the textarea
            remaining_urls = importer_handler.remaining_data

        # Distill.io import
        distill_json = request.values.get('distill-io')
        if distill_json and distill_json.strip():
            d_importer = import_distill_io_json()
            d_importer.run(data=distill_json, flash=flash, datastore=datastore)

        # XLSX importer
        if request.files and request.files.get('xlsx_file'):
            file = request.files['xlsx_file']

            if request.values.get('file_mapping') == 'wachete':
                w_importer = import_xlsx_wachete()
                w_importer.run(data=file, flash=flash, datastore=datastore)
            else:
                w_importer = import_xlsx_custom()
                # Building mapping of col # to col # type
                column_mapping = {}
                for i in range(10):
                    col = request.values.get(f"custom_xlsx[col_{i}]")
                    col_type = request.values.get(f"custom_xlsx[col_type_{i}]")
                    if not (col and col_type):
                        continue
                    try:
                        column_mapping[int(col)] = col_type
                    except ValueError:
                        # A non-numeric column index must not take the whole request down
                        logger.warning(f"Ignoring custom XLSX column mapping with non-numeric column {col!r}")

                w_importer.import_profile = column_mapping
                w_importer.run(data=file, flash=flash, datastore=datastore)

        # Could be some remaining, or we could be on GET
        form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
        output = render_template("import.html",
                                 form=form,
                                 import_url_list_remaining="\n".join(remaining_urls),
                                 original_distill_json=''
                                 )
        return output

    return import_blueprint
class import_distill_io_json(Importer):
    """Imports watches from a Distill.io JSON export.

    Only ``name``, ``uri``, ``tags`` and the first include/exclude rule of the
    first frame of the first selection in ``config`` are used.
    """

    @staticmethod
    def _first_rule(d_config, kind):
        """Return the first ``kind`` ('includes'/'excludes') rule as a dict, or None."""
        try:
            rule = d_config['selections'][0]['frames'][0][kind][0]
        except (KeyError, IndexError, TypeError):
            return None
        return rule if isinstance(rule, dict) else None

    def run(self,
            data,
            flash,
            datastore,
            ):
        """Parse ``data`` (JSON text) and add one watch per usable entry.

        Args:
            data: The Distill.io export as a JSON string.
            flash: Callable used to report messages to the user.
            datastore: Datastore exposing ``add_watch``.

        Entries that cannot be imported are recorded in ``self.remaining_data``
        so the final "Skipped" count is accurate; at most 5000 are imported.
        """
        import json

        max_imports = 5000
        now = time.time()
        self.new_uuids = []

        # @todo Use JSONSchema like in the API to validate here.
        try:
            data = json.loads(data.strip())
        except json.decoder.JSONDecodeError:
            flash(gettext("Unable to read JSON file, was it broken?"), 'error')
            return

        # Valid JSON that is not an object (a list, a string..) has no .get()
        if not isinstance(data, dict) or not data.get('data'):
            flash(gettext("JSON structure looks invalid, was it broken?"), 'error')
            return

        for d in data.get('data'):
            # One malformed entry should not abort the rest of the import
            try:
                d_config = json.loads(d['config'])
                uri = d['uri'].strip()
            except (KeyError, TypeError, AttributeError, ValueError) as e:
                logger.error(f">> Distill.io import, entry skipped: {type(e).__name__} {str(e)}")
                self.remaining_data.append(str(d.get('uri', '')) if isinstance(d, dict) else '')
                continue

            if not uri or len(self.new_uuids) >= max_imports:
                self.remaining_data.append(uri)
                continue

            extras = {'title': d.get('name', None)}

            # @todo we only support CSS ones at the moment
            exclude = self._first_rule(d_config, 'excludes')
            if exclude and exclude.get('type') == 'css' and 'expr' in exclude:
                extras['subtractive_selectors'] = exclude['expr']

            extras['include_filters'] = []
            include = self._first_rule(d_config, 'includes')
            if include and 'type' in include and 'expr' in include:
                if include['type'] == 'xpath':
                    extras['include_filters'].append('xpath:' + include['expr'])
                else:
                    extras['include_filters'].append(include['expr'])

            new_uuid = datastore.add_watch(url=uri,
                                           tag=",".join(d.get('tags') or []),
                                           extras=extras,
                                           save_immediately=False)

            if new_uuid:
                # Straight into the queue.
                self.new_uuids.append(new_uuid)
            else:
                self.remaining_data.append(uri)

        flash(gettext("{} Imported from Distill.io in {:.2f}s, {} Skipped.").format(len(self.new_uuids), time.time() - now, len(self.remaining_data)))
class import_xlsx_custom(Importer):
    """Imports watches from an .xlsx sheet using a user supplied column mapping.

    ``self.import_profile`` maps a 1-based column index to what that column
    holds: 'url', 'tag', 'include_filters', 'interval_minutes', or any other
    name, which is stored verbatim in the watch extras.
    """

    def run(self,
            data,
            flash,
            datastore,
            ):
        """Add one watch per row that has a valid URL.

        Args:
            data: File-like object or path of the .xlsx workbook.
            flash: Callable used to report messages to the user.
            datastore: Datastore exposing ``add_watch``.

        A row that fails is reported with its real row number and skipped; the
        remaining rows are still processed.
        """
        now = time.time()
        self.new_uuids = []

        from openpyxl import load_workbook

        try:
            wb = load_workbook(data)
        except Exception:
            # @todo correct except
            flash(gettext("Unable to read export XLSX file, something wrong with the file?"), 'error')
            return

        # @todo check at least 2 rows, same in other method
        profile = self.import_profile or {}

        # enumerate() keeps the reported row number right whatever happens in the row
        for row_i, row in enumerate(wb.active.iter_rows(), start=1):
            try:
                url = None
                tags = None
                extras = {}

                for cell in row:
                    cell_map = profile.get(cell.col_idx)
                    if not cell_map:
                        continue
                    if not cell.value:
                        continue

                    # Always work on the string form, the cell could be a bool or a number
                    cell_val = str(cell.value).strip()

                    if cell_map == 'url':
                        url = cell_val
                        try:
                            validate_url(url)
                        except ValidationError as e:
                            logger.error(f">> Import URL error {url} {str(e)}")
                            flash(gettext("Error processing row number {}, URL value was incorrect, row was skipped.").format(row_i), 'error')
                            # Don't bother processing anything else on this row
                            url = None
                            break
                    elif cell_map == 'tag':
                        tags = cell_val
                    elif cell_map == 'include_filters':
                        # @todo validate?
                        extras['include_filters'] = [cell_val]
                    elif cell_map == 'interval_minutes':
                        hours, minutes = divmod(int(cell_val), 60)
                        days, hours = divmod(hours, 24)
                        weeks, days = divmod(days, 7)
                        extras['time_between_check'] = {'weeks': weeks, 'days': days, 'hours': hours, 'minutes': minutes, 'seconds': 0}
                    else:
                        extras[cell_map] = cell_val

                # At minimum a URL is required.
                if url:
                    new_uuid = datastore.add_watch(url=url,
                                                   extras=extras,
                                                   tag=tags,
                                                   save_immediately=False)
                    if new_uuid:
                        # Straight into the queue.
                        self.new_uuids.append(new_uuid)
            except Exception as e:
                logger.error(e)
                flash(gettext("Error processing row number {}, check all cell data types are correct, row was skipped.").format(row_i), 'error')

        flash(gettext("{} imported from custom .xlsx in {:.2f}s").format(len(self.new_uuids), time.time() - now))

{{ _('Restoring changedetection.io backups is in the') }} {{ _('backups section') }}.

{{ _('Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):') }}

{{ _('Example:') }} https://example.com tag1, tag2, last tag

{{ _('URLs which do not pass validation will stay in the textarea.') }}
{{ render_field(form.processor, class="processor") }}
{{ _('Copy and Paste your Distill.io watch \'export\' file, this should be a JSON file.') }}
{{ _('This is') }} {{ _('experimental') }}, {{ _('supported fields are') }} name, uri, tags, config:selections, {{ _('the rest (including') }} schedule) {{ _('are ignored.') }}

{{ _('How to export?') }} https://distill.io/docs/web-monitor/how-export-and-import-monitors/
{{ _('Be sure to set your default fetcher to Chrome if required.') }}

{{ render_field(form.xlsx_file, class="processor") }}
{{ render_field(form.file_mapping, class="processor") }}
{{ _('Table of custom column and data types mapping for the') }} {{ _('Custom mapping') }} {{ _('File mapping type.') }} {% for n in range(4) %} {% endfor %} {% for n in range(4) %} {% endfor %}
{{ _('Column #') }}
{{ _('Type') }}
def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue):
    """Build the blueprint used to accept or reject following a watch's price data.

    Args:
        datastore: The application datastore holding the watches.
        update_q: Queue that an accepted watch is pushed onto for a re-check.

    Returns:
        The configured ``price_data_follower`` Flask blueprint.
    """
    price_data_follower_blueprint = Blueprint('price_data_follower', __name__)

    # route() must be the outermost decorator, otherwise Flask registers the
    # function without the login check. The URL carries the watch uuid because
    # the views take it as an argument.
    @price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET'])
    @login_required
    def accept(uuid):
        """Switch the watch to the 'restock_diff' processor and queue a re-check."""
        watch = datastore.data['watching'].get(uuid)
        if watch is None:
            # Unknown uuid: tell the user instead of failing with a KeyError
            flash("No watch exists with that UUID.", 'error')
            return redirect(url_for("watchlist.index"))

        watch['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
        watch['processor'] = 'restock_diff'
        watch.clear_watch()
        watch.commit()
        worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
        return redirect(url_for("watchlist.index"))

    @price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
    @login_required
    def reject(uuid):
        """Record that price data should not be followed for this watch."""
        watch = datastore.data['watching'].get(uuid)
        if watch is None:
            flash("No watch exists with that UUID.", 'error')
            return redirect(url_for("watchlist.index"))

        watch['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
        watch.commit()
        return redirect(url_for("watchlist.index"))

    return price_data_follower_blueprint
deepcopy(valid_notification_formats) if RSS_FORMAT_TYPES.get('markdown'): del RSS_FORMAT_TYPES['markdown'] if RSS_FORMAT_TYPES.get(USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH): del RSS_FORMAT_TYPES[USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH] if not RSS_FORMAT_TYPES.get(RSS_CONTENT_FORMAT_DEFAULT): logger.critical(f"RSS_CONTENT_FORMAT_DEFAULT not in the acceptable list {RSS_CONTENT_FORMAT_DEFAULT}") RSS_TEMPLATE_TYPE_OPTIONS = {'system_default': 'System default', 'notification_body': 'Notification body'} # @note: We use
<pre> because nearly all RSS readers render only HTML (Thunderbird, for example, can't render just plaintext)
RSS_TEMPLATE_PLAINTEXT_DEFAULT = "
{{watch_label}} had a change.\n\n{{diff}}\n
" # @todo add some [edit]/[history]/[goto] etc links # @todo need {{watch_edit_link}} + delete + history link token RSS_TEMPLATE_HTML_DEFAULT = "\n

{{watch_label}}

\n

{{diff}}

# Control characters that are not allowed in RSS/XML content (tab, LF and CR stay)
BAD_CHARS_REGEX = r'[\x00-\x08\x0B\x0C\x0E-\x1F]'


def scan_invalid_chars_in_rss(content):
    """
    Scan for invalid characters in RSS content.
    Logs a warning with some context for the first one found.
    Returns True if invalid characters are found.
    """
    for match in re.finditer(BAD_CHARS_REGEX, content):
        i = match.start()
        bad_char = content[i]
        hex_value = f"0x{ord(bad_char):02x}"
        # Grab context
        start = max(0, i - 20)
        end = min(len(content), i + 21)
        context = content[start:end].replace('\n', '\\n').replace('\r', '\\r')
        logger.warning(f"Invalid char {hex_value} at pos {i}: ...{context}...")
        # First match is enough
        return True

    return False


def clean_entry_content(content):
    """
    Remove invalid characters from RSS content.
    """
    cleaned = re.sub(BAD_CHARS_REGEX, '', content)
    return cleaned


def generate_watch_guid(watch, timestamp):
    """
    Generate a unique GUID for a watch RSS entry.

    Args:
        watch: The watch object
        timestamp: The timestamp of the specific change this entry represents
    """
    return f"{watch['uuid']}/{timestamp}"


def validate_rss_token(datastore, request):
    """
    Validate the RSS access token from the request.

    Access is denied when no token is configured or none was supplied, so two
    missing values never count as a match.

    Returns:
        tuple: (is_valid, error_response) where error_response is None if valid
    """
    import hmac

    app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
    rss_url_token = request.args.get('token')
    denied = (False, ("Access denied, bad token", 403))

    if not app_rss_token or not rss_url_token:
        return denied

    # Compare as bytes, in constant time, so the token cannot be probed via timing
    if not hmac.compare_digest(str(rss_url_token).encode('utf-8'), str(app_rss_token).encode('utf-8')):
        return denied

    return True, None


def get_rss_template(datastore, watch, rss_content_format, default_html, default_plaintext):
    """Get the appropriate template for RSS content.

    Order: the notification body (when 'rss_template_type' says so), then a
    non-blank 'rss_template_override', then the plaintext or HTML default
    depending on whether 'text' appears in ``rss_content_format``.
    """
    if datastore.data['settings']['application'].get('rss_template_type') == 'notification_body':
        return _check_cascading_vars(datastore=datastore, var_name='notification_body', watch=watch)

    override = datastore.data['settings']['application'].get('rss_template_override')
    if override and override.strip():
        return override
    elif 'text' in rss_content_format:
        return default_plaintext
    else:
        return default_html


def get_watch_label(datastore, watch):
    """Get the label for a watch based on settings."""
    if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
        return watch.label
    else:
        return watch.get('url')


def add_watch_categories(fe, watch, datastore):
    """Add category tags to a feed entry based on watch tags."""
    for tag_uuid in watch.get('tags', []):
        tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
        if tag and tag.get('title'):
            fe.category(term=tag.get('title'))


def build_notification_context(watch, timestamp_from, timestamp_to, watch_label,
                               n_body_template, rss_content_format):
    """Build the notification context object."""
    return NotificationContextData(initial_data={
        'notification_urls': ['null://just-sending-a-null-test-for-the-render-in-RSS'],
        'notification_body': n_body_template,
        'timestamp_to': timestamp_to,
        'timestamp_from': timestamp_from,
        'watch_label': watch_label,
        'notification_format': rss_content_format
    })


def render_notification(n_object, notification_service, watch, datastore, date_index_from=None, date_index_to=None):
    """Process and render the notification content.

    The date indexes are only passed on when both are given. Returns the first
    item of what ``process_notification`` produces.
    """
    kwargs = {'n_object': n_object, 'watch': watch}

    if date_index_from is not None and date_index_to is not None:
        kwargs['date_index_from'] = date_index_from
        kwargs['date_index_to'] = date_index_to

    n_object = notification_service.queue_notification_for_watch(**kwargs)
    n_object['watch_mime_type'] = None

    res = process_notification(n_object=n_object, datastore=datastore)
    return res[0]


def populate_feed_entry(fe, watch, content, guid, timestamp, link=None, title_suffix=None):
    """Populate a feed entry with content and metadata.

    Args:
        fe: The feed entry to fill in.
        watch: The watch; its 'url' is used for the title.
        content: Entry body; invalid control characters are stripped.
        guid: Non-permalink GUID for the entry.
        timestamp: Epoch seconds of the change, used for pubDate.
        link: Optional keyword dict passed to ``fe.link``.
        title_suffix: Optional text appended to the title after ' - '.
    """
    watch_label = watch.get('url')  # Already determined by caller

    # Set link
    if link:
        fe.link(link=link)

    # Set title
    if title_suffix:
        fe.title(title=f"{watch_label} - {title_suffix}")
    else:
        fe.title(title=watch_label)

    # Clean and set content
    if scan_invalid_chars_in_rss(content):
        content = clean_entry_content(content)
    fe.content(content=content, type='CDATA')

    # Set GUID
    fe.guid(guid, permalink=False)

    # Set pubDate using the timestamp of this specific change.
    # Convert straight to UTC: building a naive local datetime and then stamping
    # UTC on it shifts the time by the server's UTC offset.
    dt = datetime.datetime.fromtimestamp(int(timestamp), tz=datetime.timezone.utc)
    fe.pubDate(dt)
def construct_main_feed_routes(rss_blueprint, datastore):
    """
    Register the top-level RSS feed endpoints on the blueprint.

    Args:
        rss_blueprint: The Flask blueprint that receives the routes
        datastore: The ChangeDetectionStore instance the feed is built from
    """

    # Earlier blueprint rerouting left some RSS readers pointing at "rss/"
    # (with the trailing slash) - send those over to the real feed endpoint.
    @rss_blueprint.route("/", methods=['GET'])
    def extraslash():
        feed_url = url_for('rss.feed')
        return redirect(feed_url)

    @rss_blueprint.route("", methods=['GET'])
    def feed():
        import time

        from feedgen.feed import FeedGenerator
        from loguru import logger

        from . import RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT
        from ._util import (validate_rss_token, generate_watch_guid, get_rss_template,
                            get_watch_label, build_notification_context, render_notification,
                            populate_feed_entry, add_watch_categories)
        from ...notification_service import NotificationService

        started_at = time.time()

        # Refuse the request up-front when the RSS token does not check out
        token_ok, token_error = validate_rss_token(datastore, request)
        if not token_ok:
            return token_error

        app_settings = datastore.data['settings']['application']
        rss_content_format = app_settings.get('rss_content_format')

        # The ?tag= argument may carry a tag title; translate it to the tag's uuid
        limit_tag = request.args.get('tag', '').lower().strip()
        for tag_uuid, tag in app_settings.get('tags', {}).items():
            if tag.get('title', '').lower().strip() == limit_tag:
                limit_tag = tag_uuid

        def _belongs_in_feed(watch):
            # @todo tag notification_muted skip also (improve Watch model)
            if app_settings.get('rss_hide_muted_watches') and watch.get('notification_muted'):
                return False
            return not limit_tag or limit_tag in watch['tags']

        # @todo needs a .itemsWithTag() or something - then this filtering can go away
        # Oldest change first
        feed_watches = sorted(
            (watch for _uuid, watch in datastore.data['watching'].items() if _belongs_in_feed(watch)),
            key=lambda w: w.last_changed,
        )

        fg = FeedGenerator()
        fg.title('changedetection.io')
        fg.description('Feed description')
        fg.link(href='https://changedetection.io')

        notification_service = NotificationService(datastore=datastore, notification_q=False)

        for watch in feed_watches:
            snapshot_times = list(watch.history.keys())

            # Re #521 - fewer than 2 snapshots means no change was ever detected;
            # watches that were already viewed are left out of the feed too.
            if len(snapshot_times) < 2 or watch.viewed:
                continue

            # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
            watch_label = get_watch_label(datastore, watch)
            timestamp_from, timestamp_to = snapshot_times[-2], snapshot_times[-1]

            # Re #239 - GUID needs to be individual for each event
            guid = generate_watch_guid(watch, timestamp_to)

            # We are called through whatever web server is in front, Flask works out the external path
            diff_link = {
                'href': url_for('ui.ui_diff.diff_history_page', uuid=watch['uuid'], _external=True)
            }

            # Pick the body template, build the notification context and render it
            n_body_template = get_rss_template(datastore, watch, rss_content_format,
                                               RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT)
            n_object = build_notification_context(watch, timestamp_from, timestamp_to,
                                                  watch_label, n_body_template, rss_content_format)
            res = render_notification(n_object, notification_service, watch, datastore)

            fe = fg.add_entry()
            populate_feed_entry(fe, watch, res['body'], guid, timestamp_to, link=diff_link)
            # populate_feed_entry titles the entry by URL; the main feed shows the label instead
            fe.title(title=watch_label)
            add_watch_categories(fe, watch, datastore)

        response = make_response(fg.rss_str())
        response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
        logger.trace(f"RSS generated in {time.time() - started_at:.3f}s")
        return response
def construct_single_watch_routes(rss_blueprint, datastore):
    """
    Construct RSS feed routes for single watches.

    Args:
        rss_blueprint: The Flask blueprint to add routes to
        datastore: The ChangeDetectionStore instance
    """

    # The rule must declare the ``uuid`` variable - the view below takes it as a
    # required argument, so a bare "/watch/" rule fails on every request.
    @rss_blueprint.route("/watch/<string:uuid>", methods=['GET'])
    def rss_single_watch(uuid):
        """
        Display the most recent changes for a single watch as RSS feed.

        Returns RSS XML with multiple entries showing diffs between consecutive snapshots.
        The number of entries is controlled by the rss_diff_length setting
        (a value of 0 or less means "as many as the history allows").

        Args:
            uuid: UUID of the watch, or the literal 'first' to pick a watch
                  from the datastore without knowing its UUID.

        Returns:
            The RSS response; a plain-text 404 when the watch is unknown, or a
            plain-text 400 when the watch has fewer than 2 history snapshots.
        """
        import time
        from flask import make_response, request, Response
        from flask_babel import lazy_gettext as _l
        from feedgen.feed import FeedGenerator
        from loguru import logger

        from . import RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT
        from ._util import (validate_rss_token, get_rss_template, get_watch_label,
                            build_notification_context, render_notification,
                            populate_feed_entry, add_watch_categories)
        from ...notification_service import NotificationService

        now = time.time()

        # Validate token
        is_valid, error = validate_rss_token(datastore, request)
        if not is_valid:
            return error

        rss_content_format = datastore.data['settings']['application'].get('rss_content_format')

        if uuid == 'first':
            watch_uuids = list(datastore.data['watching'].keys())
            # With no watches at all, keep 'first' so the lookup below answers
            # with a 404 rather than crashing on pop() from an empty list.
            if watch_uuids:
                uuid = watch_uuids.pop()

        # Get the watch by UUID
        watch = datastore.data['watching'].get(uuid)
        if not watch:
            return Response(_l("Watch with UUID %(uuid)s not found", uuid=uuid), status=404, mimetype='text/plain')

        # Check if watch has at least 2 history snapshots
        dates = list(watch.history.keys())
        if len(dates) < 2:
            return Response(_l("Watch %(uuid)s does not have enough history snapshots to show changes (need at least 2)", uuid=uuid), status=400, mimetype='text/plain')

        # Get the number of diffs to include (default: 5)
        rss_diff_length = datastore.data['settings']['application'].get('rss_diff_length', 5)

        # Calculate how many diffs we can actually show (limited by available history)
        # We need at least 2 snapshots to create 1 diff
        max_possible_diffs = len(dates) - 1
        num_diffs = min(rss_diff_length, max_possible_diffs) if rss_diff_length > 0 else max_possible_diffs

        # Create RSS feed
        fg = FeedGenerator()

        # Set title: use "label (url)" if label differs from url, otherwise just url
        watch_url = watch.get('url', '')
        watch_label = get_watch_label(datastore, watch)

        if watch_label != watch_url:
            feed_title = f'changedetection.io - {watch_label} ({watch_url})'
        else:
            feed_title = f'changedetection.io - {watch_url}'

        fg.title(feed_title)
        fg.description('Changes')
        fg.link(href='https://changedetection.io')

        # Loop through history and create RSS entries for each diff
        # Add entries in reverse order because feedgen reverses them
        # This way, the newest change appears first in the final RSS
        notification_service = NotificationService(datastore=datastore, notification_q=False)
        for i in reversed(range(num_diffs)):
            # Calculate indices for this diff (working backwards from newest)
            # i=0: compare dates[-2] to dates[-1] (most recent change)
            # i=1: compare dates[-3] to dates[-2] (previous change)
            # etc.
            date_index_to = -(i + 1)
            date_index_from = -(i + 2)
            timestamp_to = dates[date_index_to]
            timestamp_from = dates[date_index_from]

            # Get template and build notification context
            n_body_template = get_rss_template(datastore, watch, rss_content_format,
                                               RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT)
            n_object = build_notification_context(watch, timestamp_from, timestamp_to,
                                                  watch_label, n_body_template, rss_content_format)

            # Render notification with date indices
            res = render_notification(n_object, notification_service, watch, datastore,
                                      date_index_from, date_index_to)

            # Create and populate feed entry
            guid = f"{uuid}/{timestamp_to}"
            fe = fg.add_entry()
            title_suffix = f"Change @ {res['original_context']['change_datetime']}"
            populate_feed_entry(fe, watch, res.get('body', ''), guid, timestamp_to,
                                link={'href': watch.get('url')}, title_suffix=title_suffix)
            add_watch_categories(fe, watch, datastore)

        response = make_response(fg.rss_str())
        response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
        logger.debug(f"RSS Single watch built in {time.time()-now:.2f}s")
        return response
def construct_tag_routes(rss_blueprint, datastore):
    """
    Construct RSS feed routes for tags.

    Args:
        rss_blueprint: The Flask blueprint to add routes to
        datastore: The ChangeDetectionStore instance
    """

    # The rule must declare the ``tag_uuid`` variable - the view below takes it
    # as a required argument, so a bare "/tag/" rule fails on every request.
    @rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
    def rss_tag_feed(tag_uuid):
        """
        Display an RSS feed for all unviewed watches that belong to a specific tag.

        Returns RSS XML with entries for each unviewed watch with sufficient history.

        Args:
            tag_uuid: UUID of the tag, as found under the application 'tags' setting.

        Returns:
            The RSS response, or a plain-text 404 when the tag does not exist.
        """
        from flask import make_response, request, url_for, Response
        from feedgen.feed import FeedGenerator

        from . import RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT
        from ._util import (validate_rss_token, generate_watch_guid, get_rss_template,
                            get_watch_label, build_notification_context, render_notification,
                            populate_feed_entry, add_watch_categories)
        from ...notification_service import NotificationService

        # Validate token
        is_valid, error = validate_rss_token(datastore, request)
        if not is_valid:
            return error

        app_settings = datastore.data['settings']['application']
        rss_content_format = app_settings.get('rss_content_format')

        # Verify tag exists
        tag = app_settings.get('tags', {}).get(tag_uuid)
        if not tag:
            # tag_uuid comes straight from the URL: answer as text/plain (same as
            # the single-watch feed) so it is never interpreted as HTML.
            return Response(f"Tag with UUID {tag_uuid} not found", status=404, mimetype='text/plain')

        tag_title = tag.get('title', 'Unknown Tag')

        # Create RSS feed
        fg = FeedGenerator()
        fg.title(f'changedetection.io - {tag_title}')
        fg.description(f'Changes for watches tagged with {tag_title}')
        fg.link(href='https://changedetection.io')

        notification_service = NotificationService(datastore=datastore, notification_q=False)

        # Find all watches with this tag
        for uuid, watch in datastore.data['watching'].items():
            # @todo This is wrong, it needs to sort by most recently changed and then limit it
            #       (collect all watches in this tag first, then sort)

            # Skip if watch doesn't have this tag
            if tag_uuid not in watch.get('tags', []):
                continue

            # Skip muted watches if configured
            if app_settings.get('rss_hide_muted_watches') and watch.get('notification_muted'):
                continue

            # Check if watch has at least 2 history snapshots
            dates = list(watch.history.keys())
            if len(dates) < 2:
                continue

            # Only include unviewed watches
            if watch.viewed:
                continue

            # Include a link to the diff page (use uuid from loop, don't modify watch dict)
            diff_link = {'href': url_for('ui.ui_diff.diff_history_page', uuid=uuid, _external=True)}

            # Get watch label
            watch_label = get_watch_label(datastore, watch)

            # The entry describes the change between the two newest snapshots
            timestamp_to = dates[-1]
            timestamp_from = dates[-2]

            # Generate GUID for this entry
            guid = generate_watch_guid(watch, timestamp_to)

            # Get template and build notification context
            n_body_template = get_rss_template(datastore, watch, rss_content_format,
                                               RSS_TEMPLATE_HTML_DEFAULT, RSS_TEMPLATE_PLAINTEXT_DEFAULT)
            n_object = build_notification_context(watch, timestamp_from, timestamp_to,
                                                  watch_label, n_body_template, rss_content_format)

            # Render notification
            res = render_notification(n_object, notification_service, watch, datastore)

            # Create and populate feed entry
            fe = fg.add_entry()
            title_suffix = f"Change @ {res['original_context']['change_datetime']}"
            populate_feed_entry(fe, watch, res['body'], guid, timestamp_to,
                                link=diff_link, title_suffix=title_suffix)
            add_watch_categories(fe, watch, datastore)

        response = make_response(fg.rss_str())
        response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
        return response
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the 'settings' Flask blueprint.

    Registers the global settings page plus a few small action routes
    (API key reset, notification log view, pause-all and mute-all toggles).
    All views close over ``datastore`` and read/write
    ``datastore.data['settings']``.

    Args:
        datastore: The ChangeDetectionStore instance holding the settings.

    Returns:
        The configured Flask blueprint.
    """
    settings_blueprint = Blueprint('settings', __name__, template_folder="templates")

    @settings_blueprint.route("", methods=['GET', "POST"])
    @login_optionally_required
    def settings_page():
        """Render the global settings form (GET) and apply a submitted form (POST)."""
        from changedetectionio import forms
        from changedetectionio.pluggy_interface import (
            get_plugin_settings_tabs,
            load_plugin_settings,
            save_plugin_settings
        )

        # Work on a copy so that preparing form defaults never touches the live settings
        default = deepcopy(datastore.data['settings'])
        if datastore.proxy_list is not None:
            available_proxies = list(datastore.proxy_list.keys())
            # When enabled
            system_proxy = datastore.data['settings']['requests']['proxy']
            # In the case it doesn't exist anymore
            if not system_proxy in available_proxies:
                system_proxy = None

            # Fall back to the first configured proxy when the saved one is gone/unset
            default['requests']['proxy'] = system_proxy if system_proxy is not None else available_proxies[0]
            # Used by the form handler to keep or remove the proxy settings
            default['proxy_list'] = available_proxies[0]

        # Don't use form.data on POST so that it doesn't override the checkbox status from the POST status
        form = forms.globalSettingsForm(formdata=request.form if request.method == 'POST' else None,
                                        data=default,
                                        extra_notification_tokens=datastore.get_unique_notification_tokens_available()
                                        )

        # Remove the last option 'System default'
        form.application.form.notification_format.choices.pop()

        if datastore.proxy_list is None:
            # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
            del form.requests.form.proxy
        else:
            # Offer every configured proxy as (key, label)
            form.requests.form.proxy.choices = []
            for p in datastore.proxy_list:
                form.requests.form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))

        if request.method == 'POST':
            # Password unset is a GET, but we can lock the session to a salted env password to always need the password
            if form.application.form.data.get('removepassword_button', False):
                # SALTED_PASS means the password is "locked" to what we set in the Env var
                if not os.getenv("SALTED_PASS", False):
                    datastore.remove_password()
                    flash(gettext("Password protection removed."), 'notice')
                    flask_login.logout_user()
                    return redirect(url_for('settings.settings_page'))

            if form.validate():
                # Don't set password to False when a password is set - should be only removed with the `removepassword` button
                app_update = dict(deepcopy(form.data['application']))

                # Never update password with '' or False (Added by wtforms when not in submission)
                if 'password' in app_update and not app_update['password']:
                    del (app_update['password'])

                datastore.data['settings']['application'].update(app_update)

                # Handle dynamic worker count adjustment
                # (read the old value before the 'requests' settings are overwritten below)
                old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
                new_worker_count = form.data['requests'].get('workers', 1)

                datastore.data['settings']['requests'].update(form.data['requests'])
                datastore.commit()

                # Clear all checksums to force reprocessing with new settings
                # Global settings can affect watch behavior (filters, rendering, etc.)
                datastore.clear_all_last_checksums()

                # Adjust worker count if it changed
                if new_worker_count != old_worker_count:
                    from changedetectionio import worker_pool
                    from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds

                    # Check CPU core availability and warn if worker count is high
                    cpu_count = os.cpu_count()
                    if cpu_count and new_worker_count >= (cpu_count * 0.9):
                        flash(gettext("Warning: Worker count ({}) is close to or exceeds available CPU cores ({})").format(
                            new_worker_count, cpu_count), 'warning')

                    result = worker_pool.adjust_async_worker_count(
                        new_count=new_worker_count,
                        update_q=update_q,
                        notification_q=notification_q,
                        app=app,
                        datastore=ds
                    )

                    # Report the outcome; any other status value is silently ignored
                    if result['status'] == 'success':
                        flash(gettext("Worker count adjusted: {}").format(result['message']), 'notice')
                    elif result['status'] == 'not_supported':
                        flash(gettext("Dynamic worker adjustment not supported for sync workers"), 'warning')
                    elif result['status'] == 'error':
                        flash(gettext("Error adjusting workers: {}").format(result['message']), 'error')

                # A newly submitted password is stored only when it is not locked by the
                # SALTED_PASS env var. The user is then logged out and redirected, which
                # means the plugin settings below are NOT saved on this request.
                if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
                    datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
                    datastore.commit()
                    flash(gettext("Password protection enabled."), 'notice')
                    flask_login.logout_user()
                    return redirect(url_for('watchlist.index'))

                # Also save plugin settings from the same form submission
                plugin_tabs_list = get_plugin_settings_tabs()
                for tab in plugin_tabs_list:
                    plugin_id = tab['plugin_id']
                    form_class = tab['form_class']

                    # Instantiate plugin form with POST data
                    plugin_form = form_class(formdata=request.form)

                    # Save plugin settings (validation is optional for plugins)
                    if plugin_form.data:
                        save_plugin_settings(datastore.datastore_path, plugin_id, plugin_form.data)

                flash(gettext("Settings updated."))

            else:
                flash(gettext("An error occurred, please see below."), "error")

        # Everything below runs for GET and for a POST that did not redirect:
        # gather the values the template needs and render the page.

        # Convert to ISO 8601 format, all date/time relative events stored as UTC time
        utc_time = datetime.now(ZoneInfo("UTC")).isoformat()

        # Get active plugins
        from changedetectionio.pluggy_interface import get_active_plugins
        import sys
        active_plugins = get_active_plugins()
        python_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"

        # Calculate uptime in seconds
        uptime_seconds = time.time() - datastore.start_time

        # Get plugin settings tabs and instantiate forms
        plugin_tabs = get_plugin_settings_tabs()
        plugin_forms = {}

        for tab in plugin_tabs:
            plugin_id = tab['plugin_id']
            form_class = tab['form_class']

            # Load existing settings
            settings = load_plugin_settings(datastore.datastore_path, plugin_id)

            # Instantiate the form with existing settings
            plugin_forms[plugin_id] = form_class(data=settings)

        output = render_template("settings.html",
                                 active_plugins=active_plugins,
                                 api_key=datastore.data['settings']['application'].get('api_access_token'),
                                 python_version=python_version,
                                 uptime_seconds=uptime_seconds,
                                 available_timezones=sorted(available_timezones()),
                                 emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
                                 extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(),
                                 form=form,
                                 hide_remove_pass=os.getenv("SALTED_PASS", False),
                                 min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)),
                                 settings_application=datastore.data['settings']['application'],
                                 timezone_default_config=datastore.data['settings']['application'].get('scheduler_timezone_default'),
                                 utc_time=utc_time,
                                 plugin_tabs=plugin_tabs,
                                 plugin_forms=plugin_forms,
                                 )

        return output

    @settings_blueprint.route("/reset-api-key", methods=['GET'])
    @login_optionally_required
    def settings_reset_api_key():
        """Replace the API access token with a fresh random one and return to the settings page."""
        # NOTE(review): this state-changing action is reachable via a plain GET - confirm that is intended
        secret = secrets.token_hex(16)
        datastore.data['settings']['application']['api_access_token'] = secret
        datastore.commit()
        flash(gettext("API Key was regenerated."))
        # '#api' is appended as a URL fragment to the settings page URL
        return redirect(url_for('settings.settings_page')+'#api')

    @settings_blueprint.route("/notification-logs", methods=['GET'])
    @login_optionally_required
    def notification_logs():
        """Render the notification debug log, or a single placeholder line when it is empty."""
        from changedetectionio.flask_app import notification_debug_log
        output = render_template("notification-log.html",
                                 logs=notification_debug_log if len(notification_debug_log) else ["Notification logs are empty - no notifications sent yet."])
        return output

    @settings_blueprint.route("/toggle-all-paused", methods=['GET'])
    @login_optionally_required
    def toggle_all_paused():
        """Flip the application-wide 'all_paused' flag and go back to the watch list."""
        current_state = datastore.data['settings']['application'].get('all_paused', False)
        datastore.data['settings']['application']['all_paused'] = not current_state
        datastore.commit()

        if datastore.data['settings']['application']['all_paused']:
            flash(gettext("Automatic scheduling paused - checks will not be queued."), 'notice')
        else:
            flash(gettext("Automatic scheduling resumed - checks will be queued normally."), 'notice')

        return redirect(url_for('watchlist.index'))

    @settings_blueprint.route("/toggle-all-muted", methods=['GET'])
    @login_optionally_required
    def toggle_all_muted():
        """Flip the application-wide 'all_muted' flag and go back to the watch list."""
        current_state = datastore.data['settings']['application'].get('all_muted', False)
        datastore.data['settings']['application']['all_muted'] = not current_state
        datastore.commit()

        if datastore.data['settings']['application']['all_muted']:
            flash(gettext("All notifications muted."), 'notice')
        else:
            flash(gettext("All notifications unmuted."), 'notice')

        return redirect(url_for('watchlist.index'))

    return settings_blueprint

{{ _('Notification debug log') }}

    {% for log in logs|reverse %}
  • {{log}}
  • {% endfor %}
{% endblock %} ================================================ FILE: changedetectionio/blueprint/settings/templates/settings.html ================================================ {% extends 'base.html' %} {% block content %} {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field, render_fieldlist_with_inline_errors %} {% from '_common_fields.html' import render_common_settings_form, show_token_placeholders %}
{{ render_field(form.requests.form.time_between_check, class="time-check-widget") }} {{ _('Default recheck time for all watches, current system minimum is') }} {{min_system_recheck_seconds}} {{ _('seconds') }} ({{ _('more info') }}).
{{ render_time_schedule_form(form.requests, available_timezones, timezone_default_config) }}
{{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }} {{ _('After this many consecutive times that the CSS/xPath filter is missing, send a notification') }}
{{ _('Set to') }} 0 {{ _('to disable') }}
{{ render_field(form.application.form.history_snapshot_max_length, class="history_snapshot_max_length") }} {{ _('Limit collection of history snapshots for each watch to this number of history items.') }}
{{ _('Set to empty to disable / no limit') }}
{% if not hide_remove_pass %} {% if current_user.is_authenticated %} {{ render_button(form.application.form.removepassword_button) }} {% else %} {{ render_field(form.application.form.password) }} {{ _('Password protection for your changedetection.io application.') }} {% endif %} {% else %} {{ _('Password is locked.') }} {% endif %}
{{ render_checkbox_field(form.application.form.shared_diff_access, class="shared_diff_access") }} {{ _('Allow access to the watch change history page when password is enabled (Good for sharing the diff page)') }}
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }} {{ _('When a request returns no content, or the HTML does not contain any text, is this considered a change?') }}
{% if form.requests.proxy %}

{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }} {{ _('Choose a default proxy for all watches') }}
{% endif %}
{{ render_common_settings_form(form.application.form, emailprefix, settings_application, extra_notification_token_placeholder_info) }}
{{ render_field(form.application.form.base_url, class="m-d") }} {{ _('Base URL used for the') }} {{ '{{ base_url }}' }} {{ _('token in notification links.') }}
{{ _('Default value is the system environment variable') }} 'BASE_URL' - {{ _('read more here') }}.
{{ render_field(form.application.form.fetch_backend, class="fetch-backend") }}

{{ _('Use the') }} {{ _('Basic') }} {{ _('method (default) where your watched sites don\'t need Javascript to render.') }}

{{ _('The') }} {{ _('Chrome/Javascript') }} {{ _('method requires a network connection to a running WebDriver+Chrome server, set by the ENV var') }} 'WEBDRIVER_URL'.

{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}
{{ _('This will wait') }} n {{ _('seconds before extracting the text.') }}
{{ render_field(form.application.form.webdriver_delay) }}
{{ render_field(form.requests.form.workers) }} {% set worker_info = get_worker_status_info() %} {{ _('Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.') }}
{{ _('Currently running:') }} {{ worker_info.count }} {{ _('operational') }} {{ worker_info.type }} {{ _('workers') }}{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} {{ _('actively processing') }}){% endif %}.
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }} {{ _('Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later') }}
{{ render_field(form.requests.form.timeout) }} {{ _('For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.') }}
{{ render_field(form.requests.form.default_ua) }} {{ _('Applied to all requests.') }}

{{ _('Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it\'s important to consider') }} {{ _('all of the ways that the browser is detected') }}.
{{ render_checkbox_field(form.application.form.ignore_whitespace) }} {{ _('Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.') }}
{{ _('Note:') }} {{ _('Changing this will change the status of your existing watches, possibly trigger alerts etc.') }}
{{ render_checkbox_field(form.application.form.render_anchor_tag_content) }} {{ _('Render anchor tag content, default disabled, when enabled renders links as') }} (link text)[https://somesite.com]
{{ _('Note:') }} {{ _('Changing this could affect the content of your existing watches, possibly trigger alerts etc.') }}
{{ render_field(form.application.form.global_subtractive_selectors, rows=5, placeholder="header footer nav .stockticker //*[contains(text(), 'Advertisement')]") }}
  • {{ _('Remove HTML element(s) by CSS and XPath selectors before text conversion.') }}
  • {{ _('Don\'t paste HTML here, use only CSS and XPath selectors') }}
  • {{ _('Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML.') }}
{{ render_field(form.application.form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line /some.regex\d{2}/ for case-INsensitive regex ") }} {{ _('Note: This is applied globally in addition to the per-watch rules.') }}
  • {{ _('Matching text will be') }} {{ _('ignored') }} {{ _('in the text snapshot (you can still see it but it wont trigger a change)') }}
  • {{ _('Note: This is applied globally in addition to the per-watch rules.') }}
  • {{ _('Each line processed separately, any line matching will be ignored (removed before creating the checksum)') }}
  • {{ _('Regular Expression support, wrap the entire line in forward slash') }} /regex/
  • {{ _('Changing this will affect the comparison checksum which may trigger an alert') }}
{{ render_checkbox_field(form.application.form.strip_ignored_lines) }} {{ _('Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)') }}
{{ _('Note:') }} {{ _('Changing this will change the status of your existing watches, possibly trigger alerts etc.') }}

{{ _('API Access') }}

{{ _('Drive your changedetection.io via API, More about') }} {{ _('API access and examples here') }}.

{{ render_checkbox_field(form.application.form.api_access_token_enabled) }}
{{ _('Restrict API access limit by using') }} x-api-key {{ _('header - required for the Chrome Extension to work') }}


{{ _('API Key') }} {{api_key}}

{{ _('Chrome Extension') }}

{{ _('Easily add any web-page to your changedetection.io installation from within Chrome.') }}

{{ _('Step 1') }} {{ _('Install the extension,') }} {{ _('Step 2') }} {{ _('Navigate to this page,') }} {{ _('Step 3') }} {{ _('Open the extension from the toolbar and click') }} "{{ _('Sync API Access') }}"

{{ _('Chrome store icon') }} {{ _('Chrome Webstore') }}

{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
{{ render_field(form.application.form.rss_diff_length) }} {{ _('Maximum number of history snapshots to include in the watch specific RSS feed.') }}
{{ render_checkbox_field(form.application.form.rss_reader_mode) }} {{ _('For watching other RSS feeds - When watching RSS/Atom feeds, convert them into clean text for better change detection.') }}
{{ render_field(form.application.form.rss_content_format) }} {{ _('Does your reader support HTML? Set it here') }}
{{ render_field(form.application.form.rss_template_type) }} {{ _('\'System default\' for the same template for all items, or re-use your "Notification Body" as the template.') }}
{{ render_field(form.application.form.rss_template_override) }} {{ show_token_placeholders(extra_notification_token_placeholder_info=extra_notification_token_placeholder_info, suffix="-rss") }}

{{ _('Ensure the settings below are correct, they are used to manage the time schedule for checking your web page watches.') }}

{{ _('UTC Time & Date from Server:') }} {{ utc_time }}

{{ _('Local Time & Date in Browser:') }}

{{ render_field(form.application.form.scheduler_timezone_default) }}
{{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }} {{ _('Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.') }}
{{ render_checkbox_field(form.application.form.ui.form.socket_io_enabled, class="socket_io_enabled") }} {{ _('Realtime UI Updates Enabled - (Restart required if this is changed)') }}
{{ render_checkbox_field(form.application.form.ui.form.favicons_enabled, class="") }} {{ _('Enable or Disable Favicons next to the watch list') }}
{{ render_checkbox_field(form.application.form.ui.use_page_title_in_list) }}
{{ render_field(form.application.form.pager_size) }} {{ _('Number of items per page in the watch overview list, 0 to disable.') }}

{{ _('Tip') }}: {{ _('"Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.') }}

{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }} {{ _('"Name" will be used for selecting the proxy in the Watch Edit settings') }}
{{ _('SOCKS5 proxies with authentication are only supported with \'plain requests\' fetcher, for other fetchers you should whitelist the IP access instead') }}

Extra Browsers can be attached to further defeat CAPTCHAs on websites that are particularly hard to scrape.
Simply paste the connection address into the box, More instructions and examples here

{{ render_fieldlist_with_inline_errors(form.requests.form.extra_browsers) }}
{% if plugin_tabs %} {% for tab in plugin_tabs %}
{% set plugin_form = plugin_forms[tab.plugin_id] %} {% if tab.template_path %} {# Plugin provides custom template - include it directly (no separate form) #} {% include tab.template_path with context %} {% else %} {# Default form rendering - fields only, no submit button #}
{% for field in plugin_form %} {% if field.type != 'CSRFToken' and field.type != 'SubmitField' %}
{% if field.type == 'BooleanField' %} {{ render_checkbox_field(field) }} {% else %} {{ render_field(field) }} {% endif %}
{% endif %} {% endfor %}
{% endif %}
{% endfor %} {% endif %}

{{ _('Uptime:') }} {{ uptime_seconds|format_duration }}

{{ _('Python version:') }} {{ python_version }}

{{ _('Plugins active:') }}

{% if active_plugins %}
    {% for plugin in active_plugins %}
  • {{ plugin.name }} - {{ plugin.description }}
  • {% endfor %}
{% else %}

{{ _('No plugins active') }}

{% endif %}
{{ render_button(form.save_button) }} {{ _('Back') }} {{ _('Clear Snapshot History') }}
{% endblock %} ================================================ FILE: changedetectionio/blueprint/tags/README.md ================================================ # Groups tags ## How it works Each watch has a list() of tag UUIDs, which relate to a config under settings.application.tags. The 'tag' is actually a watch, because the two will eventually share about 90% of the same config, so a tag is like an abstract watch. ================================================ FILE: changedetectionio/blueprint/tags/__init__.py ================================================ import threading from flask import Blueprint, request, render_template, flash, url_for, redirect from flask_babel import gettext from loguru import logger from changedetectionio.store import ChangeDetectionStore from changedetectionio.flask_app import login_optionally_required def construct_blueprint(datastore: ChangeDetectionStore): tags_blueprint = Blueprint('tags', __name__, template_folder="templates") @tags_blueprint.route("/list", methods=['GET']) @login_optionally_required def tags_overview_page(): from .form import SingleTag add_form = SingleTag(request.form) sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title']) from collections import Counter tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags']) output = render_template("groups-overview.html", app_rss_token=datastore.data['settings']['application'].get('rss_access_token'), available_tags=sorted_tags, form=add_form, tag_count=tag_count, ) return output @tags_blueprint.route("/add", methods=['POST']) @login_optionally_required def form_tag_add(): from .form import SingleTag add_form = SingleTag(request.form) if not add_form.validate(): for widget, l in add_form.errors.items(): flash(','.join(l), 'error') return redirect(url_for('tags.tags_overview_page')) title = request.form.get('name').strip() if 
datastore.tag_exists_by_name(title): flash(gettext('The tag "{}" already exists').format(title), "error") return redirect(url_for('tags.tags_overview_page')) datastore.add_tag(title) flash(gettext("Tag added")) return redirect(url_for('tags.tags_overview_page')) @tags_blueprint.route("/mute/", methods=['GET']) @login_optionally_required def mute(uuid): tag = datastore.data['settings']['application']['tags'].get(uuid) if tag: tag['notification_muted'] = not tag['notification_muted'] tag.commit() return redirect(url_for('tags.tags_overview_page')) @tags_blueprint.route("/delete/", methods=['GET']) @login_optionally_required def delete(uuid): # Delete the tag from settings immediately if datastore.data['settings']['application']['tags'].get(uuid): del datastore.data['settings']['application']['tags'][uuid] # Remove tag from all watches in background thread to avoid blocking def remove_tag_background(tag_uuid): """Background thread to remove tag from watches - discarded after completion.""" removed_count = 0 try: for watch_uuid, watch in datastore.data['watching'].items(): if watch.get('tags') and tag_uuid in watch['tags']: watch['tags'].remove(tag_uuid) watch.commit() removed_count += 1 logger.info(f"Background: Tag {tag_uuid} removed from {removed_count} watches") except Exception as e: logger.error(f"Error removing tag from watches: {e}") # Start daemon thread threading.Thread(target=remove_tag_background, args=(uuid,), daemon=True).start() flash(gettext("Tag deleted, removing from watches in background")) return redirect(url_for('tags.tags_overview_page')) @tags_blueprint.route("/unlink/", methods=['GET']) @login_optionally_required def unlink(uuid): # Unlink tag from all watches in background thread to avoid blocking def unlink_tag_background(tag_uuid): """Background thread to unlink tag from watches - discarded after completion.""" unlinked_count = 0 try: for watch_uuid, watch in datastore.data['watching'].items(): if watch.get('tags') and tag_uuid in 
watch['tags']: watch['tags'].remove(tag_uuid) watch.commit() unlinked_count += 1 logger.info(f"Background: Tag {tag_uuid} unlinked from {unlinked_count} watches") except Exception as e: logger.error(f"Error unlinking tag from watches: {e}") # Start daemon thread threading.Thread(target=unlink_tag_background, args=(uuid,), daemon=True).start() flash(gettext("Unlinking tag from watches in background")) return redirect(url_for('tags.tags_overview_page')) @tags_blueprint.route("/delete_all", methods=['GET']) @login_optionally_required def delete_all(): for tag_uuid in list(datastore.data['settings']['application']['tags'].keys()): # TagsDict 'del' handler will remove the dir del datastore.data['settings']['application']['tags'][tag_uuid] # Clear tags from all watches in background thread to avoid blocking def clear_all_tags_background(): """Background thread to clear tags from all watches - discarded after completion.""" cleared_count = 0 try: for watch_uuid, watch in datastore.data['watching'].items(): watch['tags'] = [] watch.commit() cleared_count += 1 logger.info(f"Background: Cleared tags from {cleared_count} watches") except Exception as e: logger.error(f"Error clearing tags from watches: {e}") # Start daemon thread threading.Thread(target=clear_all_tags_background, daemon=True).start() flash(gettext("All tags deleted, clearing from watches in background")) return redirect(url_for('tags.tags_overview_page')) @tags_blueprint.route("/edit/", methods=['GET']) @login_optionally_required def form_tag_edit(uuid): from changedetectionio.blueprint.tags.form import group_restock_settings_form if uuid == 'first': uuid = list(datastore.data['settings']['application']['tags'].keys()).pop() default = datastore.data['settings']['application']['tags'].get(uuid) if not default: flash(gettext("Tag not found"), "error") return redirect(url_for('watchlist.index')) form = group_restock_settings_form( formdata=request.form if request.method == 'POST' else None, data=default, 
extra_notification_tokens=datastore.get_unique_notification_tokens_available(), default_system_settings = datastore.data['settings'], ) # Bridge API-stored processor_config_* values into the form's FormField sub-forms. # The API stores processor_config_restock_diff in the tag dict; find the matching # FormField by checking which one's sub-fields cover the config keys. from wtforms.fields.form import FormField as WTFormField for key, value in default.items(): if not key.startswith('processor_config_') or not isinstance(value, dict): continue for form_field in form: if isinstance(form_field, WTFormField) and all(k in form_field.form._fields for k in value): for sub_key, sub_value in value.items(): sub_field = form_field.form._fields.get(sub_key) if sub_field is not None: sub_field.data = sub_value break template_args = { 'data': default, 'form': form, 'watch': default, 'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(), } included_content = {} if form.extra_form_content(): # So that the extra panels can access _helpers.html etc, we set the environment to load from templates/ # And then render the code from the module from jinja2 import Environment, FileSystemLoader import importlib.resources templates_dir = str(importlib.resources.files("changedetectionio").joinpath('templates')) env = Environment(loader=FileSystemLoader(templates_dir)) template_str = """{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
{{ render_checkbox_field(form.overrides_watch) }} Used for watches in "Restock & Price detection" mode
""" template_str += form.extra_form_content() template = env.from_string(template_str) included_content = template.render(**template_args) output = render_template("edit-tag.html", extra_form_content=included_content, extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None, settings_application=datastore.data['settings']['application'], **template_args ) return output @tags_blueprint.route("/edit/", methods=['POST']) @login_optionally_required def form_tag_edit_submit(uuid): from changedetectionio.blueprint.tags.form import group_restock_settings_form if uuid == 'first': uuid = list(datastore.data['settings']['application']['tags'].keys()).pop() tag = datastore.data['settings']['application']['tags'].get(uuid) form = group_restock_settings_form(formdata=request.form if request.method == 'POST' else None, data=tag, extra_notification_tokens=datastore.get_unique_notification_tokens_available() ) # @todo subclass form so validation works #if not form.validate(): # for widget, l in form.errors.items(): # flash(','.join(l), 'error') # return redirect(url_for('tags.form_tag_edit_submit', uuid=uuid)) tag.update(form.data) tag['processor'] = 'restock_diff' tag.commit() # Clear checksums for all watches using this tag to force reprocessing # Tag changes affect inherited configuration cleared_count = datastore.clear_checksums_for_tag(uuid) logger.info(f"Tag {uuid} updated, cleared {cleared_count} watch checksums") flash(gettext("Updated")) return redirect(url_for('tags.tags_overview_page')) return tags_blueprint ================================================ FILE: changedetectionio/blueprint/tags/form.py ================================================ from wtforms import ( Form, StringField, SubmitField, validators, ) from wtforms.fields.simple import BooleanField from changedetectionio.processors.restock_diff.forms import processor_settings_form as restock_settings_form class group_restock_settings_form(restock_settings_form): overrides_watch = 
class SingleTag(Form):
    """Form used to create a tag: one required name field and a save button."""

    # Required free-text tag name
    name = StringField(
        label='Tag name',
        validators=[validators.InputRequired()],
        render_kw={"placeholder": "Name"},
    )
    # Submit button, styled as the primary button
    save_button = SubmitField(
        label='Save',
        render_kw={"class": "pure-button pure-button-primary"},
    )
{{ render_field(form.title, placeholder="https://...", required=true, class="m-d") }}

{{ _('These settings are') }} {{ _('added') }} {{ _('to any existing watch configurations.') }}

{% include "edit/include_subtract.html" %}

{{ _('Text filtering') }}

{% include "edit/text-options.html" %}
{# rendered sub Template #} {% if extra_form_content %}
{{ extra_form_content|safe }}
{% endif %}
{{ render_ternary_field(form.notification_muted, BooleanField=True) }}
{% if 1 %}
{{ render_checkbox_field(form.notification_screenshot) }} {{ _('Use with caution!') }} {{ _('This will easily fill up your email storage quota or flood other storages.') }}
{% endif %}
{% if has_default_notification_urls %}
{{ _('Look out!') }} {{ _('There are') }} {{ _('system-wide notification URLs enabled') }}, {{ _('this form will override notification settings for this watch only') }} ‐ {{ _('an empty Notification URL list here will still send notifications.') }}
{% endif %} {{ _('Use system defaults') }} {{ render_common_settings_form(form, emailprefix, settings_application, extra_notification_token_placeholder_info) }}
{{ render_button(form.save_button) }}
{% endblock %} ================================================ FILE: changedetectionio/blueprint/tags/templates/groups-overview.html ================================================ {% extends 'base.html' %} {% block content %} {% from '_helpers.html' import render_simple_field, render_field %}
{{ _('Add a new organisational tag') }}
{{ render_simple_field(form.name, placeholder=_("Watch group / tag")) }}
{{ render_simple_field(form.save_button, title=_("Save") ) }}

{{ _('Groups allows you to manage filters and notifications for multiple watches under a single organisational tag.') }}
{% if not available_tags|length %} {% endif %} {% for uuid, tag in available_tags %} {% endfor %}
{{ _('# Watches') }} {{ _('Tag / Label name') }}
{{ _('No website organisational tags/groups configured') }}
Mute notifications {{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }} {{ tag.title }} {{ _('Edit') }} {{ _('Recheck') }} {{ _('Delete') }} {{ _('Unlink') }} {{ _('RSS Feed for this watch') }}
def _handle_operations(op, uuids, datastore, worker_pool, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
    """Apply one bulk operation from the watch list to a set of watches.

    UUIDs that are not (or no longer) present in ``datastore.data['watching']`` are
    skipped. An unknown ``op`` changes nothing. In every case ``watch_check_update`` is
    sent once for each requested UUID afterwards.

    :param op: One of 'delete', 'pause', 'unpause', 'mark-viewed', 'mute', 'unmute',
        'recheck', 'clear-errors', 'clear-history', 'notification-default', 'assign-tag'.
    :param uuids: Sized collection of watch UUIDs to operate on.
    :param datastore: Datastore exposing ``data['watching']`` and the helper methods used below.
    :param worker_pool: Used by 'recheck' to queue watches (``queue_item_async_safe``).
    :param update_q: Queue handed to ``worker_pool`` by 'recheck'.
    :param queuedWatchMetaData: Provides ``PrioritizedItem`` for 'recheck'.
    :param watch_check_update: Signal-like object with ``send(watch_uuid=...)``.
    :param extra_data: Tag title for 'assign-tag'; ignored by the other operations.
    :param emit_flash: When false, no flash message is produced.
    """

    def existing_watches():
        # Deliberately lazy: existence is re-checked right before each item is handled,
        # so a UUID listed twice is not deleted/processed again after it disappeared.
        for uuid in uuids:
            watch = datastore.data['watching'].get(uuid)
            if watch:
                yield uuid, watch

    def set_on_each(**values):
        """Assign the given keys on every existing watch and persist it."""
        for _uuid, watch in existing_watches():
            for key, value in values.items():
                watch[key] = value
            watch.commit()

    # NOTE: the gettext() calls keep literal message arguments on purpose; message
    # extraction tools generally only pick up literal strings.
    if op == 'delete':
        for uuid, _watch in existing_watches():
            datastore.delete(uuid)
        if emit_flash:
            flash(gettext("{} watches deleted").format(len(uuids)))

    elif op == 'pause':
        set_on_each(paused=True)
        if emit_flash:
            flash(gettext("{} watches paused").format(len(uuids)))

    elif op == 'unpause':
        set_on_each(paused=False)
        if emit_flash:
            flash(gettext("{} watches unpaused").format(len(uuids)))

    elif op == 'mark-viewed':
        for uuid, _watch in existing_watches():
            datastore.set_last_viewed(uuid, int(time.time()))
        if emit_flash:
            flash(gettext("{} watches updated").format(len(uuids)))

    elif op == 'mute':
        set_on_each(notification_muted=True)
        if emit_flash:
            flash(gettext("{} watches muted").format(len(uuids)))

    elif op == 'unmute':
        set_on_each(notification_muted=False)
        if emit_flash:
            flash(gettext("{} watches un-muted").format(len(uuids)))

    elif op == 'recheck':
        for uuid, _watch in existing_watches():
            # Recheck and require a full reprocessing
            worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
        if emit_flash:
            flash(gettext("{} watches queued for rechecking").format(len(uuids)))

    elif op == 'clear-errors':
        set_on_each(last_error=False)
        if emit_flash:
            flash(gettext("{} watches errors cleared").format(len(uuids)))

    elif op == 'clear-history':
        for uuid, _watch in existing_watches():
            datastore.clear_watch_history(uuid)
        if emit_flash:
            flash(gettext("{} watches cleared/reset.").format(len(uuids)))

    elif op == 'notification-default':
        from changedetectionio.notification import (
            USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
        )
        set_on_each(
            notification_title=None,
            notification_body=None,
            notification_urls=[],
            notification_format=USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
        )
        if emit_flash:
            flash(gettext("{} watches set to use default notification settings").format(len(uuids)))

    elif op == 'assign-tag':
        # add_tag() is expected to hand back the UUID of the tag with this title
        tag_uuid = datastore.add_tag(title=extra_data) if extra_data else None
        if tag_uuid:
            for _uuid, watch in existing_watches():
                # Bug in old versions caused by bad edit page/tag handler: 'tags' could be
                # stored as a str. Missing/None is normalised the same way.
                if not isinstance(watch.get('tags'), list):
                    watch['tags'] = []
                # Re-assigning the same tag must not add a duplicate entry
                if tag_uuid not in watch['tags']:
                    watch['tags'].append(tag_uuid)
                watch.commit()
            if emit_flash:
                flash(gettext("{} watches were tagged").format(len(uuids)))

    # Notify listeners for every requested UUID, whether or not it was found
    for uuid in uuids:
        watch_check_update.send(watch_uuid=uuid)
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool, queuedWatchMetaData, watch_check_update):
    """Build the ``ui`` blueprint and register its edit/notification/views/diff/preview children.

    :param datastore: Application datastore holding settings and watches.
    :param update_q: Queue of watches waiting to be checked.
    :param worker_pool: Used to queue watches and to ask which ones are running.
    :param queuedWatchMetaData: Provides ``PrioritizedItem`` for queue entries.
    :param watch_check_update: Signal-like object passed on to the bulk operations handler.
    :return: The configured Flask ``Blueprint``.
    """
    ui_blueprint = Blueprint('ui', __name__, template_folder="templates")

    # Register the edit blueprint
    edit_blueprint = construct_edit_blueprint(datastore, update_q, queuedWatchMetaData)
    ui_blueprint.register_blueprint(edit_blueprint)

    # Register the notification blueprint
    notification_blueprint = construct_notification_blueprint(datastore)
    ui_blueprint.register_blueprint(notification_blueprint)

    # Register the views blueprint
    views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData, watch_check_update)
    ui_blueprint.register_blueprint(views_blueprint)

    # Register diff and preview blueprints
    diff_blueprint = diff.construct_blueprint(datastore)
    ui_blueprint.register_blueprint(diff_blueprint)

    preview_blueprint = preview.construct_blueprint(datastore)
    ui_blueprint.register_blueprint(preview_blueprint)

    # Import the login decorator
    from changedetectionio.auth_decorator import login_optionally_required

    # Upper bound (seconds) for the share upload so a stalled server cannot hang the request
    share_request_timeout = 30

    def _resolve_watch_uuid(uuid):
        """Map the 'first' alias to a real watch UUID; returns None when there are no watches."""
        if uuid != 'first':
            return uuid
        watch_uuids = list(datastore.data['watching'].keys())
        # pop() keeps the historic behaviour: the alias resolves to the last key in iteration order
        return watch_uuids.pop() if watch_uuids else None

    def _queue_recheck(uuid, priority=1):
        """Put one watch on the update queue."""
        worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))

    # The view takes `uuid`, so the rule must declare it as a URL variable
    @ui_blueprint.route("/clear_history/<string:uuid>", methods=['GET'])
    @login_optionally_required
    def clear_watch_history(uuid):
        """Clear the snapshot history of a single watch."""
        try:
            datastore.clear_watch_history(uuid)
        except KeyError:
            flash(gettext('Watch not found'), 'error')
        else:
            flash(gettext("Cleared snapshot history for watch {}").format(uuid))
        return redirect(url_for('watchlist.index'))

    @ui_blueprint.route("/clear_history", methods=['GET', 'POST'])
    @login_optionally_required
    def clear_all_history():
        """GET shows the confirmation page; POST with the right confirmation text clears all history."""
        if request.method == 'POST':
            confirmtext = request.form.get('confirmtext', '')

            if confirmtext.strip().lower() == gettext('clear').strip().lower():
                # Run in background thread to avoid blocking
                def clear_history_background():
                    # Capture UUIDs first to avoid race conditions
                    watch_uuids = list(datastore.data['watching'].keys())
                    logger.info(f"Background: Clearing history for {len(watch_uuids)} watches")

                    for uuid in watch_uuids:
                        try:
                            datastore.clear_watch_history(uuid)
                        except Exception as e:
                            logger.error(f"Error clearing history for watch {uuid}: {e}")

                    logger.info("Background: Completed clearing history")

                # Start daemon thread
                threading.Thread(target=clear_history_background, daemon=True).start()
                flash(gettext("History clearing started in background"))
            else:
                flash(gettext('Incorrect confirmation text.'), 'error')

            return redirect(url_for('watchlist.index'))

        return render_template("clear_all_history.html")

    # Clear all statuses, so we do not see the 'unviewed' class
    @ui_blueprint.route("/form/mark-all-viewed", methods=['GET'])
    @login_optionally_required
    def mark_all_viewed():
        """Mark watches as viewed, optionally limited to one tag and/or to watches with errors."""
        # Save the current newest history as the most recently viewed
        with_errors = request.args.get('with_errors') == "1"
        tag_limit = request.args.get('tag')
        now = int(time.time())

        # Mark watches as viewed - use background thread only for large watch counts
        def mark_viewed_impl():
            """Mark watches as viewed - can run synchronously or in background thread."""
            marked_count = 0
            try:
                # Snapshot: when run in a thread, concurrent add/delete of watches would
                # otherwise abort the loop with "dictionary changed size during iteration"
                for watch_uuid, watch in list(datastore.data['watching'].items()):
                    if with_errors and not watch.get('last_error'):
                        continue

                    if tag_limit and (not watch.get('tags') or tag_limit not in watch['tags']):
                        continue

                    datastore.set_last_viewed(watch_uuid, now)
                    marked_count += 1

                logger.info(f"Marking complete: {marked_count} watches marked as viewed")
            except Exception as e:
                logger.error(f"Error marking as viewed: {e}")

        # For small watch counts (< 10), run synchronously to avoid race conditions in tests
        # For larger counts, use background thread to avoid blocking the UI
        if len(datastore.data['watching']) < 10:
            mark_viewed_impl()
        else:
            threading.Thread(target=mark_viewed_impl, daemon=True).start()

        return redirect(url_for('watchlist.index', tag=tag_limit))

    @ui_blueprint.route("/delete", methods=['GET'])
    @login_optionally_required
    def form_delete():
        """Delete the watch given by ``?uuid=`` ('all' and the 'first' alias are accepted)."""
        # More for testing, possible to return the first/only
        uuid = _resolve_watch_uuid(request.args.get('uuid'))

        if uuid != 'all' and uuid not in datastore.data['watching']:
            flash(gettext('The watch by UUID {} does not exist.').format(uuid), 'error')
            return redirect(url_for('watchlist.index'))

        datastore.delete(uuid)
        flash(gettext('Deleted.'))

        return redirect(url_for('watchlist.index'))

    @ui_blueprint.route("/clone", methods=['GET'])
    @login_optionally_required
    def form_clone():
        """Clone the watch given by ``?uuid=`` and open the clone's edit page."""
        uuid = _resolve_watch_uuid(request.args.get('uuid'))

        source_watch = datastore.data['watching'].get(uuid)
        if source_watch is None:
            # Missing/unknown UUID: report it instead of failing on None further down
            flash(gettext('The watch by UUID {} does not exist.').format(uuid), 'error')
            return redirect(url_for('watchlist.index'))

        new_uuid = datastore.clone(uuid)

        # Only queue the clone when the watch it was cloned from is not paused
        if not source_watch.get('paused'):
            _queue_recheck(new_uuid, priority=5)

        flash(gettext('Cloned, you are editing the new watch.'))

        return redirect(url_for("ui.ui_edit.edit_page", uuid=new_uuid))

    @ui_blueprint.route("/checknow", methods=['GET'])
    @login_optionally_required
    def form_watch_checknow():
        """Queue one watch (``?uuid=``) or all unpaused watches (optionally by tag / with errors)."""
        # Forced recheck will skip the 'skip if content is the same' rule (, 'reprocess_existing_data': True})))
        tag = request.args.get('tag')
        uuid = request.args.get('uuid')
        with_errors = request.args.get('with_errors') == "1"

        if uuid:
            # Single watch - check if already queued or running
            if worker_pool.is_watch_running(uuid) or uuid in update_q.get_queued_uuids():
                flash(gettext("Watch is already queued or being checked."))
            else:
                _queue_recheck(uuid)
                flash(gettext("Queued 1 watch for rechecking."))
        else:
            # Multiple watches - first collect the candidates, least recently checked first
            watches_to_queue = []
            for watch_uuid, watch in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
                if watch.get('paused') or not watch_uuid:
                    continue
                if with_errors and not watch.get('last_error'):
                    continue
                if tag is not None and tag not in (watch.get('tags') or []):
                    continue
                watches_to_queue.append(watch_uuid)

            # Capture what is already queued/running once, before any queueing happens
            already_active = set(update_q.get_queued_uuids()) | set(worker_pool.get_running_uuids())

            # If less than 20 watches, queue synchronously for immediate feedback
            if len(watches_to_queue) < 20:
                to_queue_now = [watch_uuid for watch_uuid in watches_to_queue if watch_uuid not in already_active]

                for watch_uuid in to_queue_now:
                    _queue_recheck(watch_uuid)

                # Provide feedback about skipped watches
                skipped_count = len(watches_to_queue) - len(to_queue_now)
                if skipped_count > 0:
                    flash(gettext("Queued {} watches for rechecking ({} already queued or running).").format(
                        len(to_queue_now), skipped_count))
                elif len(to_queue_now) == 1:
                    flash(gettext("Queued 1 watch for rechecking."))
                else:
                    flash(gettext("Queued {} watches for rechecking.").format(len(to_queue_now)))
            else:
                # 20+ watches - queue in background thread to avoid blocking HTTP response
                def queue_watches_background():
                    """Background thread to queue watches - discarded after completion."""
                    try:
                        queued_count = 0
                        skipped_count = 0
                        for watch_uuid in watches_to_queue:
                            # Check if already queued or running (state captured at start)
                            if watch_uuid not in already_active:
                                _queue_recheck(watch_uuid)
                                queued_count += 1
                            else:
                                skipped_count += 1

                        logger.info(f"Background queueing complete: {queued_count} watches queued, {skipped_count} skipped (already queued/running)")
                    except Exception as e:
                        logger.error(f"Error in background queueing: {e}")

                # Start background thread and return immediately
                thread = threading.Thread(target=queue_watches_background, daemon=True, name="QueueWatches-Background")
                thread.start()

                # Return immediately with approximate message
                flash(gettext("Queueing watches for rechecking in background..."))

        return redirect(url_for('watchlist.index', **({'tag': tag} if tag else {})))

    @ui_blueprint.route("/form/checkbox-operations", methods=['POST'])
    @login_optionally_required
    def form_watch_list_checkbox_operations():
        """Run the bulk operation selected on the watch list for the ticked watches."""
        op = request.form['op']
        uuids = [u.strip() for u in request.form.getlist('uuids') if u]
        extra_data = request.form.get('op_extradata', '').strip()
        _handle_operations(
            datastore=datastore,
            extra_data=extra_data,
            queuedWatchMetaData=queuedWatchMetaData,
            uuids=uuids,
            worker_pool=worker_pool,
            update_q=update_q,
            watch_check_update=watch_check_update,
            op=op,
        )

        return redirect(url_for('watchlist.index'))

    @ui_blueprint.route("/share-url/<string:uuid>", methods=['GET'])
    @login_optionally_required
    def form_share_put_watch(uuid):
        """Given a watch UUID, upload the info and return a share-link
           the share-link can be imported/added"""
        import json
        from copy import deepcopy

        import requests

        source_watch = datastore.data['watching'].get(uuid)
        if source_watch is None:
            # Unknown UUID: report it instead of failing on None further down
            flash(gettext('The watch by UUID {} does not exist.').format(uuid), 'error')
            return redirect(url_for('watchlist.index'))

        # copy it to memory as trim off what we dont need (history)
        watch = deepcopy(source_watch)
        # For older versions that are not a @property
        if watch.get('history'):
            del watch['history']

        # for safety/privacy
        for key in list(watch.keys()):
            if key.startswith('notification_'):
                del watch[key]

        for field in ['uuid', 'last_checked', 'last_changed']:
            if watch.get(field):
                del watch[field]

        # Add the global stuff which may have an impact
        app_settings = datastore.data['settings']['application']
        watch['ignore_text'] = list(watch.get('ignore_text') or []) + list(app_settings.get('global_ignore_text') or [])
        watch['subtractive_selectors'] = list(watch.get('subtractive_selectors') or []) + list(app_settings.get('global_subtractive_selectors') or [])

        watch_json = json.dumps(watch)

        try:
            response = requests.request(method="POST",
                                        data={'watch': watch_json},
                                        url="https://changedetection.io/share/share",
                                        headers={'App-Guid': datastore.data['app_guid']},
                                        timeout=share_request_timeout)
            res = response.json()

            # Add to the flask session
            session['share-link'] = f"https://changedetection.io/share/{res['share_key']}"

        except Exception as e:
            logger.error(f"Error sharing -{str(e)}")
            flash(gettext("Could not share, something went wrong while communicating with the share server - {}").format(str(e)), 'error')

        return redirect(url_for('watchlist.index'))

    @ui_blueprint.route("/language/auto-detect", methods=['GET'])
    def delete_locale_language_session_var_if_it_exists():
        """Clear the session locale preference to auto-detect from browser Accept-Language header"""
        if 'locale' in session:
            session.pop('locale', None)
            # Refresh Flask-Babel to clear cached locale
            from flask_babel import refresh
            refresh()
            flash(gettext("Language set to auto-detect from browser"))

        # Check if there's a redirect parameter to return to the same page
        redirect_url = request.args.get('redirect')

        # If redirect is provided and safe, use it
        from changedetectionio.is_safe_url import is_safe_url
        if redirect_url and is_safe_url(redirect_url, current_app):
            return redirect(redirect_url)

        # Otherwise redirect to watchlist
        return redirect(url_for('watchlist.index'))

    return ui_blueprint
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the ``ui_diff`` blueprint: diff/history, extract and processor-asset views.

    Every view resolves the watch named in the URL and then delegates to the
    watch's processor (``processors/{type}/difference.py`` or
    ``processors/{type}/extract.py``), so each processor type can provide its
    own rendering, extraction and assets.

    Args:
        datastore: Application datastore; watches are read from
            ``datastore.data['watching']``.

    Returns:
        The configured Flask ``Blueprint``.
    """
    diff_blueprint = Blueprint('ui_diff', __name__, template_folder="../ui/templates")

    def _lookup_watch(uuid):
        """Return the watch for `uuid`, or None when it cannot be resolved.

        The special value 'first' selects a watch without knowing its uuid;
        note that ``list.pop()`` yields the *last* key of the watch dict.
        """
        watching = datastore.data['watching']
        try:
            if uuid == 'first':
                # IndexError here means there are no watches at all
                uuid = list(watching.keys()).pop()
            return watching[uuid]
        except (KeyError, IndexError):
            return None

    def _watch_not_found():
        """Flash the 'bad link' error and redirect to the watchlist."""
        flash(gettext("No history found for the specified link, bad link?"), "error")
        return redirect(url_for('watchlist.index'))

    def _processor_hook(watch, submodule, hook_name):
        """Return `hook_name` from the watch processor's `submodule`, or None if absent."""
        # Imported at call time rather than at module import time
        from changedetectionio.processors import get_processor_submodule

        processor_name = watch.get('processor', 'text_json_diff')
        # Works for both built-in and plugin processors
        module = get_processor_submodule(processor_name, submodule)
        if module and hasattr(module, hook_name):
            return getattr(module, hook_name)
        return None

    @diff_blueprint.app_template_filter('diff_unescape_difference_spans')
    def diff_unescape_difference_spans(content):
        """Emulate Jinja2's auto-escape, then selectively unescape our diff spans.

        Args:
            content: Diff text that may contain the highlight ``<span>`` markup
                as well as arbitrary (untrusted) page content.

        Returns:
            ``Markup`` in which everything is HTML-escaped except the diff
            highlight spans matched below.
        """
        from markupsafe import escape

        if not content:
            return Markup('')

        # Step 1: Escape everything like Jinja2 would (this makes it XSS-safe)
        escaped_content = str(escape(str(content)))

        # Step 2: Unescape only our exact diff spans. The patterns are written
        # against the *escaped* text, where escape() has turned < > " into
        # &lt; &gt; &#34; - a pattern using the raw characters could never match.
        outer_styles = f'{re.escape(REMOVED_STYLE)}|{re.escape(ADDED_STYLE)}'
        inner_styles = f'{re.escape(REMOVED_INNER_STYLE)}|{re.escape(ADDED_INNER_STYLE)}'

        # Outer span opening tags with full attributes (role, aria-label, title).
        # [^&]+? keeps the attribute values free of any escaped entity, so a
        # value cannot smuggle a quote or tag through the unescape.
        result = re.sub(
            rf'&lt;span style=&#34;({outer_styles})&#34; '
            rf'role=&#34;(deletion|insertion|note)&#34; '
            rf'aria-label=&#34;([^&]+?)&#34; '
            rf'title=&#34;([^&]+?)&#34;&gt;',
            r'<span style="\1" role="\2" aria-label="\3" title="\4">',
            escaped_content,
            flags=re.IGNORECASE,
        )

        # Inner span opening tags (style only): the darker background used for
        # the changed parts within a line
        result = re.sub(
            rf'&lt;span style=&#34;({inner_styles})&#34;&gt;',
            r'<span style="\1">',
            result,
            flags=re.IGNORECASE,
        )

        # Unescape closing tags, but only as many as we opened. After step 1 the
        # only raw '<span style=' occurrences are the ones restored above.
        opened = result.count('<span style=')
        result = result.replace('&lt;/span&gt;', '</span>', opened)

        return Markup(result)

    @diff_blueprint.route("/diff/<string:uuid>", methods=['GET'])
    @login_optionally_required
    def diff_history_page(uuid):
        """
        Render the history/diff page for a watch.

        This route is processor-aware: it delegates rendering to the processor's
        difference.py module, allowing different processor types to provide
        custom visualizations:
        - text_json_diff: Text/HTML diff with syntax highlighting
        - restock_diff: Could show price charts and stock history
        - image_diff: Could show image comparison slider/overlay

        Each processor implements processors/{type}/difference.py::render()
        If a processor doesn't have a difference module, falls back to text_json_diff.
        """
        watch = _lookup_watch(uuid)
        if watch is None:
            return _watch_not_found()

        dates = list(watch.history.keys())
        if len(dates) < 2:
            flash(gettext("Not enough history (2 snapshots required) to show difference page for this watch."), "error")
            return redirect(url_for('watchlist.index'))

        render = _processor_hook(watch, 'difference', 'render')
        if render is None:
            # Fallback: processor has no difference module, use text_json_diff
            from changedetectionio.processors.text_json_diff.difference import render

        return render(
            watch=watch,
            datastore=datastore,
            request=request,
            url_for=url_for,
            render_template=render_template,
            flash=flash,
            redirect=redirect
        )

    @diff_blueprint.route("/diff/<string:uuid>/extract", methods=['GET'])
    @login_optionally_required
    def diff_history_page_extract_GET(uuid):
        """
        Render the data extraction form for a watch.

        This route is processor-aware: it delegates to the processor's extract.py
        module, allowing different processor types to provide custom extraction
        interfaces.

        Each processor implements processors/{type}/extract.py::render_form()
        If a processor doesn't have an extract module, falls back to the base
        changedetectionio.processors.extract implementation.
        """
        watch = _lookup_watch(uuid)
        if watch is None:
            return _watch_not_found()

        render_form = _processor_hook(watch, 'extract', 'render_form')
        if render_form is None:
            # Fallback: processor has no extract module, use the base one
            from changedetectionio.processors.extract import render_form

        return render_form(
            watch=watch,
            datastore=datastore,
            request=request,
            url_for=url_for,
            render_template=render_template,
            flash=flash,
            redirect=redirect
        )

    @diff_blueprint.route("/diff/<string:uuid>/extract", methods=['POST'])
    @login_optionally_required
    def diff_history_page_extract_POST(uuid):
        """
        Process the data extraction request.

        This route is processor-aware: it delegates to the processor's extract.py
        module, allowing different processor types to provide custom extraction
        logic.

        Each processor implements processors/{type}/extract.py::process_extraction()
        If a processor doesn't have an extract module, falls back to the base
        changedetectionio.processors.extract implementation.
        """
        watch = _lookup_watch(uuid)
        if watch is None:
            return _watch_not_found()

        process_extraction = _processor_hook(watch, 'extract', 'process_extraction')
        if process_extraction is None:
            # Fallback: processor has no extract module, use the base one
            from changedetectionio.processors.extract import process_extraction

        return process_extraction(
            watch=watch,
            datastore=datastore,
            request=request,
            url_for=url_for,
            make_response=make_response,
            send_from_directory=send_from_directory,
            flash=flash,
            redirect=redirect
        )

    @diff_blueprint.route("/diff/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
    @login_optionally_required
    def processor_asset(uuid, asset_name):
        """
        Serve processor-specific binary assets (images, files, etc.).

        This route is processor-aware: it delegates to the processor's
        difference.py module, allowing different processor types to serve custom
        assets without embedding them as base64 in templates.

        This solves memory issues with large binary data (e.g., screenshots) by
        streaming them as separate HTTP responses instead of embedding in the
        HTML template.

        Each processor implements processors/{type}/difference.py::get_asset()
        which returns (binary_data, content_type, cache_control_header), or None
        when the asset does not exist (answered with a 404).

        Example URLs:
        - /diff/{uuid}/processor-asset/before
        - /diff/{uuid}/processor-asset/after
        - /diff/{uuid}/processor-asset/rendered_diff
        """
        from flask import abort

        watch = _lookup_watch(uuid)
        if watch is None:
            return _watch_not_found()

        get_asset = _processor_hook(watch, 'difference', 'get_asset')
        if get_asset is None:
            processor_name = watch.get('processor', 'text_json_diff')
            logger.warning(f"Processor {processor_name} does not implement get_asset()")
            abort(404, description=f"Processor '{processor_name}' does not support assets")

        result = get_asset(
            asset_name=asset_name,
            watch=watch,
            datastore=datastore,
            request=request
        )
        if result is None:
            abort(404, description=f"Asset '{asset_name}' not found")

        binary_data, content_type, cache_control = result
        response = make_response(binary_data)
        response.headers['Content-Type'] = content_type
        # cache_control is optional; only set the header when the processor gave one
        if cache_control:
            response.headers['Cache-Control'] = cache_control
        return response

    return diff_blueprint
def _watch_has_tag_options_set(watch):
    """Return True when a tag assigned to `watch` defines its own filters.

    A tag counts when its uuid is listed in ``watch['tags']`` and the tag has a
    truthy ``include_filters`` or ``subtractive_selectors`` value.

    Args:
        watch: Mapping-like watch object; only its ``tags`` entry is read.

    Returns:
        bool: True if at least one such tag exists, otherwise False.
    """
    # TODO: This should be fixed better so that Tag is some proper Model,
    # a tag is just a Watch also
    watch_tags = watch.get('tags') or []
    if not watch_tags:
        return False

    all_tags = datastore.data['settings']['application'].get('tags', {})
    return any(
        tag_uuid in watch_tags
        and bool(tag.get('include_filters') or tag.get('subtractive_selectors'))
        for tag_uuid, tag in all_tags.items()
    )
def edit_page(uuid): from changedetectionio import forms from changedetectionio.browser_steps.browser_steps import browser_step_ui_config from changedetectionio import processors import importlib if uuid == 'first': uuid = list(datastore.data['watching'].keys()).pop() # More for testing, possible to return the first/only if not datastore.data['watching'].keys(): flash(gettext("No watches to edit"), "error") return redirect(url_for('watchlist.index')) if not uuid in datastore.data['watching']: flash(gettext("No watch with the UUID {} found.").format(uuid), "error") return redirect(url_for('watchlist.index')) switch_processor = request.args.get('switch_processor') if switch_processor: for p in processors.available_processors(): if p[0] == switch_processor: datastore.data['watching'][uuid]['processor'] = switch_processor flash(gettext("Switched to mode - {}.").format(p[1])) datastore.clear_watch_history(uuid) redirect(url_for('ui_edit.edit_page', uuid=uuid)) # be sure we update with a copy instead of accidently editing the live object by reference default = None while not default: try: default = deepcopy(datastore.data['watching'][uuid]) except RuntimeError as e: # Dictionary changed continue # Defaults for proxy choice if datastore.proxy_list is not None: # When enabled # @todo # Radio needs '' not None, or incase that the chosen one no longer exists if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list): default['proxy'] = '' # proxy_override set to the json/text list of the items # Does it use some custom form? does one exist? processor_name = datastore.data['watching'][uuid].get('processor', '') processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None) if not processor_classes: flash(gettext("Could not load '{}' processor, processor plugin might be missing. 
Please select a different processor.").format(processor_name), 'error') # Fall back to default processor so user can still edit and change processor processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == 'text_json_diff'), None) if not processor_classes: # If even text_json_diff is missing, something is very wrong flash(gettext("Could not load '{}' processor, processor plugin might be missing.").format(processor_name), 'error') return redirect(url_for('watchlist.index')) parent_module = processors.get_parent_module(processor_classes[0]) try: # Get the parent of the "processor.py" go up one, get the form (kinda spaghetti but its reusing existing code) forms_module = importlib.import_module(f"{parent_module.__name__}.forms") # Access the 'processor_settings_form' class from the 'forms' module form_class = getattr(forms_module, 'processor_settings_form') except ModuleNotFoundError as e: # .forms didnt exist form_class = forms.processor_text_json_diff_form except AttributeError as e: # .forms exists but no useful form form_class = forms.processor_text_json_diff_form form = form_class(formdata=request.form if request.method == 'POST' else None, data=default, extra_notification_tokens=default.extra_notification_token_values(), default_system_settings=datastore.data['settings'] ) # For the form widget tag UUID back to "string name" for the field form.tags.datastore = datastore # Used by some forms that need to dig deeper form.datastore = datastore form.watch = default # Load processor-specific config from JSON file for GET requests if request.method == 'GET' and processor_name: try: from changedetectionio.processors.base import difference_detection_processor # Create a processor instance to access config methods processor_instance = difference_detection_processor(datastore, uuid) # Use processor name as filename so each processor keeps its own config config_filename = f'{processor_name}.json' processor_config = 
processor_instance.get_extra_watch_config(config_filename) if processor_config: from wtforms.fields.form import FormField # Populate processor-config-* fields from JSON for config_key, config_value in processor_config.items(): if not isinstance(config_value, dict): continue # Try exact API-named field first (e.g., processor_config_restock_diff) target_field = getattr(form, f'processor_config_{config_key}', None) # Fallback: find any FormField sub-form whose fields cover config_value keys if target_field is None: for form_field in form: if isinstance(form_field, FormField) and all(k in form_field.form._fields for k in config_value): target_field = form_field break if target_field is not None: for sub_key, sub_value in config_value.items(): sub_field = target_field.form._fields.get(sub_key) if sub_field is not None: sub_field.data = sub_value logger.debug(f"Loaded processor config from {config_filename}: {sub_key} = {sub_value}") except Exception as e: logger.warning(f"Failed to load processor config: {e}") for p in datastore.extra_browsers: form.fetch_backend.choices.append(p) form.fetch_backend.choices.append(("system", 'System settings default')) # form.browser_steps[0] can be assumed that we 'goto url' first if datastore.proxy_list is None: # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead del form.proxy else: form.proxy.choices = [('', 'Default')] for p in datastore.proxy_list: form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label']))) if request.method == 'POST' and form.validate(): extra_update_obj = { 'consecutive_filter_failures': 0, 'last_error' : False } if request.args.get('unpause_on_save'): extra_update_obj['paused'] = False extra_update_obj['time_between_check'] = form.time_between_check.data # Handle processor-config-* fields separately (save to JSON, not datastore) # IMPORTANT: These must NOT be saved to url-watches.json, only to the processor-specific JSON file processor_config_data = 
processors.extract_processor_config_from_form_data(form.data) processors.save_processor_config(datastore, uuid, processor_config_data) # Ignore text form_ignore_text = form.ignore_text.data datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text # Be sure proxy value is None if datastore.proxy_list is not None and form.data['proxy'] == '': extra_update_obj['proxy'] = None # Unsetting all filter_text methods should make it go back to default # This particularly affects tests running if 'filter_text_added' in form.data and not form.data.get('filter_text_added') \ and 'filter_text_replaced' in form.data and not form.data.get('filter_text_replaced') \ and 'filter_text_removed' in form.data and not form.data.get('filter_text_removed'): extra_update_obj['filter_text_added'] = True extra_update_obj['filter_text_replaced'] = True extra_update_obj['filter_text_removed'] = True # Because wtforms doesn't support accessing other data in process_ , but we convert the CSV list of tags back to a list of UUIDs tag_uuids = [] if form.data.get('tags'): # Sometimes in testing this can be list, dont know why if type(form.data.get('tags')) == list: extra_update_obj['tags'] = form.data.get('tags') else: for t in form.data.get('tags').split(','): tag_uuids.append(datastore.add_tag(title=t)) extra_update_obj['tags'] = tag_uuids datastore.data['watching'][uuid].update(form.data) datastore.data['watching'][uuid].update(extra_update_obj) if not datastore.data['watching'][uuid].get('tags'): # Force it to be a list, because form.data['tags'] will be string if nothing found # And del(form.data['tags'] ) wont work either for some reason datastore.data['watching'][uuid]['tags'] = [] # Recast it if need be to right data Watch handler watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor')) datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, __datastore=datastore.data, default=datastore.data['watching'][uuid]) # Save the 
watch immediately datastore.data['watching'][uuid].commit() flash(gettext("Updated watch - unpaused!") if request.args.get('unpause_on_save') else gettext("Updated watch.")) # Cleanup any browsersteps session for this watch try: from changedetectionio.blueprint.browser_steps import cleanup_session_for_watch cleanup_session_for_watch(uuid) except Exception as e: logger.debug(f"Error cleaning up browsersteps session: {e}") # Do not queue on edit if its not within the time range # @todo maybe it should never queue anyway on edit... is_in_schedule = True watch = datastore.data['watching'].get(uuid) if watch.get('time_between_check_use_default'): time_schedule_limit = datastore.data['settings']['requests'].get('time_schedule_limit', {}) else: time_schedule_limit = watch.get('time_schedule_limit') tz_name = time_schedule_limit.get('timezone') if not tz_name: tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip()) if time_schedule_limit and time_schedule_limit.get('enabled'): try: is_in_schedule = is_within_schedule(time_schedule_limit=time_schedule_limit, default_tz=tz_name ) except Exception as e: logger.error( f"{uuid} - Recheck scheduler, error handling timezone, check skipped - TZ name '{tz_name}' - {str(e)}") return False ############################# if not datastore.data['watching'][uuid].get('paused') and is_in_schedule: # Queue the watch for immediate recheck, with a higher priority worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) # Diff page [edit] link should go back to diff page if request.args.get("next") and request.args.get("next") == 'diff': return redirect(url_for('ui.ui_diff.diff_history_page', uuid=uuid)) return redirect(url_for('watchlist.index', tag=request.args.get("tag",''))) else: if request.method == 'POST' and not form.validate(): flash(gettext("An error occurred, please see below."), "error") # JQ is difficult to 
install on windows and must be manually added (outside requirements.txt) jq_support = True try: import jq except ModuleNotFoundError: jq_support = False watch = datastore.data['watching'].get(uuid) from zoneinfo import available_timezones # Import the global plugin system from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras, get_fetcher_capabilities # Get fetcher capabilities instead of hardcoded logic capabilities = get_fetcher_capabilities(watch, datastore) # Add processor capabilities from module capabilities['supports_visual_selector'] = getattr(parent_module, 'supports_visual_selector', False) capabilities['supports_text_filters_and_triggers'] = getattr(parent_module, 'supports_text_filters_and_triggers', False) capabilities['supports_text_filters_and_triggers_elements'] = getattr(parent_module, 'supports_text_filters_and_triggers_elements', False) capabilities['supports_request_type'] = getattr(parent_module, 'supports_request_type', False) app_rss_token = datastore.data['settings']['application'].get('rss_access_token'), c = [f"processor-{watch.get('processor')}"] if worker_pool.is_watch_running(uuid): c.append('checking-now') template_args = { 'available_processors': processors.available_processors(), 'available_timezones': sorted(available_timezones()), 'browser_steps_config': browser_step_ui_config, 'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), 'extra_classes': ' '.join(c), 'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(), 'extra_processor_config': form.extra_tab_content(), 'extra_title': f" - Edit - {watch.label}", 'form': form, 'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False, 'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0, 'has_special_tag_options': _watch_has_tag_options_set(watch=watch), 'jq_support': jq_support, 
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False), 'app_rss_token': app_rss_token, 'rss_uuid_feed' : { 'label': watch.label, 'url': url_for('rss.rss_single_watch', uuid=watch['uuid'], token=app_rss_token) }, 'settings_application': datastore.data['settings']['application'], 'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch), 'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid), 'timezone_default_config': datastore.data['settings']['application'].get('scheduler_timezone_default'), 'using_global_webdriver_wait': not default['webdriver_delay'], 'uuid': uuid, 'watch': watch, 'capabilities': capabilities } included_content = None if form.extra_form_content(): # So that the extra panels can access _helpers.html etc, we set the environment to load from templates/ # And then render the code from the module templates_dir = str(importlib.resources.files("changedetectionio").joinpath('templates')) env = Environment(loader=FileSystemLoader(templates_dir)) template = env.from_string(form.extra_form_content()) included_content = template.render(**template_args) output = render_template("edit.html", extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None, extra_form_content=included_content, **template_args ) return output @edit_blueprint.route("/edit//get-html", methods=['GET']) @login_optionally_required def watch_get_latest_html(uuid): from io import BytesIO from flask import send_file import brotli if uuid == 'first': uuid = list(datastore.data['watching'].keys()).pop() watch = datastore.data['watching'].get(uuid) if watch and watch.history.keys() and os.path.isdir(watch.data_dir): latest_filename = list(watch.history.keys())[-1] html_fname = os.path.join(watch.data_dir, f"{latest_filename}.html.br") with open(html_fname, 'rb') as f: if html_fname.endswith('.br'): # Read and decompress the Brotli file decompressed_data = brotli.decompress(f.read()) else: decompressed_data = f.read() buffer = 
BytesIO(decompressed_data) return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html') # Return a 500 error abort(500) @edit_blueprint.route("/edit//get-data-package", methods=['GET']) @login_optionally_required def watch_get_data_package(uuid): """Download all data for a single watch as a zip file""" from io import BytesIO from flask import send_file import zipfile from pathlib import Path import datetime watch = datastore.data['watching'].get(uuid) if not watch: abort(404) # Create zip in memory memory_file = BytesIO() with zipfile.ZipFile(memory_file, 'w', compression=zipfile.ZIP_DEFLATED, compresslevel=8) as zipObj: # Add the watch's JSON file if it exists watch_json_path = os.path.join(watch.data_dir, 'watch.json') if os.path.isfile(watch_json_path): zipObj.write(watch_json_path, arcname=os.path.join(uuid, 'watch.json'), compress_type=zipfile.ZIP_DEFLATED, compresslevel=8) # Add all files in the watch data directory if os.path.isdir(watch.data_dir): for f in Path(watch.data_dir).glob('*'): if f.is_file() and f.name != 'watch.json': # Skip watch.json since we already added it zipObj.write(f, arcname=os.path.join(uuid, f.name), compress_type=zipfile.ZIP_DEFLATED, compresslevel=8) # Seek to beginning of file memory_file.seek(0) # Generate filename with timestamp timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S") filename = f"watch-data-{uuid[:8]}-{timestamp}.zip" return send_file(memory_file, as_attachment=True, download_name=filename, mimetype='application/zip') # Ajax callback @edit_blueprint.route("/edit//preview-rendered", methods=['POST']) @login_optionally_required def watch_get_preview_rendered(uuid): '''For when viewing the "preview" of the rendered text from inside of Edit''' from flask import jsonify if uuid == 'first': uuid = list(datastore.data['watching'].keys()).pop() from changedetectionio.processors.text_json_diff import prepare_filter_prevew result = prepare_filter_prevew(watch_uuid=uuid, 
form_data=request.form, datastore=datastore) return jsonify(result) @edit_blueprint.route("/highlight_submit_ignore_url", methods=['POST']) @login_optionally_required def highlight_submit_ignore_url(): import re mode = request.form.get('mode') selection = request.form.get('selection') uuid = request.args.get('uuid','') if datastore.data["watching"].get(uuid): if mode == 'exact': for l in selection.splitlines(): datastore.data["watching"][uuid]['ignore_text'].append(l.strip()) elif mode == 'digit-regex': for l in selection.splitlines(): # Replace any series of numbers with a regex s = re.escape(l.strip()) s = re.sub(r'[0-9]+', r'\\d+', s) datastore.data["watching"][uuid]['ignore_text'].append('/' + s + '/') # Save the updated ignore_text datastore.data["watching"][uuid].commit() return f"Click to preview" return edit_blueprint ================================================ FILE: changedetectionio/blueprint/ui/notification.py ================================================ from flask import Blueprint, request, make_response import random from loguru import logger from changedetectionio.store import ChangeDetectionStore from changedetectionio.auth_decorator import login_optionally_required def construct_blueprint(datastore: ChangeDetectionStore): notification_blueprint = Blueprint('ui_notification', __name__, template_folder="../ui/templates") # AJAX endpoint for sending a test @notification_blueprint.route("/notification/send-test/", methods=['POST']) @notification_blueprint.route("/notification/send-test", methods=['POST']) @notification_blueprint.route("/notification/send-test/", methods=['POST']) @login_optionally_required def ajax_callback_send_notification_test(watch_uuid=None): from changedetectionio.notification_service import NotificationContextData, set_basic_notification_vars # Watch_uuid could be unset in the case it`s used in tag editor, global settings import apprise from changedetectionio.notification.handler import process_notification from 
changedetectionio.notification.apprise_plugin.assets import apprise_asset from changedetectionio.jinja2_custom import render as jinja_render from changedetectionio.notification.apprise_plugin.custom_handlers import apprise_http_custom_handler apobj = apprise.Apprise(asset=apprise_asset) is_global_settings_form = request.args.get('mode', '') == 'global-settings' is_group_settings_form = request.args.get('mode', '') == 'group-settings' # Use an existing random one on the global/main settings form if not watch_uuid and (is_global_settings_form or is_group_settings_form) \ and datastore.data.get('watching'): logger.debug(f"Send test notification - Choosing random Watch {watch_uuid}") watch_uuid = random.choice(list(datastore.data['watching'].keys())) if not watch_uuid: return make_response("Error: You must have atleast one watch configured for 'test notification' to work", 400) watch = datastore.data['watching'].get(watch_uuid) notification_urls = request.form.get('notification_urls','').strip().splitlines() if not notification_urls: logger.debug("Test notification - Trying by group/tag in the edit form if available") # On an edit page, we should also fire off to the tags if they have notifications if request.form.get('tags') and request.form['tags'].strip(): for k in request.form['tags'].split(','): tag = datastore.tag_exists_by_name(k.strip()) notification_urls = tag.get('notifications_urls') if tag and tag.get('notifications_urls') else None if not notification_urls and not is_global_settings_form and not is_group_settings_form: # In the global settings, use only what is typed currently in the text box logger.debug("Test notification - Trying by global system settings notifications") if datastore.data['settings']['application'].get('notification_urls'): notification_urls = datastore.data['settings']['application']['notification_urls'] if not notification_urls: return 'Error: No Notification URLs set/found' for n_url in notification_urls: # We are ONLY validating the 
apprise:// part here, convert all tags to something so as not to break apprise URLs generic_notification_context_data = NotificationContextData() generic_notification_context_data.set_random_for_validation() n_url = jinja_render(template_str=n_url, **generic_notification_context_data).strip() if len(n_url.strip()): if not apobj.add(n_url): return f'Error: {n_url} is not a valid AppRise URL.' try: # use the same as when it is triggered, but then override it with the form test values n_object = NotificationContextData({ 'watch_url': request.form.get('window_url', "https://changedetection.io"), 'notification_urls': notification_urls }) # Only use if present, if not set in n_object it should use the default system value if 'notification_format' in request.form and request.form['notification_format'].strip(): n_object['notification_format'] = request.form.get('notification_format', '').strip() else: n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format') if 'notification_title' in request.form and request.form['notification_title'].strip(): n_object['notification_title'] = request.form.get('notification_title', '').strip() elif datastore.data['settings']['application'].get('notification_title'): n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title') else: n_object['notification_title'] = "Test title" if 'notification_body' in request.form and request.form['notification_body'].strip(): n_object['notification_body'] = request.form.get('notification_body', '').strip() elif datastore.data['settings']['application'].get('notification_body'): n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body') else: n_object['notification_body'] = "Test body" n_object['as_async'] = False # Same like in notification service, should be refactored dates = list(watch.history.keys()) trigger_text = '' snapshot_contents = '' # Could be called as a 'test 
notification' with only 1 snapshot available prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n" current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples" if len(dates) > 1: prev_snapshot = watch.get_history_snapshot(timestamp=dates[-2]) current_snapshot = watch.get_history_snapshot(timestamp=dates[-1]) n_object.update(set_basic_notification_vars(current_snapshot=current_snapshot, prev_snapshot=prev_snapshot, watch=watch, triggered_text=trigger_text, timestamp_changed=dates[-1] if dates else None)) sent_obj = process_notification(n_object, datastore) except Exception as e: logger.error(e) e_str = str(e) # Remove this text which is not important and floods the container e_str = e_str.replace( "DEBUG - .CustomNotifyPluginWrapper'>", '') return make_response(e_str, 400) return 'OK - Sent test notifications' return notification_blueprint ================================================ FILE: changedetectionio/blueprint/ui/preview.py ================================================ from flask import Blueprint, request, url_for, flash, render_template, redirect from flask_babel import gettext import time from loguru import logger from changedetectionio.store import ChangeDetectionStore from changedetectionio.auth_decorator import login_optionally_required from changedetectionio import html_tools def construct_blueprint(datastore: ChangeDetectionStore): preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates") @preview_blueprint.route("/preview/", methods=['GET', 'POST']) @login_optionally_required def preview_page(uuid): """ Render the preview page for a watch. 
This route is processor-aware: it delegates rendering to the processor's preview.py module, allowing different processor types to provide custom visualizations: - text_json_diff: Text preview with syntax highlighting - image_ssim_diff: Image preview with proper rendering - restock_diff: Could show latest price/stock data Each processor implements processors/{type}/preview.py::render() If a processor doesn't have a preview module, falls back to default text preview. """ if uuid == 'first': uuid = list(datastore.data['watching'].keys()).pop() try: watch = datastore.data['watching'][uuid] except KeyError: flash(gettext("No history found for the specified link, bad link?"), "error") return redirect(url_for('watchlist.index')) # Get the processor type for this watch processor_name = watch.get('processor', 'text_json_diff') # Try to get the processor's preview module (works for both built-in and plugin processors) from changedetectionio.processors import get_processor_submodule processor_module = get_processor_submodule(processor_name, 'preview') # Call the processor's render() function if processor_module and hasattr(processor_module, 'render'): return processor_module.render( watch=watch, datastore=datastore, request=request, url_for=url_for, render_template=render_template, flash=flash, redirect=redirect ) # Fallback: if processor doesn't have preview module, use default text preview content = [] versions = [] timestamp = None extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] is_html_webdriver = watch.fetcher_supports_screenshots triggered_line_numbers = [] ignored_line_numbers = [] blocked_line_numbers = [] if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()): flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error") else: # So prepare the latest preview or not preferred_version = request.values.get('version') if request.method == 'POST' 
else request.args.get('version') versions = list(watch.history.keys()) timestamp = versions[-1] if preferred_version and preferred_version in versions: timestamp = preferred_version try: versions = list(watch.history.keys()) content = watch.get_history_snapshot(timestamp=timestamp) triggered_line_numbers = html_tools.strip_ignore_text(content=content, wordlist=watch.get('trigger_text'), mode='line numbers' ) ignored_line_numbers = html_tools.strip_ignore_text(content=content, wordlist=watch.get('ignore_text'), mode='line numbers' ) blocked_line_numbers = html_tools.strip_ignore_text(content=content, wordlist=watch.get("text_should_not_be_present"), mode='line numbers' ) except Exception as e: content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''}) from changedetectionio.pluggy_interface import get_fetcher_capabilities capabilities = get_fetcher_capabilities(watch, datastore) output = render_template("preview.html", capabilities=capabilities, content=content, current_diff_url=watch['url'], current_version=timestamp, extra_stylesheets=extra_stylesheets, extra_title=f" - Diff - {watch.label} @ {timestamp}", highlight_ignored_line_numbers=ignored_line_numbers, highlight_triggered_line_numbers=triggered_line_numbers, highlight_blocked_line_numbers=blocked_line_numbers, history_n=watch.history_n, is_html_webdriver=is_html_webdriver, last_error=watch['last_error'], last_error_screenshot=watch.get_error_snapshot(), last_error_text=watch.get_error_text(), screenshot=watch.get_screenshot(), uuid=uuid, versions=versions, watch=watch, ) return output @preview_blueprint.route("/preview//processor-asset/", methods=['GET']) @login_optionally_required def processor_asset(uuid, asset_name): """ Serve processor-specific binary assets for preview (images, files, etc.). 
This route is processor-aware: it delegates to the processor's preview.py module, allowing different processor types to serve custom assets without embedding them as base64 in templates. This solves memory issues with large binary data by streaming them as separate HTTP responses instead of embedding in the HTML template. Each processor implements processors/{type}/preview.py::get_asset() which returns (binary_data, content_type, cache_control_header). Example URLs: - /preview/{uuid}/processor-asset/screenshot?version=123456789 """ from flask import make_response if uuid == 'first': uuid = list(datastore.data['watching'].keys()).pop() try: watch = datastore.data['watching'][uuid] except KeyError: flash(gettext("No history found for the specified link, bad link?"), "error") return redirect(url_for('watchlist.index')) # Get the processor type for this watch processor_name = watch.get('processor', 'text_json_diff') # Try to get the processor's preview module (works for both built-in and plugin processors) from changedetectionio.processors import get_processor_submodule processor_module = get_processor_submodule(processor_name, 'preview') # Call the processor's get_asset() function if processor_module and hasattr(processor_module, 'get_asset'): result = processor_module.get_asset( asset_name=asset_name, watch=watch, datastore=datastore, request=request ) if result is None: from flask import abort abort(404, description=f"Asset '{asset_name}' not found") binary_data, content_type, cache_control = result response = make_response(binary_data) response.headers['Content-Type'] = content_type if cache_control: response.headers['Cache-Control'] = cache_control return response else: logger.warning(f"Processor {processor_name} does not implement get_asset()") from flask import abort abort(404, description=f"Processor '{processor_name}' does not support assets") return preview_blueprint ================================================ FILE: 
changedetectionio/blueprint/ui/templates/clear_all_history.html ================================================ {% extends 'base.html' %} {% block content %}
{{ _('This will remove version history (snapshots) for ALL watches, but keep your list of URLs!') }}
{{ _('You may like to use the') }} {{ _('BACKUP') }} {{ _('link first.') }}

{{ _('Type in the word') }} {{ _('clear') }} {{ _('to confirm that you understand.') }}


{% endblock %} ================================================ FILE: changedetectionio/blueprint/ui/templates/diff-offscreen-options.html ================================================ ================================================ FILE: changedetectionio/blueprint/ui/templates/diff.html ================================================ {% extends 'base.html' %} {% from '_helpers.html' import render_field, render_checkbox_field, render_button %} {% block content %}
{% if versions|length >= 1 %} {##} {% endif %}
{%- if versions|length >= 2 -%} {%- endif -%}
{{watch_a.error_text_ctime|format_seconds_ago}} {{ _('seconds ago.') }}
            {{ last_error_text }}
        
{{watch_a.snapshot_error_screenshot_ctime|format_seconds_ago}} {{ _('seconds ago') }}
{{ _('Current error-ing screenshot from most recent request') }}
{%- if (content | default('')).split('\n') | length > 100 -%}
{%- for cell in diff_cell_grid -%}
{%- endfor -%}
{%- endif -%} {%- if password_enabled_and_share_is_off -%}
{{ _('Pro-tip: You can enable') }} {{ _('"share access when password is enabled"') }} {{ _('from settings.') }}
{%- endif -%}
{{ from_version|format_timestamp_timeago }} {%- if note -%}{{ note }}{%- endif -%} {{ _('Goto single snapshot') }}
{{ content| diff_unescape_difference_spans }}
{{ _('Tip:') }} {{ _('Highlight text to share or add to ignore lists.') }}
{{ _('For now, Differences are performed on text, not graphically, only the latest screenshot is available.') }}
{% if is_html_webdriver %} {% if screenshot %}
{{watch_a.snapshot_screenshot_ctime|format_timestamp_timeago}}
{{ _('Current screenshot from most recent request') }} {% else %} {{ _('No screenshot available just yet! Try rechecking the page.') }} {% endif %} {% else %} {{ _('Screenshot requires Playwright/WebDriver enabled') }} {% endif %}
{% endblock %} ================================================ FILE: changedetectionio/blueprint/ui/templates/edit.html ================================================ {% extends 'base.html' %} {% block content %} {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, highlight_trigger_ignored_explainer, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %} {% from '_common_fields.html' import render_common_settings_form %} {% if playwright_enabled %} {% endif %} {% set has_tag_filters_extra="WARNING: Watch has tag/groups set with special filters\n" if has_special_tag_options else '' %}
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
{{ _('Some sites use JavaScript to create the content, for this you should') }} {{ _('use the Chrome/WebDriver Fetcher') }}
{{ _('Variables are supported in the URL') }} ({{ _('help and examples here') }}).
{{ render_field(form.tags) }} {{ _('Organisational tag/group name used in the main listing page') }}
{{ render_field(form.processor) }}
{{ render_field(form.title, class="m-d", placeholder=watch.label) }} {{ _('Automatically uses the page title if found, you can also use your own title/description here') }}
{{ render_checkbox_field(form.time_between_check_use_default, class="use-default-timecheck") }}
{{ render_field(form.time_between_check, class="time-check-widget") }} {{ _('The interval/amount of time between each check.') }}
{{ render_time_schedule_form(form, available_timezones, timezone_default_config) }}

{{ render_checkbox_field(form.filter_failure_notification_send) }} {{ _('Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.') }}
{{ render_field(form.history_snapshot_max_length, class="history_snapshot_max_length") }} {{ _('Limit collection of history snapshots for each watch to this number of history items.') }}
{{ _('Set to empty to use system settings default') }}
{{ render_ternary_field(form.use_page_title_in_list) }}
{% if capabilities.supports_request_type %}
{{ render_field(form.fetch_backend, class="fetch-backend") }}

{{ _('Use the') }} {{ _('Basic') }} {{ _('method (default) where your watched site doesn\'t need Javascript to render.') }}

{{ _('The') }} {{ _('Chrome/Javascript') }} {{ _('method requires a network connection to a running WebDriver+Chrome server, set by the ENV var \'WEBDRIVER_URL\'.') }}

{{ _('Tip:') }} {{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}
{% if form.proxy %}
{{ form.proxy.label }} {{ _('Check/Scan all') }}
{{ form.proxy(class="fetch-backend-proxy") }}
{{ _('Choose a proxy for this watch') }}
{% endif %}
{{ render_field(form.webdriver_delay) }}
{{ _('If you\'re having trouble waiting for the page to be fully rendered (text missing etc), try increasing the \'wait\' time here.') }}
{{ _('This will wait') }} n {{ _('seconds before extracting the text.') }} {% if using_global_webdriver_wait %}
{{ _('Using the current global default settings') }} {% endif %}
{% endif %}
{% if capabilities.supports_browser_steps %} {% if true %}

{{ _('Click here to Start') }}


{{ _('Please allow 10-15 seconds for the browser to connect.') }}
{{ _('Press "Play" to start.') }} (?) {{ render_field(form.browser_steps) }}
{% else %} {{ _('Visual Selector data is not ready, watch needs to be checked atleast once.') }} {% endif %} {% else %}

{{ _('Sorry, this functionality only works with fetchers that support interactive Javascript (so far only Playwright based fetchers)') }}
{{ _('You need to') }} {{ _('Set the fetch method') }} {{ _('to one that supports interactive Javascript.') }}

{% endif %}
{{ render_ternary_field(form.notification_muted, BooleanField=true) }}
{% if capabilities.supports_screenshots %}
{{ render_checkbox_field(form.notification_screenshot) }} {{ _('Use with caution!') }} {{ _('This will easily fill up your email storage quota or flood other storages.') }}
{% endif %}
{% if has_default_notification_urls %}
{{ _('Look out!') }} {{ _('There are') }} {{ _('system-wide notification URLs enabled') }}, {{ _('this form will override notification settings for this watch only') }} ‐ {{ _('an empty Notification URL list here will still send notifications.') }}
{% endif %} {{ _('Use system defaults') }} {{ render_common_settings_form(form, emailprefix, settings_application, extra_notification_token_placeholder_info) }}
{% if capabilities.supports_text_filters_and_triggers %}
{{ render_field(form.conditions_match_logic) }} {{ render_conditions_fieldlist_of_formfields_as_table(form.conditions) }}

{{ _('Use the verify (✓) button to test if a condition passes against the current snapshot.') }}

{{ _('Read a quick tutorial about') }} {{ _('using conditional web page changes here') }}.
{{ _('Activate preview') }}
{% if capabilities.supports_text_filters_and_triggers_elements %}
{{ _('Pro-tips:') }}
{% include "edit/include_subtract.html" %} {% endif %}

{{ _('Text filtering') }}

{{ _('Limit trigger/ignore/block/extract to;') }}
{{ render_checkbox_field(form.filter_text_added) }} {{ render_checkbox_field(form.filter_text_replaced) }} {{ render_checkbox_field(form.filter_text_removed) }} {{ _('Note: Depending on the length and similarity of the text on each line, the algorithm may consider an') }} {{ _('addition') }} {{ _('instead of') }} {{ _('replacement') }} {{ _('for example.') }}
 {{ _('So it\'s always better to select') }} {{ _('Added') }}+{{ _('Replaced') }} {{ _('when you\'re interested in new content.') }}
 {{ _('When content is merely moved in a list, it will also trigger an') }} {{ _('addition') }}, {{ _('consider enabling') }} {{ _('Only trigger when unique lines appear') }}
{{ render_checkbox_field(form.check_unique_lines) }} {{ _('Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.') }}
{{ render_checkbox_field(form.remove_duplicate_lines) }} {{ _('Remove duplicate lines of text') }}
{{ render_checkbox_field(form.sort_text_alphabetically) }} {{ _('Helps reduce changes detected caused by sites shuffling lines around, combine with') }} {{ _('check unique lines') }} {{ _('below.') }}
{{ render_checkbox_field(form.trim_text_whitespace) }} {{ _('Remove any whitespace before and after each line of text') }}
{% include "edit/text-options.html" %}
{% endif %} {# rendered sub Template #} {% if extra_form_content %}
{{ extra_form_content|safe }}
{% endif %} {% if capabilities.supports_visual_selector %}
{% if capabilities.supports_screenshots and capabilities.supports_xpath_element_data %} {% if visual_selector_data_ready %} {{ _('The Visual Selector tool lets you select the') }} {{ _('text') }} {{ _('elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the') }} {{ _('Filters & Triggers') }} {{ _('tab. Use') }} {{ _('Shift+Click') }} {{ _('to select multiple items.') }} {% if watch['processor'] == 'image_ssim_diff' %} {# @todo, integrate with image_ssim_diff selector better, use some extra form ? #}
{{ render_field(form.processor_config_bounding_box) }} {{ render_field(form.processor_config_selection_mode) }}
{% endif %}
{{ _('Clear selection') }} {{ _('One moment, fetching screenshot and element information..') }}
{{ _('Currently:') }} {{ _('Loading...') }}
{% else %} {{ _('Visual Selector data is not ready, watch needs to be checked atleast once.') }} {% endif %} {% else %}

{{ _('Sorry, this functionality only works with fetchers that support Javascript and screenshots (such as playwright etc).') }}
{{ _('You need to') }} {{ _('Set the fetch method') }} {{ _('to one that supports Javascript and screenshots.') }}

{% endif %}
{% endif %}
{{ _('Check count') }} {{ "{:,}".format( watch.check_count) }}
{{ _('Consecutive filter failures') }} {{ "{:,}".format( watch.consecutive_filter_failures) }}
{{ _('History length') }} {{ "{:,}".format(watch.history|length) }}
{{ _('Last fetch duration') }} {{ watch.fetch_time }}s
{{ _('Notification alert count') }} {{ watch.notification_alert_count }}
{{ _('Server type reply') }} {{ watch.get('remote_server_reply') }}
{% if ui_edit_stats_extras %}
{{ ui_edit_stats_extras|safe }}
{% endif %} {% if watch.history_n %}

{{ _('Download latest HTML snapshot') }} {{ _('Download watch data package') }}

{% endif %}
{{ render_button(form.save_button) }} {{ _('Delete') }} {% if watch.history_n %}{{ _('Clear History') }}{% endif %} {{ _('Clone & Edit') }} {{ _('RSS Feed for this watch') }}
{% endblock %} ================================================ FILE: changedetectionio/blueprint/ui/templates/preview.html ================================================ {% extends 'base.html' %} {% from '_helpers.html' import highlight_trigger_ignored_explainer %} {% block content %} {% if versions|length >= 2 %}

{{ _('Keyboard:') }} ← {{ _('Previous') }}   → {{ _('Next') }}
{% endif %}
{{ watch.error_text_ctime|format_seconds_ago }} {{ _('seconds ago') }}
            {{ last_error_text }}
        
{{ watch.snapshot_error_screenshot_ctime|format_seconds_ago }} {{ _('seconds ago') }}
{{ _('Current erroring screenshot from most recent request') }}
{{ highlight_trigger_ignored_explainer() }}
{{ current_version|format_timestamp_timeago }}
{{ content| diff_unescape_difference_spans }}
{{ _('For now, Differences are performed on text, not graphically, only the latest screenshot is available.') }}

{% if capabilities.supports_screenshots %} {% if screenshot %}
{{ watch.snapshot_screenshot_ctime|format_timestamp_timeago }}
{{ _('Current screenshot from most recent request') }} {% else %} {{ _('No screenshot available just yet! Try rechecking the page.') }} {% endif %} {% else %} {{ _('Screenshot requires a Content Fetcher ( Sockpuppetbrowser, selenium, etc ) that supports screenshots.') }} {% endif %}
{% endblock %} ================================================ FILE: changedetectionio/blueprint/ui/views.py ================================================ from flask import Blueprint, request, redirect, url_for, flash from flask_babel import gettext from changedetectionio.store import ChangeDetectionStore from changedetectionio.auth_decorator import login_optionally_required from changedetectionio import worker_pool def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update): views_blueprint = Blueprint('ui_views', __name__, template_folder="../ui/templates") @views_blueprint.route("/form/add/quickwatch", methods=['POST']) @login_optionally_required def form_quick_watch_add(): from changedetectionio import forms form = forms.quickWatchForm(request.form) if not form.validate(): for widget, l in form.errors.items(): flash(','.join(l), 'error') return redirect(url_for('watchlist.index')) url = request.form.get('url').strip() if datastore.url_exists(url): flash(gettext('Warning, URL {} already exists').format(url), "notice") add_paused = request.form.get('edit_and_watch_submit_button') != None from changedetectionio import processors processor = request.form.get('processor', processors.get_default_processor()) new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags','').strip(), extras={'paused': add_paused, 'processor': processor}) if new_uuid: if add_paused: flash(gettext('Watch added in Paused state, saving will unpause.')) return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag'))) else: # Straight into the queue. 
worker_pool.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) flash(gettext("Watch added.")) return redirect(url_for('watchlist.index', tag=request.args.get('tag',''))) return views_blueprint ================================================ FILE: changedetectionio/blueprint/watchlist/__init__.py ================================================ import os import time from flask import Blueprint, request, make_response, render_template, redirect, url_for, flash, session from flask_paginate import Pagination, get_page_parameter from flask_babel import gettext as _ from changedetectionio import forms from changedetectionio import processors from changedetectionio.store import ChangeDetectionStore from changedetectionio.auth_decorator import login_optionally_required def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData): watchlist_blueprint = Blueprint('watchlist', __name__, template_folder="templates") @watchlist_blueprint.route("/", methods=['GET']) @login_optionally_required def index(): active_tag_req = request.args.get('tag', '').lower().strip() active_tag_uuid = active_tag = None # Be sure limit_tag is a uuid if active_tag_req: for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items(): if active_tag_req == tag.get('title', '').lower().strip() or active_tag_req == uuid: active_tag = tag active_tag_uuid = uuid break # Redirect for the old rss path which used the /?rss=true if request.args.get('rss'): return redirect(url_for('rss.feed', tag=active_tag_uuid)) op = request.args.get('op') if op: uuid = request.args.get('uuid') if op == 'pause': datastore.data['watching'][uuid].toggle_pause() elif op == 'mute': datastore.data['watching'][uuid].toggle_mute() datastore.data['watching'][uuid].commit() return redirect(url_for('watchlist.index', tag = active_tag_uuid)) # Sort by last_changed and add the uuid which is usually the key.. 
sorted_watches = [] with_errors = request.args.get('with_errors') == "1" unread_only = request.args.get('unread') == "1" errored_count = 0 search_q = request.args.get('q').strip().lower() if request.args.get('q') else False for uuid, watch in datastore.data['watching'].items(): if with_errors and not watch.get('last_error'): continue if unread_only and (watch.viewed or watch.last_changed == 0) : continue if active_tag_uuid and not active_tag_uuid in watch['tags']: continue if watch.get('last_error'): errored_count += 1 if search_q: if (watch.get('title') and search_q in watch.get('title').lower()) or search_q in watch.get('url', '').lower(): sorted_watches.append(watch) elif watch.get('last_error') and search_q in watch.get('last_error').lower(): sorted_watches.append(watch) else: sorted_watches.append(watch) form = forms.quickWatchForm(request.form) page = request.args.get(get_page_parameter(), type=int, default=1) total_count = len(sorted_watches) pagination = Pagination(page=page, total=total_count, per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic", display_msg=_('displaying {start} - {end} {record_name} in total {total}'), record_name=_('records')) sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title']) proxy_list = datastore.proxy_list output = render_template( "watch-overview.html", active_tag=active_tag, active_tag_uuid=active_tag_uuid, app_rss_token=datastore.data['settings']['application'].get('rss_access_token'), datastore=datastore, errored_count=errored_count, extra_classes='has-queue' if not update_q.empty() else '', form=form, generate_tag_colors=processors.generate_processor_badge_colors, guid=datastore.data['app_guid'], has_proxies=proxy_list, hosted_sticky=os.getenv("SALTED_PASS", False) == False, now_time_server=round(time.time()), pagination=pagination, processor_badge_css=processors.get_processor_badge_css(), 
processor_badge_texts=processors.get_processor_badge_texts(), processor_descriptions=processors.get_processor_descriptions(), queue_size=update_q.qsize(), queued_uuids=update_q.get_queued_uuids(), search_q=request.args.get('q', '').strip(), sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'), sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'), system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'), tags=sorted_tags, unread_changes_count=datastore.unread_changes_count, watches=sorted_watches ) # Return freed template-building memory to the OS immediately. # render_template allocates ~20MB of intermediate strings that are freed on return, # but glibc keeps those pages mapped in its arenas as RSS. malloc_trim() forces # glibc to release them, preventing RSS growth from concurrent Chrome connections. try: import ctypes ctypes.CDLL('libc.so.6').malloc_trim(0) except Exception: pass if session.get('share-link'): del (session['share-link']) resp = make_response(output) # The template can run on cookie or url query info if request.args.get('sort'): resp.set_cookie('sort', request.args.get('sort')) if request.args.get('order'): resp.set_cookie('order', request.args.get('order')) return resp return watchlist_blueprint ================================================ FILE: changedetectionio/blueprint/watchlist/templates/watch-overview.html ================================================ {%- extends 'base.html' -%} {%- block content -%} {%- set tips = [ _("Changedetection.io can monitor more than just web-pages! See our plugins!") ~ ' ' ~ _('More info') ~ '', _("You can also add 'shared' watches.") ~ ' ' ~ _('More info') ~ '' ] -%} {%- from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title -%}
{{ _('Add a new web page change detection watch') }}
{{ render_nolabel_field(form.url, placeholder="https://...", required=true) }} {{ render_nolabel_field(form.watch_submit_button, title=_("Watch this URL!") ) }} {{ render_nolabel_field(form.edit_and_watch_submit_button, title=_("Edit first then Watch") ) }}
{{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder=_("Watch group / tag"), class="transparent-field") }}
{{ render_simple_field(form.processor) }}
Tip: {{ tips | random | safe }}
{%- if watches|length >= pagination.per_page -%}{{ pagination.info }}{%- endif -%}
{{ _('Queued size') }}: {{ queue_size }}
{%- if search_q -%}
{{ _('Searching') }} "{{search_q}}"
{%- endif -%}
{{ _('All') }} {%- for uuid, tag in tags -%} {%- if tag != "" -%} {{ tag.title }} {%- endif -%} {%- endfor -%}
{%- set sort_order = sort_order or 'asc' -%} {%- set sort_attribute = sort_attribute or 'last_changed' -%} {%- set pagination_page = request.args.get('page', 0) -%} {%- set cols_required = 6 -%} {%- set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") -%} {%- if any_has_restock_price_processor -%} {%- set cols_required = cols_required + 1 -%} {%- endif -%} {%- set ui_settings = datastore.data['settings']['application']['ui'] -%} {%- set wrapper_classes = [ 'has-unread-changes' if unread_changes_count else '', 'has-error' if errored_count else '', ] -%}
{%- set table_classes = [ 'favicon-enabled' if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] else 'favicon-not-enabled', ] -%} {%- set link_order = "desc" if sort_order == 'asc' else "asc" -%} {%- set arrow_span = "" -%} {%- if any_has_restock_price_processor -%} {%- endif -%} {%- if not watches|length -%} {%- endif -%} {%- for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) -%} {%- set checking_now = is_checking_now(watch) -%} {%- set history_n = watch.history_n -%} {%- set favicon = watch.get_favicon_filename() -%} {%- set error_texts = watch.compile_error_texts(has_proxies=has_proxies) -%} {%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%} {# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #} {%- set row_classes = [ loop.cycle('pure-table-odd', 'pure-table-even'), 'processor-' ~ watch['processor'], 'has-error' if error_texts|length > 2 else '', 'paused' if watch.paused is defined and watch.paused != False else '', 'unviewed' if watch.has_unviewed else '', 'has-restock-info' if watch.has_restock_info else 'no-restock-info', 'has-favicon' if favicon else '', 'in-stock' if watch.has_restock_info and watch['restock']['in_stock'] else '', 'not-in-stock' if watch.has_restock_info and not watch['restock']['in_stock'] else '', 'queued' if watch.uuid in queued_uuids else '', 'checking-now' if checking_now else '', 'notification_muted' if watch.notification_muted else '', 'single-history' if history_n == 1 else '', 'multiple-history' if history_n >= 2 else '', 'use-html-title' if system_use_url_watchlist else 'no-html-title', ] -%} {%- if any_has_restock_price_processor -%} {%- endif -%} {#last_checked becomes fetch-start-time#} {%- endfor -%}
#   {{ _('Website') }} {{ _('Restock & Price') }}{{ _('Last') }} {{ _('Checked') }} {{ _('Last') }} {{ _('Changed') }}
{{ _('No web page change detection watches configured, please add a URL in the box above, or') }} {{ _('import a list') }}.
{{ loop.index+pagination.skip }}
Pause checks Mute notification
{% if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] %}
{# Intersection Observer lazy loading: store real URL in data-src, load only when visible in viewport #} Favicon thumbnail
{% endif %}
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%} {{ processor_badge_texts[watch['processor']] }} {%- endif -%} {% if system_use_url_watchlist or watch.get('use_page_title_in_list') %} {{ watch.label }} {% else %} {{ watch.get('title') or watch.link }} {% endif %}   {%- if watch['processor'] == 'text_json_diff' -%} {%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
Switch to Restock & Price watch mode? Yes No
{%- endif -%} {%- endif -%} {%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%} {{ watch_tag.title }} {%- endfor -%}
{%- set effective_fetcher = watch.get_fetch_backend if watch.get_fetch_backend != "system" else system_default_fetcher -%} {%- if effective_fetcher and ("html_webdriver" in effective_fetcher or "html_" in effective_fetcher or "extra_browser_" in effective_fetcher) -%} {{ effective_fetcher|fetcher_status_icons }} {%- endif -%} {%- if watch.is_pdf -%}Converting PDF to text{%- endif -%} {%- if watch.has_browser_steps -%}Browser Steps is enabled{%- endif -%}
{%- if watch['processor'] == 'restock_diff' -%} {%- if watch.has_restock_info -%} {%- if watch['restock']['in_stock']-%} {{ _('In stock') }} {%- else-%} {{ _('Not in stock') }} {%- endif -%} {%- endif -%} {%- if watch.get('restock') and watch['restock'].get('price') -%} {%- set restock = watch['restock'] -%} {%- set price = restock.get('price') -%} {%- set cur = restock.get('currency','') -%} {%- if price is not none and (price|string)|regex_search('\d') -%} {# @todo: make parse_currency/parse_decimal aware of the locale of the actual web page and use that instead changedetectionio/processors/restock_diff/__init__.py #} {%- if price is number -%}{# It's a number so we can convert it to their locale' #} {{ price|format_number_locale }} {{ cur }} {%- else -%}{# It's totally fine if it arrives as something else, the website might be something weird in this field #} {{ price }} {{ cur }} {%- endif -%} {%- endif -%} {%- elif not watch.has_restock_info -%} {{ _('No information') }} {%- endif -%} {%- endif -%} {{watch|format_last_checked_time|safe}} {%- if watch.history_n >=2 and watch.last_changed >0 -%} {{watch.last_changed|format_timestamp_timeago}} {%- else -%} {{ _('Not yet') }} {%- endif -%}
{%- set target_attr = ' target="' ~ watch.uuid ~ '"' if datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') else '' -%} {{ _('Recheck') }} {{ _('Edit') }}
{{ pagination.links }}
{%- endblock -%} ================================================ FILE: changedetectionio/browser_steps/__init__.py ================================================ ================================================ FILE: changedetectionio/browser_steps/browser_steps.py ================================================ import os import time import re from random import randint from loguru import logger from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT from changedetectionio.content_fetchers.base import manage_user_agent from changedetectionio.jinja2_custom import render as jinja_render def browser_steps_get_valid_steps(browser_steps: list): if browser_steps is not None and len(browser_steps): valid_steps = list(filter( lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one'),browser_steps)) # Just incase they selected Goto site by accident with older JS if valid_steps and valid_steps[0]['operation'] == 'Goto site': del(valid_steps[0]) return valid_steps return [] # Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end # 0- off, 1- on browser_step_ui_config = {'Choose one': '0 0', # 'Check checkbox': '1 0', # 'Click button containing text': '0 1', # 'Scroll to bottom': '0 0', # 'Scroll to element': '1 0', # 'Scroll to top': '0 0', # 'Switch to iFrame by index number': '0 1' # 'Uncheck checkbox': '1 0', # @todo 'Check checkbox': '1 0', 'Click X,Y': '0 1', 'Click element if exists': '1 0', 'Click element': '1 0', 'Click element containing text': '0 1', 'Click element containing text if exists': '0 1', 'Enter text in field': '1 1', 'Execute JS': '0 1', # 'Extract text and use as filter': '1 0', 'Goto site': '0 0', 'Goto URL': '0 1', 'Make all child elements visible': '1 0', 'Press Enter': '0 0', 'Select by label': '1 1', ' is rendered def _value(self): # Tag UUID to name, on submit it will convert it back (in the submit handler of init.py) if self.data and type(self.data) 
is list: tag_titles = [] for i in self.data: tag = self.datastore.data['settings']['application']['tags'].get(i) if tag: tag_title = tag.get('title') if tag_title: tag_titles.append(tag_title) return ', '.join(tag_titles) if not self.data: return '' return 'error' class TimeDurationForm(Form): hours = SelectField(choices=[(f"{i}", f"{i}") for i in range(0, 25)], default="24", validators=[validators.Optional()]) minutes = SelectField(choices=[(f"{i}", f"{i}") for i in range(0, 60)], default="00", validators=[validators.Optional()]) class TimeStringField(Field): """ A WTForms field for time inputs (HH:MM) that stores the value as a string. """ widget = TimeInput() # Use the built-in time input widget def _value(self): """ Returns the value for rendering in the form. """ return self.data if self.data is not None else "" def process_formdata(self, valuelist): """ Processes the raw input from the form and stores it as a string. """ if valuelist: time_str = valuelist[0] # Simple validation for HH:MM format if not time_str or len(time_str.split(":")) != 2: raise ValidationError(_l("Invalid time format. 
Use HH:MM.")) self.data = time_str class validateTimeZoneName(object): """ Flask wtform validators wont work with basic auth """ def __init__(self, message=None): self.message = message def __call__(self, form, field): from zoneinfo import available_timezones python_timezones = available_timezones() if field.data and field.data not in python_timezones: raise ValidationError(_l("Not a valid timezone name")) class ScheduleLimitDaySubForm(Form): enabled = BooleanField(_l("not set"), default=True) start_time = TimeStringField(_l("Start At"), default="00:00", validators=[validators.Optional()]) duration = FormField(TimeDurationForm, label=_l("Run duration")) class ScheduleLimitForm(Form): enabled = BooleanField(_l("Use time scheduler"), default=False) # Because the label for=""" doesnt line up/work with the actual checkbox monday = FormField(ScheduleLimitDaySubForm, label="") tuesday = FormField(ScheduleLimitDaySubForm, label="") wednesday = FormField(ScheduleLimitDaySubForm, label="") thursday = FormField(ScheduleLimitDaySubForm, label="") friday = FormField(ScheduleLimitDaySubForm, label="") saturday = FormField(ScheduleLimitDaySubForm, label="") sunday = FormField(ScheduleLimitDaySubForm, label="") timezone = StringField(_l("Optional timezone to run in"), render_kw={"list": "timezones"}, validators=[validateTimeZoneName()] ) def __init__( self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs, ): super().__init__(formdata, obj, prefix, data, meta, **kwargs) self.monday.form.enabled.label.text=_l("Monday") self.tuesday.form.enabled.label.text = _l("Tuesday") self.wednesday.form.enabled.label.text = _l("Wednesday") self.thursday.form.enabled.label.text = _l("Thursday") self.friday.form.enabled.label.text = _l("Friday") self.saturday.form.enabled.label.text = _l("Saturday") self.sunday.form.enabled.label.text = _l("Sunday") def validate_time_between_check_has_values(form): """ Custom validation function for TimeBetweenCheckForm. 
Returns True if at least one time interval field has a value > 0. """ res = any([ form.weeks.data and int(form.weeks.data) > 0, form.days.data and int(form.days.data) > 0, form.hours.data and int(form.hours.data) > 0, form.minutes.data and int(form.minutes.data) > 0, form.seconds.data and int(form.seconds.data) > 0 ]) return res class RequiredTimeInterval(object): """ WTForms validator that ensures at least one time interval field has a value > 0. Use this with FormField(TimeBetweenCheckForm, validators=[RequiredTimeInterval()]). """ def __init__(self, message=None): self.message = message or _l('At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.') def __call__(self, form, field): if not validate_time_between_check_has_values(field.form): raise ValidationError(self.message) class TimeBetweenCheckForm(Form): weeks = IntegerField(_l('Weeks'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))]) days = IntegerField(_l('Days'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))]) hours = IntegerField(_l('Hours'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))]) minutes = IntegerField(_l('Minutes'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))]) seconds = IntegerField(_l('Seconds'), validators=[validators.Optional(), validators.NumberRange(min=0, message=_l("Should contain zero or more seconds"))]) # @todo add total seconds minimum validatior = minimum_seconds_recheck_time def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs): super().__init__(formdata, obj, prefix, data, meta, **kwargs) self.require_at_least_one = kwargs.get('require_at_least_one', False) self.require_at_least_one_message = kwargs.get('require_at_least_one_message', 
class EnhancedFormField(FormField):
    """
    FormField variant with optional conditional validation.

    Failures of the conditional check are collected in `top_errors` (errors that
    belong to the FormField as a whole rather than to one of its sub-fields).
    """

    def __init__(self, form_class, label=None, validators=None, separator="-",
                 conditional_field=None, conditional_message=None, conditional_test_function=None, **kwargs):
        """
        :param conditional_field: Name of the field on the parent form that this FormField
                                  depends on (e.g. 'time_between_check_use_default')
        :param conditional_message: Message stored in `top_errors` when the conditional check fails
        :param conditional_test_function: Optional callable receiving the sub-form (self.form);
                                          returns a truthy value when the sub-form is acceptable.
        """
        super().__init__(form_class, label, validators, separator, **kwargs)
        self.top_errors = []
        self.conditional_field = conditional_field
        self.conditional_message = conditional_message or "At least one field must have a value when not using defaults."
        self.conditional_test_function = conditional_test_function

    def validate(self, form, extra_validators=()):
        """
        Run the regular FormField validation, then the conditional check (if configured).

        Returns the result of the base validation, forced to False when the
        conditional check fails.
        """
        self.top_errors = []

        # Regular FormField validation always runs first
        is_valid = super().validate(form, extra_validators)

        # No conditional configured, or the parent form has no such field: nothing more to do
        if not self.conditional_field or not hasattr(form, self.conditional_field):
            return is_valid

        controlling_field = getattr(form, self.conditional_field)
        if controlling_field.data:
            # Controlling field is truthy (e.g. checkbox ticked) - the sub-form may be empty
            return is_valid

        if self.conditional_test_function:
            populated = self.conditional_test_function(self.form)
        else:
            # Generic fallback - any sub-field carrying truthy data counts
            populated = any(getattr(sub_field, 'data', None) for sub_field in self.form)

        if populated:
            return is_valid

        self.top_errors.append(self.conditional_message)
        return False
# Separated by key:value
class StringDictKeyValue(StringField):
    """Textarea-backed field holding a dict, edited as one 'key: value' pair per line."""
    widget = widgets.TextArea()

    def _value(self):
        # Render the dict back into 'key: value' lines for the textarea
        if not self.data:
            return ''
        return ''.join(f"{k}: {v}\r\n" for k, v in self.data.items())

    # incoming data processing + validation
    def process_formdata(self, valuelist):
        """
        Parse submitted text into self.data.

        Blank lines are dropped; every remaining line must contain a ':' and have a
        non-empty key and value. All problems are collected and reported together.

        :raises ValidationError: when at least one line is malformed
        """
        self.data = {}
        problems = []

        if valuelist:
            # Remove empty strings (blank lines)
            stripped_lines = (raw.strip() for raw in valuelist[0].split("\n"))
            entries = [entry for entry in stripped_lines if entry]

            for line_no, entry in enumerate(entries, start=1):
                # Only the first ':' separates key from value
                raw_key, separator, raw_value = entry.partition(':')
                if not separator:
                    problems.append(f"Line {line_no} is missing a ':' separator.")
                    continue

                key = raw_key.strip()
                value = raw_value.strip()

                if not key:
                    problems.append(f"Line {line_no} has an empty key.")
                if not value:
                    problems.append(f"Line {line_no} has an empty value.")

                self.data[key] = value

        if problems:
            raise ValidationError("Invalid input:\n" + "\n".join(problems))
Better would be a radiohandler that keeps a reference to each class # if field.data is not None and field.data != 'system': # klass = getattr(content_fetcher, field.data) # some_object = klass() # try: # ready = some_object.is_ready() # # except urllib3.exceptions.MaxRetryError as e: # driver_url = some_object.command_executor # message = field.gettext('Content fetcher \'%s\' did not respond.' % (field.data)) # message += '
' + field.gettext( # 'Be sure that the selenium/webdriver runner is running and accessible via network from this container/host.') # message += '
' + field.gettext('Did you follow the instructions in the wiki?') # message += '

' + field.gettext('WebDriver Host: %s' % (driver_url)) # message += '
Go here for more information' # message += '
class ValidateNotificationBodyAndTitleWhenURLisSet(object):
    """
       Validates that they entered something in both notification title+body when the URL is set
       Due to https://github.com/dgtlmoon/changedetection.io/issues/360
    """

    def __init__(self, message=None):
        # Optional override for the default error text
        self.message = message

    def __call__(self, form, field):
        """
        :param form: form exposing `notification_title` and `notification_body` fields
        :param field: the notification URL field being validated
        :raises ValidationError: when a URL is set but the title or the body is empty
        """
        # Truthiness instead of len(): unset (None) data must not blow up with a TypeError
        if not field.data:
            return

        if form.notification_title.data and form.notification_body.data:
            return

        message = self.message or field.gettext('Notification Body and Title is required when a Notification URL is used')
        raise ValidationError(message)
class ValidateJinja2Template(object):
    """
    Validates that the field content is a renderable Jinja2 template and that every
    token it uses is known (notification tokens plus any extra tokens set on the field).
    """

    def __call__(self, form, field):
        from changedetectionio.jinja2_custom import create_jinja_env
        from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError
        from jinja2.meta import find_undeclared_variables
        import jinja2.exceptions

        # Might be a list of text, or might be just text (like from the apprise url list)
        if isinstance(field.data, list):
            template_text = ' '.join(map(str, field.data))
        else:
            template_text = f"{field.data}"

        try:
            # Use the shared helper to create a properly configured environment
            env = create_jinja_env(loader=BaseLoader)

            # Add notification tokens for validation
            placeholders = NotificationContextData()
            placeholders.set_random_for_validation()
            env.globals.update(placeholders)

            if hasattr(field, 'extra_notification_tokens'):
                env.globals.update(field.extra_notification_tokens)

            # A trial render surfaces syntax, undefined-name and sandbox errors
            env.from_string(template_text).render()
        except (TemplateSyntaxError, jinja2.exceptions.SecurityError) as e:
            raise ValidationError(f"This is not a valid Jinja2 template: {e}") from e
        except UndefinedError as e:
            raise ValidationError(f"A variable or function is not defined: {e}") from e

        # Check for undeclared variables
        unknown_tokens = ", ".join(find_undeclared_variables(env.parse(template_text)))
        if unknown_tokens:
            raise ValidationError(
                f"The following tokens used in the notification are not valid: {unknown_tokens}"
            )
class ValidateSinglePythonRegexString(object):
    """Validates that the field value compiles as a Python regular expression."""

    def __init__(self, message=None):
        self.message = message

    def __call__(self, form, field):
        candidate = field.data
        try:
            # Compiling is the check; the compiled pattern itself is discarded
            re.compile(candidate)
        except re.error:
            template = field.gettext('RegEx \'%s\' is not a valid regular expression.')
            raise ValidationError(template % (candidate))
(%s)') raise ValidationError(message % (line, str(e))) except: raise ValidationError("A system-error occurred when validating your XPath expression") if line.strip().startswith('xpath1:'): if not self.allow_xpath: raise ValidationError("XPath not permitted in this field!") from lxml import etree, html tree = html.fromstring("") line = re.sub(r'^xpath1:', '', line) try: tree.xpath(line.strip()) except etree.XPathEvalError as e: message = field.gettext('\'%s\' is not a valid XPath expression. (%s)') raise ValidationError(message % (line, str(e))) except: raise ValidationError("A system-error occurred when validating your XPath expression") if 'json:' in line: if not self.allow_json: raise ValidationError("JSONPath not permitted in this field!") from jsonpath_ng.exceptions import ( JsonPathLexerError, JsonPathParserError, ) from jsonpath_ng.ext import parse input = line.replace('json:', '') try: parse(input) except (JsonPathParserError, JsonPathLexerError) as e: message = field.gettext('\'%s\' is not a valid JSONPath expression. (%s)') raise ValidationError(message % (input, str(e))) except: raise ValidationError("A system-error occurred when validating your JSONPath expression") # Re #265 - maybe in the future fetch the page and offer a # warning/notice that its possible the rule doesnt yet match anything? if not self.allow_json: raise ValidationError("jq not permitted in this field!") if 'jq:' in line: try: import jq except ModuleNotFoundError: # `jq` requires full compilation in windows and so isn't generally available raise ValidationError("jq not support not found") input = line.replace('jq:', '') try: jq.compile(input) except (ValueError) as e: message = field.gettext('\'%s\' is not a valid jq expression. 
class ValidateSimpleURL:
    """Validate that the value can be parsed by urllib.parse.urlparse() and has a scheme/netloc."""

    def __init__(self, message=None):
        self.message = message or "Invalid URL."

    def __call__(self, form, field):
        """
        :raises ValidationError: when the value cannot be parsed, or lacks a scheme or netloc
        """
        data = (field.data or "").strip()
        if not data:
            return  # empty is OK — pair with validators.Optional()

        from urllib.parse import urlparse

        try:
            parsed = urlparse(data)
        except ValueError as e:
            # urlparse() itself raises on malformed input (e.g. an unclosed IPv6 bracket);
            # report that as a normal validation failure rather than letting it escape
            raise ValidationError(self.message) from e

        if not parsed.scheme or not parsed.netloc:
            raise ValidationError(self.message)
# Common to a single watch and the global settings
class commonSettingsForm(Form):
    """Fields shared by the single-watch form and the global settings form."""
    from . import processors

    fetch_backend = RadioField(
        _l('Fetch Method'),
        choices=content_fetchers.available_fetchers(),
        validators=[ValidateContentFetcherIsReady()],
    )
    notification_body = TextAreaField(
        _l('Notification Body'),
        default='{{ watch_url }} had a change.',
        validators=[validators.Optional(), ValidateJinja2Template()],
    )
    notification_format = SelectField(
        _l('Notification format'),
        choices=list(valid_notification_formats.items()),
    )
    notification_title = StringField(
        _l('Notification Title'),
        default='ChangeDetection.io Notification - {{ watch_url }}',
        validators=[validators.Optional(), ValidateJinja2Template()],
    )
    notification_urls = StringListField(
        _l('Notification URL List'),
        validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()],
    )
    processor = RadioField(
        label=_l("Processor - What do you want to achieve?"),
        choices=lambda: processors.available_processors(),
        default=processors.get_default_processor,
    )
    scheduler_timezone_default = StringField(
        _l("Default timezone for watch check scheduler"),
        render_kw={"list": "timezones"},
        validators=[validateTimeZoneName()],
    )
    webdriver_delay = IntegerField(
        _l('Wait seconds before extracting text'),
        validators=[
            validators.Optional(),
            validators.NumberRange(min=1, message=_l("Should contain one or more seconds")),
        ],
    )

    def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs):
        super().__init__(formdata, obj, prefix, data, meta, **kwargs)
        # Hand the optional extra tokens to each Jinja2-validated notification field;
        # ValidateJinja2Template reads them from `extra_notification_tokens`
        for token_aware_field in (self.notification_body, self.notification_title, self.notification_urls):
            token_aware_field.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
class importForm(Form):
    """Form for importing watches from a pasted URL list or an uploaded .xlsx file."""
    processor = RadioField(_l('Processor'), choices=lambda: processors.available_processors(), default=processors.get_default_processor)
    urls = TextAreaField(_l('URLs'))
    xlsx_file = FileField(_l('Upload .xlsx file'), validators=[FileAllowed(['xlsx'], _l('Must be .xlsx file!'))])
    # A list, not a set literal: set iteration order of string tuples varies between
    # processes (hash randomization), which made the option order - and therefore the
    # option shown first in the <select> - non-deterministic.
    file_mapping = SelectField(
        _l('File mapping'),
        [validators.DataRequired()],
        choices=[('wachete', 'Wachete mapping'), ('custom', 'Custom mapping')],
    )
string or something else random if isinstance(json_data, dict): # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part) # @type could also be a list although non-standard ("@type": ["Product", "SubType"],) # LD_JSON auto-extract also requires some content PLUS the ldjson to be present # 1833 - could be either str or dict, should not be anything else t = json_data.get('@type') if t and stripped_text_from_html: if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower(): break # The non-standard part, some have a list elif isinstance(t, list): if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]: break elif stripped_text_from_html: break return stripped_text_from_html # content - json # json_filter - ie json:$..price # ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector) def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None): stripped_text_from_html = False # https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags # Looks like clean JSON, dont bother extracting from HTML content_start = content.lstrip("\ufeff").strip()[:100] if content_start[0] == '{' or content_start[0] == '[': try: # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff")), json_filter) except json.JSONDecodeError as e: logger.warning(f"Error processing JSON {content[:20]}...{str(e)})") else: # Check for JSONP wrapper: someCallback({...}) or some.namespace({...}) # Server may claim application/json but actually return JSONP jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', 
# Mode - "content" return the content without the matches (default)
#      - "line numbers" return a list of line numbers that match (int list)
#
# wordlist - list of regex's (str) or words (str)
# Preserves all linefeeds and other whitespacing, its not the job of this to remove that
def strip_ignore_text(content, wordlist, mode="content"):
    """
    Remove (or report) the lines of `content` matched by any entry of `wordlist`.

    Entries that look like a perl-style /regex/ are compiled as regular expressions;
    patterns carrying the DOTALL or MULTILINE flag are matched against the whole
    content, all other patterns line by line. Remaining entries are matched as
    case-insensitive substrings. Blank entries are skipped.

    :param content: text to filter; falsy content yields ''
    :param wordlist: iterable of words / perl-style regex strings
    :param mode: "content" returns the text without the matched lines,
                 "line numbers" returns the 1-based matched line numbers in ascending order
    """
    if not content:
        return ''

    ignore_text = []
    ignore_regex = []
    ignore_regex_multiline = []

    for k in wordlist:
        # Skip empty strings to avoid matching everything
        if not k or not k.strip():
            continue
        # Is it a regex?
        if re.search(PERL_STYLE_REGEX, k, re.IGNORECASE):
            compiled = re.compile(perl_style_slash_enclosed_regex_to_options(k))
            if compiled.flags & (re.DOTALL | re.MULTILINE):
                ignore_regex_multiline.append(compiled)
            else:
                ignore_regex.append(compiled)
        else:
            # Lower-case once here instead of once per content line
            ignore_text.append(k.strip().lower())

    lines = content.splitlines(keepends=True)
    ignored_lines = set()

    for r in ignore_regex_multiline:
        for match in r.finditer(content):
            # Translate character offsets into 0-based line indexes by counting lines
            end_line = len(content[:match.end()].splitlines(keepends=True))
            start_line = end_line - len(content[match.start():match.end()].splitlines(keepends=True))
            if end_line - start_line <= 1:
                # Match is empty or in the middle of the line
                ignored_lines.add(start_line)
            else:
                ignored_lines.update(range(start_line, end_line))

    for line_index, line in enumerate(lines):
        lowered = line.lower()
        if any(word in lowered for word in ignore_text) or any(r.search(line) for r in ignore_regex):
            ignored_lines.add(line_index)

    # Drop indexes that fall outside the document (possible for matches at the very end)
    ignored_lines = {i for i in ignored_lines if 0 <= i < len(lines)}

    # Used for finding out what to highlight
    if mode == "line numbers":
        return [i + 1 for i in sorted(ignored_lines)]

    # Walk the lines in document order rather than relying on set iteration order
    return ''.join(line for line_index, line in enumerate(lines) if line_index not in ignored_lines)
Thread-Safety: This function uses inscriptis.get_text() which internally calls lxml.html.fromstring() with the default parser. Testing with 50 concurrent threads confirms this approach is thread-safe and produces deterministic output. Alternative Approach Rejected: An explicit HTMLParser instance (thread-local or fresh) would also be thread-safe, but was found to break change detection logic in subtle ways (test_check_basic_change_detection_functionality). The default parser provides correct and reliable behavior. """ from inscriptis import get_text from inscriptis.model.config import ParserConfig if render_anchor_tag_content: parser_config = ParserConfig( annotation_rules={"a": ["hyperlink"]}, display_links=True ) else: parser_config = None if is_rss: html_content = re.sub(r'])', r'', r'', html_content) else: # Use BS4 html.parser to strip bloat — SPA's often dump 10MB+ of CSS/JS into , # causing inscriptis to silently give up. Regex-based stripping is unsafe because tags # can appear inside JSON data attributes with JS-escaped closing tags (e.g. <\/script>), # causing the regex to scan past the intended close and eat real page content. from bs4 import BeautifulSoup soup = BeautifulSoup(html_content, 'html.parser') # Strip tags that inscriptis cannot render as meaningful text and which can be very large. # svg/math: produce path-data/MathML garbage; canvas/iframe/template: no inscriptis handlers. # video/audio/picture are kept — they may contain meaningful fallback text or captions. for tag in soup.find_all(['head', 'script', 'style', 'noscript', 'svg', 'math', 'canvas', 'iframe', 'template']): tag.decompose() # SPAs often use to hide content until JS loads. # inscriptis respects CSS display rules, so strip hiding styles from the body tag. 
body_tag = soup.find('body') if body_tag and body_tag.get('style'): style = body_tag['style'] if re.search(r'\b(?:display\s*:\s*none|visibility\s*:\s*hidden)\b', style, re.IGNORECASE): logger.debug(f"html_to_text: Removing hiding styles from body tag (found: '{style}')") del body_tag['style'] html_content = str(soup) text_content = get_text(html_content, config=parser_config) return text_content # Does LD+JSON exist with a @type=='product' and a .price set anywhere? def has_ldjson_product_info(content): try: # Better than .lower() which can use a lot of ram if (re.search(r'application/ld\+json', content, re.IGNORECASE) and re.search(r'"price"', content, re.IGNORECASE) and re.search(r'"pricecurrency"', content, re.IGNORECASE)): return True # On some pages this is really terribly expensive when they dont really need it # (For example you never want price monitoring, but this runs on every watch to suggest it) # for filter in LD_JSON_PRODUCT_OFFER_SELECTORS: # pricing_data += extract_json_as_string(content=content, # json_filter=filter, # ensure_is_ldjson_info_type="product") except Exception as e: # OK too return False return False def workarounds_for_obfuscations(content): """ Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis This could go into its own Pip package in the future, for faster updates """ # HomeDepot.com style $90.74 # https://github.com/weblyzard/inscriptis/issues/45 if not content: return content content = re.sub('', '', content) return content def get_triggered_text(content, trigger_text): triggered_text = [] result = strip_ignore_text(content=content, wordlist=trigger_text, mode="line numbers") i = 1 for p in content.splitlines(): if i in result: triggered_text.append(p) i += 1 return triggered_text def extract_title(data: bytes | str, sniff_bytes: int = 2048, scan_chars: int = 8192) -> str | None: try: # Only decode/process the prefix we need for title extraction match data: case bytes() if 
data.startswith((b"\xff\xfe", b"\xfe\xff")): prefix = data[:scan_chars * 2].decode("utf-16", errors="replace") case bytes() if data.startswith((b"\xff\xfe\x00\x00", b"\x00\x00\xfe\xff")): prefix = data[:scan_chars * 4].decode("utf-32", errors="replace") case bytes(): try: prefix = data[:scan_chars].decode("utf-8") except UnicodeDecodeError: try: head = data[:sniff_bytes].decode("ascii", errors="ignore") if m := (META_CS.search(head) or META_CT.search(head)): enc = m.group(1).lower() else: enc = "cp1252" prefix = data[:scan_chars * 2].decode(enc, errors="replace") except Exception as e: logger.error(f"Title extraction encoding detection failed: {e}") return None case str(): prefix = data[:scan_chars] if len(data) > scan_chars else data case _: logger.error(f"Title extraction received unsupported data type: {type(data)}") return None # Search only in the prefix if m := TITLE_RE.search(prefix): title = html.unescape(" ".join(m.group(1).split())).strip() # Some safe limit return title[:2000] return None except Exception as e: logger.error(f"Title extraction failed: {e}") return None ================================================ FILE: changedetectionio/is_safe_url.py ================================================ """ URL redirect validation module for preventing open redirect vulnerabilities. This module provides functionality to safely validate redirect URLs, ensuring they: 1. Point to internal routes only (no external redirects) 2. Are properly normalized (preventing browser parsing differences) 3. Match registered Flask routes (no fake/non-existent pages) 4. 
def is_safe_url(target, app):
    """
    Decide whether `target` may be used as a redirect destination.

    A target is accepted only when, after normalisation, it:
    1. Is non-empty and does not start with '//'
    2. Carries no scheme and no network location
    3. Resolves against the current request host to an http(s) URL on that same host
    4. Matches a route registered on `app` that is not one of the static endpoints

    Every rejection is logged as a warning.

    Args:
        target: The URL to validate (e.g., '/settings', '/login#top')
        app: The Flask application instance (needed for route validation)

    Returns:
        bool: True if the URL is safe for redirection, False otherwise

    Examples:
        >>> is_safe_url('/settings', app)
        True
        >>> is_safe_url('//evil.com', app)
        False
        >>> is_safe_url('/settings#general', app)
        True
        >>> is_safe_url('/fake-page', app)
        False
    """
    if not target:
        return False

    # Normalise first: trim whitespace and turn backslashes into forward slashes
    # (some browsers interpret backslashes as forward slashes)
    target = target.strip().replace('\\', '/')

    # Double-slash attack
    if target.startswith('//'):
        logger.warning(f"Blocked redirect attempt with double-slash: {target}")
        return False

    parsed = urlparse(target)

    # Any scheme (http://, https://, javascript:, etc.) is refused
    if parsed.scheme:
        logger.warning(f"Blocked redirect attempt with scheme: {target}")
        return False

    # Any network location is refused
    if parsed.netloc:
        logger.warning(f"Blocked redirect attempt with netloc: {target}")
        return False

    # Resolve the relative target against the current host and compare hosts
    host_url = request.host_url
    ref_url = urlparse(host_url)
    resolved = urlparse(urljoin(host_url, target))
    if resolved.scheme not in ('http', 'https') or ref_url.netloc != resolved.netloc:
        logger.warning(f"Blocked redirect attempt with mismatched netloc: {target}")
        return False

    # Finally the path (query string and fragment excluded) must match a registered route
    try:
        adapter = app.url_map.bind(ref_url.netloc)
        # Raises when no route matches the path
        endpoint, values = adapter.match(parsed.path, return_rule=False)

        # Static file endpoints are catch-all routes that would match arbitrary paths
        if endpoint in ('static_content', 'static', 'static_flags'):
            logger.warning(f"Blocked redirect to static endpoint: {target}")
            return False

        logger.debug(f"Validated safe redirect to endpoint '{endpoint}': {target}")
        return True
    except Exception as e:
        # Route doesn't exist or can't be matched
        logger.warning(f"Blocked redirect to non-existent route: {target} (error: {e})")
        return False
""" from .extensions.TimeExtension import TimeExtension from .safe_jinja import ( render, render_fully_escaped, create_jinja_env, JINJA2_MAX_RETURN_PAYLOAD_SIZE, DEFAULT_JINJA2_EXTENSIONS, ) from .plugins.regex import regex_replace __all__ = [ 'TimeExtension', 'render', 'render_fully_escaped', 'create_jinja_env', 'JINJA2_MAX_RETURN_PAYLOAD_SIZE', 'DEFAULT_JINJA2_EXTENSIONS', 'regex_replace', ] ================================================ FILE: changedetectionio/jinja2_custom/extensions/TimeExtension.py ================================================ """ Jinja2 TimeExtension - Custom date/time handling for templates. This extension provides the {% now %} tag for Jinja2 templates, offering timezone-aware date/time formatting with support for time offsets. Why This Extension Exists: The Arrow library has a now() function (arrow.now()), but Jinja2 templates cannot directly call Python functions - they need extensions or filters to expose functionality. This TimeExtension serves as a Jinja2-to-Arrow bridge that: 1. Makes Arrow accessible in templates - Jinja2 requires registering functions/tags through extensions. You cannot use arrow.now() directly in a template. 2. Provides template-friendly syntax - Instead of complex Python code, you get clean tags: {% now 'UTC' %} {% now 'UTC' + 'hours=2' %} {% now 'Europe/London', '%Y-%m-%d' %} 3. Adds convenience features on top of Arrow: - Default timezone from environment variable (TZ) or config - Default datetime format configuration - Offset syntax parsing: 'hours=2,minutes=30' → shift(hours=2, minutes=30) - Empty string timezone support to use configured defaults 4. Maintains security - Works within Jinja2's sandboxed environment so users cannot access arbitrary Python code or objects. Essentially, this is a Jinja2 wrapper around arrow.now() and arrow.shift() that provides user-friendly template syntax while maintaining security. 
class TimeExtension(Extension):
    """
    Jinja2 Extension providing the {% now %} tag for timezone-aware date/time rendering.

    This extension adds two attributes to the Jinja2 environment:
    - datetime_format: Default strftime format string (default: '%a, %d %b %Y %H:%M:%S')
    - default_timezone: Default timezone for rendering (default: TZ env var or 'UTC')

    Both can be overridden after environment creation by setting the attributes directly.
    """

    tags = {'now'}

    def __init__(self, environment):
        """Register the extension and install its defaults on ``environment``."""
        super().__init__(environment)
        environment.extend(
            datetime_format='%a, %d %b %Y %H:%M:%S',
            # A TZ variable that is set but blank must not become an empty
            # timezone name, so fall back to UTC in that case as well.
            default_timezone=os.getenv('TZ', '').strip() or 'UTC',
        )

    def _resolve_timezone(self, timezone):
        """Return ``timezone``, or the environment default (finally 'UTC') when it is empty."""
        return timezone or self.environment.default_timezone or 'UTC'

    def _resolve_format(self, datetime_format):
        """Return ``datetime_format``, or the environment default when it is None."""
        if datetime_format is None:
            return self.environment.datetime_format
        return datetime_format

    @staticmethod
    def _parse_offset(operator, offset):
        """
        Turn an offset string into keyword arguments for ``Arrow.shift()``.

        Args:
            operator: '+' to add the offset, '-' to subtract it
            offset: Comma-separated 'unit=value' pairs (e.g. 'hours=2,minutes=30')

        Returns:
            dict mapping each unit to a float amount with the operator applied
            ('weekday' is converted to int)

        Raises:
            ValueError: if a segment is not of the form 'unit=value' or the
                value is not a number
        """
        sign = -1 if operator == '-' else 1
        shift_params = {}
        for segment in offset.split(','):
            # Tolerate blank segments such as a trailing comma.
            if not segment.strip():
                continue
            interval, separator, value = segment.partition('=')
            interval = interval.strip()
            if not separator or not interval:
                raise ValueError(f"Invalid time offset {segment!r}, expected 'unit=value'")
            # Parse the number first and apply the sign afterwards, so values
            # that carry their own sign ('hours=-2') are accepted too.
            shift_params[interval] = sign * float(value.strip())

        # The weekday parameter can not be a float
        if 'weekday' in shift_params:
            shift_params['weekday'] = int(shift_params['weekday'])
        return shift_params

    def _datetime(self, timezone, operator, offset, datetime_format):
        """
        Get current datetime with time offset applied.

        Args:
            timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
            operator: '+' for addition or '-' for subtraction
            offset: Comma-separated offset parameters (e.g., 'hours=2,minutes=30')
            datetime_format: strftime format string or None to use environment default

        Returns:
            Formatted datetime string with offset applied

        Raises:
            ValueError: if ``offset`` contains a malformed 'unit=value' segment

        Example:
            _datetime('UTC', '+', 'hours=2,minutes=30', '%Y-%m-%d %H:%M:%S')
            # Returns current time + 2.5 hours
        """
        shifted = arrow.now(self._resolve_timezone(timezone)).shift(
            **self._parse_offset(operator, offset)
        )
        return shifted.strftime(self._resolve_format(datetime_format))

    def _now(self, timezone, datetime_format):
        """
        Get current datetime without any offset.

        Args:
            timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
            datetime_format: strftime format string or None to use environment default

        Returns:
            Formatted datetime string for current time

        Example:
            _now('America/New_York', '%Y-%m-%d %H:%M:%S')
            # Returns current time in New York timezone
        """
        current = arrow.now(self._resolve_timezone(timezone))
        return current.strftime(self._resolve_format(datetime_format))

    def parse(self, parser):
        """
        Parse the {% now %} tag and generate appropriate AST nodes.

        Called by Jinja2 when it encounters a {% now %} tag. The tag body is
        parsed as an expression; when that expression is an addition or a
        subtraction its operands are handed to _datetime(), otherwise the
        whole expression is handed to _now().

        Supported syntax:
            {% now 'timezone' %}                      -> calls _now()
            {% now 'timezone', 'format' %}            -> calls _now()
            {% now 'timezone' + 'offset' %}           -> calls _datetime()
            {% now 'timezone' + 'offset', 'format' %} -> calls _datetime()
            {% now 'timezone' - 'offset', 'format' %} -> calls _datetime()

        Args:
            parser: Jinja2 parser instance

        Returns:
            nodes.Output: AST output node containing the formatted datetime string
        """
        lineno = next(parser.stream).lineno

        node = parser.parse_expression()

        # An optional second argument, separated by a comma, is the format.
        if parser.stream.skip_if('comma'):
            datetime_format = parser.parse_expression()
        else:
            datetime_format = nodes.Const(None)

        if isinstance(node, (nodes.Add, nodes.Sub)):
            operator = '+' if isinstance(node, nodes.Add) else '-'
            call_method = self.call_method(
                '_datetime',
                [node.left, nodes.Const(operator), node.right, datetime_format],
                lineno=lineno,
            )
        else:
            call_method = self.call_method(
                '_now',
                [node, datetime_format],
                lineno=lineno,
            )
        return nodes.Output([call_method], lineno=lineno)
def regex_replace(value: str, pattern: str, replacement: str = '', count: int = 0) -> str:
    """
    Replace occurrences of a regex pattern in a string.

    Security: Protected against ReDoS (Regular Expression Denial of Service) attacks:
    - Limits input value size to prevent excessive processing
    - Rejects over-long patterns and patterns with nested quantifiers
    - Uses a SIGALRM based timeout to stop runaway regex operations. The
      timeout can only be armed in the main thread on platforms that provide
      SIGALRM; elsewhere the substitution runs without a timeout, relying on
      the size and pattern checks.

    Args:
        value: The input (converted with str()) to perform replacements on
        pattern: The regex pattern to search for
        replacement: The replacement string (default: '')
        count: Maximum number of replacements (0 = replace all, default: 0)

    Returns:
        String with replacements applied, or the (possibly truncated) input
        string when the pattern is rejected, invalid or times out

    Example:
        {{ "hello world" | regex_replace("world", "universe") }}
        {{ diff | regex_replace("([^<]+)([^<]+)", "Label1: \\1\\nLabel2: \\2") }}

    Security limits:
        - Maximum input size: 10MB
        - Maximum pattern length: 500 characters
        - Operation timeout: 10 seconds
        - Dangerous nested quantifier patterns are rejected
    """
    # Function-scope import: only needed to find out which thread we run in.
    import threading

    # Security limits
    MAX_INPUT_SIZE = 1024 * 1024 * 10  # 10MB max input size
    MAX_PATTERN_LENGTH = 500  # Maximum regex pattern length
    REGEX_TIMEOUT_SECONDS = 10  # Maximum time for regex operation

    # Validate input sizes
    value_str = str(value)
    if len(value_str) > MAX_INPUT_SIZE:
        logger.warning(f"regex_replace: Input too large ({len(value_str)} bytes), truncating")
        value_str = value_str[:MAX_INPUT_SIZE]

    if len(pattern) > MAX_PATTERN_LENGTH:
        logger.warning(f"regex_replace: Pattern too long ({len(pattern)} chars), rejecting")
        return value_str

    # Check for potentially dangerous patterns (basic checks)
    # Nested quantifiers like (a+)+ can cause catastrophic backtracking
    dangerous_patterns = (
        r'\([^)]*\+[^)]*\)\+',  # (x+)+
        r'\([^)]*\*[^)]*\)\+',  # (x*)+
        r'\([^)]*\+[^)]*\)\*',  # (x+)*
        r'\([^)]*\*[^)]*\)\*',  # (x*)*
    )
    if any(re.search(dangerous, pattern) for dangerous in dangerous_patterns):
        logger.warning(f"regex_replace: Potentially dangerous pattern detected: {pattern}")
        return value_str

    def timeout_handler(signum, frame):
        raise TimeoutError("Regex operation timed out")

    # signal.signal() raises ValueError when called outside the main thread.
    # Attempting it there used to abort the whole replacement (the error was
    # swallowed below and the input returned unchanged), so only arm the
    # alarm where it is actually allowed.
    use_alarm = (
        hasattr(signal, 'SIGALRM')
        and threading.current_thread() is threading.main_thread()
    )

    try:
        old_handler = None
        if use_alarm:
            old_handler = signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(REGEX_TIMEOUT_SECONDS)

        try:
            result = re.sub(pattern, replacement, value_str, count=count)
        finally:
            if use_alarm:
                # Cancel the alarm before putting the previous handler back
                signal.alarm(0)
                if old_handler is not None:
                    signal.signal(signal.SIGALRM, old_handler)

        return result

    except TimeoutError:
        logger.error(f"regex_replace: Regex operation timed out - possible ReDoS attack. Pattern: {pattern}")
        return value_str
    except re.error as e:
        logger.warning(f"regex_replace: Invalid regex pattern: {e}")
        return value_str
    except Exception as e:
        logger.error(f"regex_replace: Unexpected error: {e}")
        return value_str
def get_timeago_locale(flask_locale):
    """
    Translate a Flask-Babel locale code into the code the timeago library expects.

    timeago (https://github.com/hustcc/timeago) names some locales differently
    from Flask-Babel, and has no Czech locale at all. Codes listed in the
    table below are translated; every other code is returned unchanged.

    Args:
        flask_locale (str): Flask-Babel locale code (e.g., 'cs', 'zh', 'pt')

    Returns:
        str: timeago locale code (e.g., 'en', 'zh_CN', 'pt_PT')
    """
    # timeago still uses the older locale naming convention that predates
    # BCP 47 / RFC 5646, hence the region-qualified codes on the right.
    timeago_names = {
        'zh': 'zh_CN',          # Chinese Simplified
        'zh_TW': 'zh_TW',       # Chinese Traditional
        'zh_Hant_TW': 'zh_TW',  # Flask-Babel's normalised form of zh_TW
        'pt': 'pt_PT',          # Portuguese (Portugal)
        'sv': 'sv_SE',          # Swedish
        'no': 'nb_NO',          # Norwegian Bokmål
        'hi': 'in_HI',          # Hindi
        'cs': 'en',             # Czech is not supported by timeago, use English
        'uk': 'uk',             # Ukrainian
        'en_GB': 'en',          # British English
        'en_US': 'en',          # American English
    }

    if flask_locale in timeago_names:
        return timeago_names[flask_locale]
    return flask_locale
def get_flag_for_locale(locale):
    """
    Return the 'flag' entry that LANGUAGE_DATA holds for ``locale``.

    In LANGUAGE_DATA that entry is a flag-icons CSS class string such as
    'fi fi-gb fis'. When the locale is unknown, or its entry has no 'flag'
    key, the globe character '🌐' is returned instead.
    """
    language_entry = LANGUAGE_DATA.get(locale, {})
    return language_entry.get('flag', '🌐')
If you change this file manually, please be sure to restart your changedetection.io instance!", 'watching': {}, 'settings': { 'headers': { }, 'requests': { 'extra_proxies': [], # Configurable extra proxies via the UI 'extra_browsers': [], # Configurable extra proxies via the UI 'jitter_seconds': 0, 'proxy': None, # Preferred proxy connection 'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None}, 'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds 'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "5")), # Number of threads, lower is better for slow connections 'default_ua': { 'html_requests': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", DEFAULT_SETTINGS_HEADERS_USERAGENT), 'html_webdriver': None, } }, 'application': { # Custom notification content 'all_paused': False, 'all_muted': False, 'api_access_token_enabled': True, 'base_url' : None, 'empty_pages_are_a_change': False, 'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"), 'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT, 'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum 'global_subtractive_selectors': [], 'history_snapshot_max_length': None, 'ignore_whitespace': True, 'ignore_status_codes': False, #@todo implement, as ternary. 
def parse_headers_from_text_file(filepath):
    """
    Read HTTP headers from a text file containing one 'Name: value' pair per line.

    Lines that are blank, start with '#', contain no colon, or have nothing
    before the first colon are skipped. Only the first colon separates name
    from value, so values may themselves contain colons (URLs, timestamps).
    Name and value are stripped of surrounding whitespace; when a name occurs
    more than once the last occurrence wins.

    Args:
        filepath: Path of the UTF-8 text file to read.

    Returns:
        dict: Header names mapped to their values.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    headers = {}
    # 'utf-8-sig' reads plain UTF-8 too, but drops a leading byte-order mark
    # that would otherwise become part of the first header name.
    with open(filepath, 'r', encoding='utf-8-sig') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue
            name, separator, value = line.partition(':')  # Split only on the first colon
            name = name.strip()
            # An empty header name is never valid, so drop such lines
            # instead of returning an unusable '' key.
            if not separator or not name:
                continue
            headers[name] = value.strip()
    return headers
changedetectionio/model/Tag.py ================================================ """ Tag/Group domain model for organizing and overriding watch settings. ARCHITECTURE NOTE: Configuration Override Hierarchy =================================================== Tags can override Watch settings when overrides_watch=True. Current implementation requires manual checking in processors: for tag_uuid in watch.get('tags'): tag = datastore['settings']['application']['tags'][tag_uuid] if tag.get('overrides_watch'): restock_settings = tag.get('restock_settings', {}) break With Pydantic, this would be automatic via chain resolution: Watch → Tag (first with overrides_watch) → Global See: Watch.py model docstring for full Pydantic architecture explanation See: processors/restock_diff/processor.py:184-192 for current manual implementation """ from changedetectionio.model import watch_base from changedetectionio.model.persistence import EntityPersistenceMixin class model(EntityPersistenceMixin, watch_base): """ Tag domain model - groups watches and can override their settings. Tags inherit from watch_base to reuse all the same fields as Watch. When overrides_watch=True, tag settings take precedence over watch settings for all watches in this tag/group. Fields: overrides_watch (bool): If True, this tag's settings override watch settings title (str): Display name for this tag/group uuid (str): Unique identifier ... 
class TagsDict(dict):
    """Dictionary of tags that also removes a tag's on-disk directory when the tag is deleted."""

    def __init__(self, *args, datastore_path: str | os.PathLike, **kwargs) -> None:
        # Remember the directory under which each tag has a sub-directory named by its key.
        self._datastore_path = Path(datastore_path)
        super().__init__(*args, **kwargs)

    def __delitem__(self, key: str) -> None:
        """Drop ``key`` from the dict, then delete ``<datastore_path>/<key>`` from disk.

        The directory is only removed when it contains a ``tag.json`` file;
        otherwise the removal is skipped and logged. Errors while deleting
        the directory are logged, never raised.

        Raises:
            KeyError: if ``key`` is not in the dict.
        """
        super().__delitem__(key)

        tag_dir = self._datastore_path / key
        tag_json_file = tag_dir / "tag.json"

        if os.path.exists(tag_json_file):
            try:
                shutil.rmtree(tag_dir)
                logger.info(f"Deleted tag directory for tag {key!r}")
            except OSError as e:
                # A directory that vanished in the meantime is not an error.
                if not isinstance(e, FileNotFoundError):
                    logger.error(f"Failed to delete tag directory for tag {key!r}: {e}")
        else:
            # Without tag.json there is no evidence this really is a tag directory.
            logger.critical(f"Aborting deletion of directory '{tag_dir}' because '{tag_json_file}' does not exist.")

    def pop(self, key: str, default=_SENTINEL):
        """Remove ``key`` and return its value, deleting its directory via ``__delitem__``.

        Returns ``default`` when the key is absent and a default was supplied.

        Raises:
            KeyError: if the key is absent and no default was given.
        """
        if key not in self:
            if default is _SENTINEL:
                raise KeyError(key)
            return default

        removed = self[key]
        del self[key]
        return removed
def _brotli_save(contents, filepath, mode=None, fallback_uncompressed=False):
    """
    Save compressed data using native brotli with streaming compression.

    The payload is fed to the compressor in 64KB chunks and written as it is produced.
    Afterwards the compressor is dropped, the Python GC is run and (where libc provides it)
    malloc_trim(0) is called.

    If anything fails after the destination file was opened, the partially written file is
    removed so that a truncated '.br' can never be picked up later in place of valid data.

    Args:
        contents: data to compress (str or bytes); str is encoded as UTF-8
        filepath: destination file path
        mode: brotli compression mode (e.g., brotli.MODE_TEXT); defaults to brotli.MODE_GENERIC
        fallback_uncompressed: if True, save uncompressed on failure; if False, raise exception

    Returns:
        str: actual filepath saved (on fallback: filepath with a trailing '.br' removed)

    Raises:
        Exception: if compression fails and fallback_uncompressed is False
                   (the original error is attached as __cause__)
    """
    import brotli
    import gc
    import ctypes

    # Ensure contents are bytes
    if isinstance(contents, str):
        contents = contents.encode('utf-8')

    # Only clean up a file that this call actually created/truncated
    file_opened = False

    try:
        original_size = len(contents)
        logger.debug(f"Starting brotli streaming compression of {original_size} bytes.")

        # Create streaming compressor
        compressor = brotli.Compressor(quality=6, mode=mode if mode is not None else brotli.MODE_GENERIC)

        # Stream compress in chunks to minimize memory usage
        chunk_size = 65536  # 64KB chunks
        total_compressed_size = 0

        with open(filepath, 'wb') as f:
            file_opened = True
            for offset in range(0, original_size, chunk_size):
                compressed_chunk = compressor.process(contents[offset:offset + chunk_size])
                if compressed_chunk:
                    f.write(compressed_chunk)
                    total_compressed_size += len(compressed_chunk)

            # Finalize compression - flushes whatever the compressor still buffers
            final_chunk = compressor.finish()
            if final_chunk:
                f.write(final_chunk)
                total_compressed_size += len(final_chunk)

        logger.debug(f"Finished brotli compression - From {original_size} to {total_compressed_size} bytes.")

        # Cleanup: Delete compressor, force Python GC, then force C-level memory release
        del compressor
        gc.collect()

        # Force release of C-level memory back to OS (since brotli is a C library)
        try:
            ctypes.CDLL('libc.so.6').malloc_trim(0)
        except Exception:
            pass  # malloc_trim not available on all systems (e.g., macOS)

        return filepath

    except Exception as e:
        logger.error(f"Brotli compression error: {e}")

        # Never leave a truncated compressed file behind
        if file_opened:
            try:
                os.unlink(filepath)
            except OSError:
                pass

        if fallback_uncompressed:
            logger.warning(f"Brotli compression failed for {filepath}, saving uncompressed")
            # Strip only a trailing '.br'; a '.br' elsewhere in the path must be left intact
            fallback_path = str(filepath)
            if fallback_path.endswith('.br'):
                fallback_path = fallback_path[:-len('.br')]
            with open(fallback_path, 'wb') as f:
                f.write(contents)
            return fallback_path

        raise Exception(f"Brotli compression failed for {filepath}: {e}") from e
## Configuration Override Hierarchy (Chain Resolution) The dream architecture uses a 3-level resolution chain: Watch settings → Tag/Group settings → Global settings Current implementation is MANUAL (see processor.py:184-192 for example): - Processors manually check watch.get('field') - Then loop through watch.tags to find first tag with overrides_watch=True - Finally fall back to datastore['settings']['application']['field'] FUTURE: Pydantic-based chain resolution would enable: ```python # Instead of manual resolution in every processor: restock_settings = watch.get('restock_settings', {}) for tag_uuid in watch.get('tags'): tag = datastore['settings']['application']['tags'][tag_uuid] if tag.get('overrides_watch'): restock_settings = tag.get('restock_settings', {}) break # Clean computed properties with automatic resolution: @computed_field def resolved_restock_settings(self) -> dict: if self.restock_settings: return self.restock_settings for tag_uuid in self.tags: tag = self._datastore.get_tag(tag_uuid) if tag.overrides_watch and tag.restock_settings: return tag.restock_settings return self._datastore.settings.restock_settings or {} # Usage: watch.resolved_restock_settings (automatic, type-safe, tested once) ``` Benefits of Pydantic migration: 1. Single source of truth for resolution logic (not scattered across processors) 2. Type safety + IDE autocomplete (watch.resolved_fetch_backend vs dict navigation) 3. Database backend abstraction (domain model separate from persistence) 4. Automatic validation at boundaries 5. Self-documenting via type hints 6. 
Easy to test resolution independently Resolution chain examples that would benefit: - fetch_backend: watch → tag → global (see get_fetch_backend property) - notification_urls: watch → tag → global - time_between_check: watch → global (see threshold_seconds) - restock_settings: watch → tag (see processors/restock_diff/processor.py:184-192) - history_snapshot_max_length: watch → global (see save_history_blob:550-556) - All processor_config_* settings could use tag overrides ## Database Backend Abstraction with Pydantic Current: Watch inherits dict, tightly coupled to file-based JSON storage Future: Domain model (Watch) separate from persistence layer ```python # Domain model (database-agnostic) class Watch(BaseModel): uuid: str url: str # ... validation, business logic # Pluggable backends class DataStoreBackend(ABC): def save_watch(self, watch: Watch): ... def load_watch(self, uuid: str) -> Watch: ... # Implementations: FileBackend, MongoBackend, PostgresBackend, etc. ``` This would enable: - Easy migration between storage backends (file → postgres → mongodb) - Pydantic handles serialization/deserialization automatically - Domain logic stays clean (no storage concerns in Watch methods) ## Migration Path Given existing codebase, incremental migration recommended: 1. Create Pydantic models alongside existing dict-based models 2. Add .to_pydantic() / .from_pydantic() bridge methods 3. Gradually migrate code to use Pydantic models 4. 
@property
def viewed(self):
    """True when 'last_viewed' is non-zero and not older than the newest history key.

    A 'last_viewed' of 0 is never reported as viewed.
    """
    last_seen = int(self['last_viewed'])
    if not last_seen:
        return False
    return last_seen >= int(self.newest_history_key)
@property
def history_index_filename(self):
    """Name of the history index file for the configured 'processor'.

    No processor, or 'text_json_diff', uses 'history.txt'; any other processor gets its
    own 'history-<processor>.txt' so histories of different processors are not mixed.
    """
    processor = self.get('processor')
    if processor and processor != 'text_json_diff':
        return f'history-{processor}.txt'
    return 'history.txt'
@property
def is_source_type_url(self):
    """True when the stored 'url' starts with the 'source:' prefix.

    A missing or None 'url' is treated as an empty string (same guard as is_pdf),
    so the property returns False instead of raising.
    """
    return str(self.get('url') or '').startswith('source:')
@property
def history(self):
    """Read the history index file and return it as a dict of {timestamp: snapshot path}.

    The index ({data_dir}/{history_index_filename}) is a text file with one
    "{epoch-time},{filename}" entry per line; lines without a comma are skipped.
    Only the first comma separates the timestamp, so paths containing commas are kept intact.

    Side effect: refreshes the cached newest history key and the cached history count.

    Returns:
        dict of timestamp (str) -> snapshot path, in file order.
        NOTE: when the watch has no data_dir an empty list (not a dict) is returned and the
        cached values are left untouched.
    """
    tmp_history = {}

    # In the case we are only using the watch for processing without history
    if not self.data_dir:
        return []

    # Read the history file as a dict
    fname = os.path.join(self.data_dir, self.history_index_filename)
    if os.path.isfile(fname):
        logger.debug(f"Reading watch history index for {self.get('uuid')}")
        with open(fname, "r", encoding='utf-8') as f:
            for line in f:
                if ',' not in line:
                    continue

                # maxsplit=1: everything after the first comma is the path, commas included
                k, v = line.strip().split(',', 1)

                # The index history could contain a relative path, so we need to make the fullpath
                # so that python can read it
                # Cross-platform: check for any path separator (works on Windows and Unix)
                if os.sep not in v and '/' not in v and '\\' not in v:
                    # Relative filename only, no path separators
                    v = os.path.join(self.data_dir, v)
                else:
                    # It's possible that they moved the datadir on older versions
                    # So the snapshot exists but is in a different path
                    snapshot_fname = os.path.basename(v)
                    proposed_new_path = os.path.join(self.data_dir, snapshot_fname)
                    if not os.path.exists(v) and os.path.exists(proposed_new_path):
                        v = proposed_new_path

                tmp_history[k] = v

    # Newest key is the last entry in file order (dicts preserve insertion order)
    if tmp_history:
        self.__newest_history_key = next(reversed(tmp_history))
    else:
        self.__newest_history_key = None

    self.__history_n = len(tmp_history)

    return tmp_history
def get_history_snapshot(self, timestamp=None, filepath=None):
    """Return the contents of a single history snapshot.

    Accepts either timestamp or filepath; when ``filepath`` is not given the
    path is looked up in ``self.history`` using ``timestamp``.

    :param timestamp: history key to look up when no filepath is supplied
    :param filepath: path of the snapshot file to read
    :return: ``bytes`` for snapshots with a known binary extension, otherwise
             the decoded text (brotli-decompressed first for ``.br`` files)
    """
    if not filepath:
        filepath = self.history[timestamp]

    # Check if binary file (image, PDF, etc.)
    # Binary files are NEVER saved with .br compression, only text files are
    binary_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.webp', '.pdf', '.bin', '.jfif')
    is_binary = filepath.endswith(binary_extensions)

    # Only look for .br versions for text files
    if not is_binary:
        # See if a brotli version exists and switch to that (text files only)
        if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
            filepath = f"{filepath}.br"

        # OR in the backup case that the .br does not exist, but the plain one does
        if filepath.endswith('.br') and not os.path.isfile(filepath):
            # Strip only the trailing suffix. A blanket .replace('.br', '') would also
            # remove '.br' from directory names earlier in the path and miss the file.
            plain_path = filepath.removesuffix('.br')
            if os.path.isfile(plain_path):
                filepath = plain_path

    # Handle .br compressed text files
    if filepath.endswith('.br'):
        # Brotli doesnt have a fileheader to detect it, so we rely on filename
        # https://www.rfc-editor.org/rfc/rfc7932
        # Note: .br should ONLY exist for text files, never binary
        # Imported here so that plain-text and binary reads do not need brotli at all
        import brotli
        with open(filepath, 'rb') as f:
            return brotli.decompress(f.read()).decode('utf-8')

    # Binary file - return raw bytes
    if is_binary:
        with open(filepath, 'rb') as f:
            return f.read()

    # Text file - decode to string
    with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
        return f.read()
# Save some text file to the appropriate path and bump the history
# result_obj from fetch_site_status.run()
def save_history_blob(self, contents, timestamp, snapshot_id):
    """Write one snapshot to the watch data directory and record it in the history index.

    :param contents: ``bytes`` (stored as-is under a detected file extension) or
                     ``str`` (stored as ``.txt``, or ``.txt.br`` when brotli is
                     enabled and the text is longer than the size threshold)
    :param timestamp: key written to the history index for this snapshot
    :param snapshot_id: basename (without extension) used for the snapshot file
    :return: basename of the snapshot file that was recorded in the index
    """
    logger.trace(f"{self.get('uuid')} - Updating {self.history_index_filename} with timestamp {timestamp}")

    self.ensure_data_dir_exists()

    skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))

    # Binary data - detect file type and save without compression
    if isinstance(contents, bytes):
        try:
            import puremagic
            # Only the first 2048 bytes are handed to the detector
            detections = puremagic.magic_string(contents[:2048])
            ext = detections[0].extension if detections else 'bin'
            # Strip leading dot if present (puremagic returns extensions like '.jfif')
            ext = ext.lstrip('.')
            if detections:
                logger.trace(f"Detected file type: {detections[0].mime_type} -> extension: {ext}")
        except Exception as e:
            # Detection is best-effort: any failure falls back to a generic extension
            logger.warning(f"puremagic detection failed: {e}, using 'bin' extension")
            ext = 'bin'

        snapshot_fname = f"{snapshot_id}.{ext}"
        dest = os.path.join(self.data_dir, snapshot_fname)
        self._write_atomic(dest, contents)

        logger.trace(f"Saved binary snapshot as {snapshot_fname} ({len(contents)} bytes)")

    # Text data - use brotli compression if enabled and above threshold
    else:
        if not skip_brotli and len(contents) > BROTLI_COMPRESS_SIZE_THRESHOLD:
            # Compressed text
            import brotli
            snapshot_fname = f"{snapshot_id}.txt.br"
            dest = os.path.join(self.data_dir, snapshot_fname)

            # An already existing destination is left untouched and simply re-indexed below
            if not os.path.exists(dest):
                try:
                    actual_dest = _brotli_save(contents, dest, mode=brotli.MODE_TEXT, fallback_uncompressed=True)
                    # The helper may report a different path than requested; index whatever it returned
                    if actual_dest != dest:
                        snapshot_fname = os.path.basename(actual_dest)
                except Exception as e:
                    logger.error(f"{self.get('uuid')} - Brotli compression failed: {e}")
                    # Fallback to uncompressed
                    snapshot_fname = f"{snapshot_id}.txt"
                    dest = os.path.join(self.data_dir, snapshot_fname)
                    self._write_atomic(dest, contents.encode('utf-8'))
        else:
            # Plain text
            snapshot_fname = f"{snapshot_id}.txt"
            dest = os.path.join(self.data_dir, snapshot_fname)
            self._write_atomic(dest, contents.encode('utf-8'))

    # Append the new entry to the history index, then flush + fsync it.
    # NOTE(review): this is a plain append (not a temp-file + rename like _write_atomic).
    index_fname = os.path.join(self.data_dir, self.history_index_filename)
    index_line = f"{timestamp},{snapshot_fname}\n"

    with open(index_fname, 'a', encoding='utf-8') as f:
        f.write(index_line)
        f.flush()
        os.fsync(f.fileno())

    # Update internal state
    self.__newest_history_key = timestamp
    self.__history_n += 1

    # MANUAL CHAIN RESOLUTION: Watch → Global
    # With Pydantic, this would become: maxlen = watch.resolved_history_snapshot_max_length
    # @computed_field def resolved_history_snapshot_max_length(self) -> Optional[int]:
    #     if self.history_snapshot_max_length: return self.history_snapshot_max_length
    #     if tag := self._get_override_tag(): return tag.history_snapshot_max_length
    #     return self._datastore.settings.history_snapshot_max_length
    maxlen = self.get('history_snapshot_max_length') or self.get_global_setting('application', 'history_snapshot_max_length')

    # Trim only when a limit is configured and the cached count exceeds it
    if maxlen and self.__history_n and self.__history_n > maxlen:
        self.history_trim(newest_n_items=maxlen)

    # @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
    return snapshot_fname
# Iterate over all history texts and see if something new exists
# Always applying .strip() to start/end but optionally replace any other whitespace
def lines_contain_something_unique_compared_to_history(self, lines: list, ignore_whitespace=False):
    """Report whether any of ``lines`` is absent from every stored history snapshot.

    Lines are compared case-insensitively. With ``ignore_whitespace`` each line is
    passed through the module's whitespace translation table, otherwise it is only
    stripped at both ends.

    :param lines: list of ``str`` or ``bytes`` (the type of the first item decides
                  how the whole list is treated; bytes are decoded as UTF-8)
    :param ignore_whitespace: use the whitespace translation table instead of ``strip()``
    :return: True when at least one normalised line was never seen in history
    """

    def normalise(text):
        # Same normalisation is applied to the incoming lines and to the history lines
        if ignore_whitespace:
            return text.translate(TRANSLATE_WHITESPACE_TABLE).lower()
        return text.strip().lower()

    candidate_lines = set()
    if lines:
        # Can be either str or bytes depending on what was on the disk
        if isinstance(lines[0], str):
            candidate_lines = {normalise(entry) for entry in lines}
        else:
            candidate_lines = {normalise(entry.decode('utf-8')) for entry in lines}

    # Collect every normalised line from every snapshot in the history
    seen_lines = set()
    for snapshot_path in self.history.values():
        snapshot_text = self.get_history_snapshot(filepath=snapshot_path)
        seen_lines.update(normalise(row) for row in snapshot_text.splitlines())

    # Everything new should already exist in history - if not, something new happened
    return not candidate_lines.issubset(seen_lines)
def get_screenshot_as_thumbnail(self, max_age=3200):
    """Return path to a square thumbnail of the most recent screenshot.

    Creates a 350x350 pixel JPEG thumbnail (``thumbnail.jpeg`` in the watch data
    directory) from the top portion of the screenshot.

    Args:
        max_age: Maximum age in seconds before recreating thumbnail

    Returns:
        Path to thumbnail, or None if no screenshot exists or the thumbnail
        could not be created (the error is logged).
    """
    import os
    import time

    thumbnail_path = os.path.join(self.data_dir, "thumbnail.jpeg")
    top_trim = 500  # Pixels from top of screenshot to use

    screenshot_path = self.get_screenshot()
    if not screenshot_path:
        return None

    # Reuse thumbnail if it's fresh and screenshot hasn't changed
    if os.path.isfile(thumbnail_path):
        thumbnail_mtime = os.path.getmtime(thumbnail_path)
        screenshot_mtime = os.path.getmtime(screenshot_path)

        # Both conditions must hold: screenshot not newer than the thumbnail AND thumbnail younger than max_age
        if screenshot_mtime <= thumbnail_mtime and time.time() - thumbnail_mtime < max_age:
            return thumbnail_path

    try:
        from PIL import Image

        with Image.open(screenshot_path) as img:
            # Crop top portion first (full width, top_trim height)
            top_crop_height = min(top_trim, img.height)
            img = img.crop((0, 0, img.width, top_crop_height))

            # Create a smaller intermediate image (to reduce memory usage)
            aspect = img.width / img.height
            interim_width = min(top_trim, img.width)
            interim_height = int(interim_width / aspect) if aspect > 0 else top_trim
            img = img.resize((interim_width, interim_height), Image.NEAREST)

            # Convert to RGB if needed
            if img.mode != 'RGB':
                img = img.convert('RGB')

            # Crop to square from top center
            square_size = min(img.width, img.height)
            left = (img.width - square_size) // 2
            img = img.crop((left, 0, left + square_size, square_size))

            # Final resize to exact thumbnail size with better filter
            img = img.resize((350, 350), Image.BILINEAR)

            # Save with optimized settings
            img.save(thumbnail_path, "JPEG", quality=75, optimize=True)

        return thumbnail_path

    except Exception as e:
        # Any failure (including a missing PIL install, since the import is inside the try) is logged, not raised
        logger.error(f"Error creating thumbnail for {self.get('uuid')}: {str(e)}")
        return None
def get_error_text(self):
    """Read back the text stored in ``last-error.txt`` for this watch.

    :return: the file contents as ``str``, or ``False`` when the file does not exist
    """
    error_file = os.path.join(self.data_dir, "last-error.txt")

    # Guard clause: nothing was saved from a previous failed request
    if not os.path.isfile(error_file):
        return False

    with open(error_file, 'r', encoding='utf-8') as handle:
        saved_text = handle.read()
    return saved_text
def extract_regex_from_all_history(self, regex):
    """Run ``regex`` over every history snapshot and write the matches to a CSV report.

    The report is created as ``report-{uuid}.csv`` inside the watch data directory,
    and only once the first match is found. Each match becomes one row:
    ``[timestamp key, formatted date, match...]`` where a match with several
    groups contributes one column per group.

    :param regex: pattern handed to ``re.findall`` with ``re.MULTILINE``
    :return: basename of the CSV report, or ``False`` when nothing matched
    """
    import csv
    import datetime
    import re

    csv_output_filename = False
    csv_writer = None
    f = None

    try:
        # self.history will be keyed with the full path
        for k, fname in self.history.items():
            if not os.path.isfile(fname):
                continue

            contents = self.get_history_snapshot(timestamp=k)
            res = re.findall(regex, contents, re.MULTILINE)
            if not res:
                continue

            if csv_writer is None:
                # A file on the disk can be transferred much faster via flask than a string reply
                csv_output_filename = f"report-{self.get('uuid')}.csv"
                # newline='' is what the csv module expects of its file object; explicit
                # UTF-8 keeps the report independent of the process locale.
                f = open(os.path.join(self.data_dir, csv_output_filename), 'w', encoding='utf-8', newline='')
                # @todo some headers in the future
                csv_writer = csv.writer(f,
                                        delimiter=',',
                                        quotechar='"',
                                        quoting=csv.QUOTE_MINIMAL,
                                        )
                csv_writer.writerow(['Epoch seconds', 'Date'])

            date_str = datetime.datetime.fromtimestamp(int(k)).strftime('%Y-%m-%d %H:%M:%S')
            for r in res:
                row = [k, date_str]
                if isinstance(r, str):
                    row.append(r)
                else:
                    # findall returns a tuple per match when the pattern has several groups
                    row += r
                csv_writer.writerow(row)
    finally:
        # Always release the report handle, also when a snapshot read or the regex raises
        if f:
            f.close()

    return csv_output_filename
def save_xpath_data(self, data, as_error=False):
    """Store element data for this watch as a zlib-compressed file.

    :param data: a ``str`` is stored as-is; anything else is serialised with ``json.dumps`` first
    :param as_error: write to ``elements-error.deflate`` instead of ``elements.deflate``
    """
    import json
    import zlib

    basename = "elements-error.deflate" if as_error else "elements.deflate"
    target_path = os.path.join(str(self.data_dir), basename)

    self.ensure_data_dir_exists()

    with open(target_path, 'wb') as out:
        # Serialise after the file is opened, exactly like the two-branch original
        text = data if isinstance(data, str) else json.dumps(data)
        out.write(zlib.compress(text.encode()))
@property
def get_browsersteps_available_screenshots(self):
    """List the step numbers that have a ``step_before-<n>.jpeg`` screenshot in the data directory.

    Used for knowing which screenshots are available to show the user in the BrowserSteps UI.

    :return: list of step numbers as strings, in the order the directory glob yields them
    """
    step_matches = (
        re.search(r'step_before-(\d+)', shot.name)
        for shot in Path(self.data_dir).glob('step_before-*.jpeg')
    )
    return [found.group(1) for found in step_matches if found]
Accepts has_proxies parameter to ensure it works even outside app context""" from flask import url_for, has_request_context from markupsafe import Markup output = [] # Initialize as list since we're using append last_error = self.get('last_error','') has_app_context = has_request_context() # has app+request context, we can use url_for() if has_app_context: if last_error: last_error = safe_jinja.render_fully_escaped(last_error) if '403' in last_error: if has_proxies: output.append(str(Markup(f"{last_error} - Try other proxies/location '"))) else: output.append(str(Markup(f"{last_error} - Try adding external proxies/locations '"))) else: output.append(str(Markup(last_error))) if self.get('last_notification_error'): txt = safe_jinja.render_fully_escaped(self.get('last_notification_error')) result = f'' output.append(result) else: # Lo_Fi version - no app context, cant rely on Jinja2 Markup if last_error: output.append(safe_jinja.render_fully_escaped(last_error)) if self.get('last_notification_error'): output.append(safe_jinja.render_fully_escaped(self.get('last_notification_error'))) res = "\n".join(output) return res ================================================ FILE: changedetectionio/model/__init__.py ================================================ import os import uuid from changedetectionio import strtobool from .persistence import EntityPersistenceMixin, _determine_entity_type __all__ = ['EntityPersistenceMixin', 'watch_base'] from ..browser_steps.browser_steps import browser_steps_get_valid_steps USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default' CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL' class watch_base(dict): """ Base watch domain model (inherits from dict for backward compatibility). WARNING: This class inherits from dict, which violates proper encapsulation. Dict inheritance is legacy technical debt that should be refactored to a proper domain model (e.g., Pydantic BaseModel) for better type safety and validation. 
TODO: Migrate to Pydantic BaseModel for: - Type safety and IDE autocomplete - Automatic validation - Clear separation between domain model and serialization - Database backend abstraction (file → postgres → mongodb) - Configuration override chain resolution (Watch → Tag → Global) - Immutability options - Better testing - USE https://docs.pydantic.dev/latest/integrations/datamodel_code_generator TO BUILD THE MODEL FROM THE API-SPEC!!! CHAIN RESOLUTION ARCHITECTURE: The dream is a 3-level override hierarchy: Watch settings → Tag/Group settings → Global settings Current implementation: MANUAL resolution scattered across codebase - Processors manually check watch.get('field') - Loop through tags to find overrides_watch=True - Fall back to datastore['settings']['application']['field'] Pydantic implementation: AUTOMATIC resolution via @computed_field - Single source of truth for each setting's resolution logic - Type-safe, testable, self-documenting - Example: watch.resolved_fetch_backend (instead of nested dict navigation) See: Watch.py model docstring for detailed Pydantic architecture plan See: Tag.py model docstring for tag override explanation See: processors/restock_diff/processor.py:184-192 for current manual example Core Fields: uuid (str): Unique identifier for this watch (auto-generated) url (str): Target URL to monitor for changes title (str|None): Custom display name (overrides page_title if set) page_title (str|None): Title extracted from the `<title>` tag of the monitored page tags (List[str]): List of tag UUIDs for categorization tag (str): DEPRECATED - Old single-tag system, use tags instead Check Configuration: processor (str): Processor type ('text_json_diff', 'restock_diff', etc.) fetch_backend (str): Fetcher to use ('system', 'html_requests', 'playwright', etc.) method (str): HTTP method ('GET', 'POST', etc.)
headers (dict): Custom HTTP headers to send proxy (str|None): Preferred proxy server paused (bool): Whether change detection is paused Scheduling: time_between_check (dict): Check interval {'weeks': int, 'days': int, 'hours': int, 'minutes': int, 'seconds': int} time_between_check_use_default (bool): Use global default interval if True time_schedule_limit (dict): Weekly schedule limiting when checks can run Structure: { 'enabled': bool, 'monday/tuesday/.../sunday': { 'enabled': bool, 'start_time': str ('HH:MM'), 'duration': {'hours': str, 'minutes': str} } } Content Filtering: include_filters (List[str]): CSS/XPath selectors to extract content subtractive_selectors (List[str]): Selectors to remove from content ignore_text (List[str]): Text patterns to ignore in change detection trigger_text (List[str]): Text/regex that must be present to trigger change text_should_not_be_present (List[str]): Text that should NOT be present extract_text (List[str]): Regex patterns to extract specific text after filtering Text Processing: trim_text_whitespace (bool): Strip leading/trailing whitespace sort_text_alphabetically (bool): Sort lines alphabetically before comparison remove_duplicate_lines (bool): Remove duplicate lines check_unique_lines (bool): Compare against all history for unique lines strip_ignored_lines (bool|None): Remove lines matching ignore patterns Change Detection Filters: filter_text_added (bool): Include added text in change detection filter_text_removed (bool): Include removed text in change detection filter_text_replaced (bool): Include replaced text in change detection Browser Automation: browser_steps (List[dict]): Browser automation steps for JS-heavy sites browser_steps_last_error_step (int|None): Last step that caused error webdriver_delay (int|None): Seconds to wait after page load webdriver_js_execute_code (str|None): JavaScript to execute before extraction Restock Detection: in_stock_only (bool): Only trigger on in-stock transitions 
follow_price_changes (bool): Monitor price changes has_ldjson_price_data (bool|None): Whether page has LD-JSON price data track_ldjson_price_data (str|None): Track LD-JSON price data ('ACCEPT', 'REJECT', None) price_change_threshold_percent (float|None): Minimum price change % to trigger Notifications: notification_urls (List[str]): Apprise URLs for notifications notification_title (str|None): Custom notification title template notification_body (str|None): Custom notification body template notification_format (str): Notification format (e.g., 'System default', 'Text', 'HTML') notification_muted (bool): Disable notifications for this watch notification_screenshot (bool): Include screenshot in notifications notification_alert_count (int): Number of notifications sent last_notification_error (str|None): Last notification error message body (str|None): DEPRECATED? Legacy notification body field filter_failure_notification_send (bool): Send notification on filter failures History & State: date_created (int|None): Unix timestamp of watch creation last_checked (int): Unix timestamp of last check last_viewed (int): History snapshot key of last user view last_error (str|bool): Last error message or False if no error check_count (int): Total number of checks performed fetch_time (float): Duration of last fetch in seconds consecutive_filter_failures (int): Counter for consecutive filter match failures previous_md5 (str|bool): MD5 hash of previous content history_snapshot_max_length (int|None): Max history snapshots to keep (None = use global) Conditions: conditions (dict): Custom conditions for change detection logic conditions_match_logic (str): Logic operator ('ALL', 'ANY') for conditions Metadata: content-type (str|None): Content-Type from last fetch remote_server_reply (str|None): Server header from last response ignore_status_codes (List[int]|None): HTTP status codes to ignore use_page_title_in_list (bool|None): Display page title in watch list (None = use system 
default) Instance Attributes (not serialized): __datastore: Reference to parent DataStore (set externally after creation) data_dir: Filesystem path for this watch's data directory Notes: - Many fields default to None to distinguish "not set" from "set to default" - When field is None, system-level defaults are used - Processor-specific configs (e.g., processor_config_*) are NOT stored in watch.json They are stored in separate {processor_name}.json files - This class is used for both Watch and Tag objects (tags reuse the structure) """ def __init__(self, *arg, **kw): # Store datastore reference (common to Watch and Tag) # Use single underscore to avoid name mangling issues in subclasses self._datastore = kw.get('__datastore') if kw.get('__datastore'): del kw['__datastore'] # Store datastore_path (common to Watch and Tag) self._datastore_path = kw.get('datastore_path') if kw.get('datastore_path'): del kw['datastore_path'] # IMPORTANT: Don't initialize __watch_was_edited yet! # We'll initialize it AFTER the initial update() call below # This prevents marking the watch as edited during initialization self.update({ # Custom notification content # Re #110, so then if this is set to None, we know to use the default value instead # Requires setting to None on submit if it's the same as the default # Should be all None by default, so we use the system default in this case. 'body': None, 'browser_steps': [], 'browser_steps_last_error_step': None, 'conditions' : [], 'conditions_match_logic': CONDITIONS_MATCH_LOGIC_DEFAULT, 'check_count': 0, 'check_unique_lines': False, # On change-detected, compare against all history if its something new 'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine. 
'content-type': None, 'date_created': None, 'extract_text': [], # Extract text by regex after filters 'fetch_backend': 'system', # plaintext, playwright etc 'fetch_time': 0.0, 'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), 'filter_text_added': True, 'filter_text_removed': True, 'filter_text_replaced': True, 'follow_price_changes': True, 'has_ldjson_price_data': None, 'history_snapshot_max_length': None, 'headers': {}, # Extra headers to send 'ignore_text': [], # List of text to ignore when calculating the comparison checksum 'ignore_status_codes': None, 'in_stock_only': True, # Only trigger change on going to instock from out-of-stock 'include_filters': [], 'last_checked': 0, 'last_error': False, 'last_notification_error': None, 'last_viewed': 0, # history key value of the last viewed via the [diff] link 'method': 'GET', 'notification_alert_count': 0, 'notification_body': None, 'notification_format': USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, 'notification_muted': False, 'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL 'notification_title': None, 'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise) 'page_title': None, # <title> from the page 'paused': False, 'previous_md5': False, 'processor': 'text_json_diff', # could be restock_diff or others from .processors 'price_change_threshold_percent': None, 'proxy': None, # Preferred proxy connection 'remote_server_reply': None, # From 'server' reply header 'sort_text_alphabetically': False, 'strip_ignored_lines': None, 'subtractive_selectors': [], 'tag': '', # Old system of text name for a tag, to be removed 'tags': [], # list of UUIDs to App.Tags 'text_should_not_be_present': [], # Text that should not present 'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None}, 'time_between_check_use_default': True, 
def _mark_field_as_edited(self, key):
    """
    Flag the watch as edited when `key` is a user-writable field.

    Internal helper shared by __setitem__, __delitem__, update(), pop()
    and setdefault().

    Args:
        key: Name of the field that was just written or removed.
    """
    # The flag attribute is only created at the very end of __init__, so its
    # absence means we are still loading defaults: nothing to track yet.
    if not hasattr(self, '_watch_base__watch_was_edited'):
        return

    # Once set, the flag stays set until it is explicitly reset.
    if self.__watch_was_edited:
        return

    # Imported lazily from the shared schema utilities (no circular dependency)
    from .schema_utils import get_readonly_watch_fields

    readonly_fields = get_readonly_watch_fields()

    # System-managed fields that are not (yet) marked readOnly in the OpenAPI
    # spec; writes to them must not count as a user edit.
    system_managed_fields = {
        'last_check_status',  # Set by processors
        'restock',            # Set by restock processor
        'last_viewed',        # Set by mark_all_viewed endpoint
    }

    if key in readonly_fields or key in system_managed_fields:
        return

    self.__watch_was_edited = True
@classmethod
def get_property_names(cls):
    """
    Return the names of all public @property attributes of this model class.

    These are computed/derived values that are not stored in the datastore,
    so callers filter them out of PUT/POST payloads.

    The result is computed once per class and cached on that class. The cache
    is looked up in ``cls.__dict__`` (not with ``hasattr``) so that a subclass
    never reuses the cache of a parent class: the cached closure is bound to
    the class it was created for, and an inherited cache would silently omit
    every property the subclass adds.

    Returns:
        frozenset: Immutable set of @property attribute names visible on ``cls``
        (including inherited ones), excluding names starting with ``_``.
    """
    import functools

    # Only a cache stored directly on *this* class is valid for it.
    if '_cached_get_property_names' not in cls.__dict__:

        @functools.cache
        def _get_props():
            properties = set()
            # dir() walks the whole MRO, so inherited properties are included
            for name in dir(cls):
                # Skip private/magic attributes
                if name.startswith('_'):
                    continue
                try:
                    attr = getattr(cls, name)
                except (AttributeError, TypeError):
                    # Some descriptors refuse class-level access; ignore them
                    continue
                # Class-level access returns the property descriptor itself
                if isinstance(attr, property):
                    properties.add(name)
            return frozenset(properties)

        cls._cached_get_property_names = _get_props

    return cls._cached_get_property_names()
def __getstate__(self):
    """
    Build the pickle state for any watch_base subclass.

    The state is the plain dict content plus, under the
    '__instance_metadata__' key, the single-underscore instance attributes.
    Large shared references (datastore / app), caches and callables are left
    out so they are never serialized.
    """
    # Plain dict data first
    state = dict(self)

    skipped_suffixes = ('__datastore', '__app')
    metadata = {}

    for name in dir(self):
        # Only single-underscore (incl. name-mangled) attributes are considered
        if not name.startswith('_') or name.startswith('__'):
            continue

        try:
            value = getattr(self, name)
        except AttributeError:
            # Listed by dir() but not readable on this instance
            continue

        # Large reference objects are never serialized
        if name == '_datastore' or name.endswith(skipped_suffixes):
            continue

        # Caches regenerate on demand; callables are not data
        if 'cache' in name.lower() or callable(value):
            continue

        metadata[name] = value

    if metadata:
        state['__instance_metadata__'] = metadata

    return state
def get_global_setting(self, *path):
    """
    Look up a value in the global datastore settings.

    Args:
        *path: Successive keys below the datastore's 'settings' entry,
               e.g. 'application', 'history_snapshot_max_length'.

    Returns:
        The value found at that path, or None when there is no datastore or
        any step of the path is missing / not subscriptable.

    Example:
        maxlen = self.get_global_setting('application', 'history_snapshot_max_length')
    """
    from functools import reduce
    from operator import getitem

    store = self._datastore
    if not store:
        return None

    try:
        # Walk down from the settings root one key at a time
        return reduce(getitem, path, store['settings'])
    except (KeyError, TypeError):
        return None
@functools.lru_cache(maxsize=None)
def _determine_entity_type(cls):
    """
    Derive the entity type of a model class from its class hierarchy.

    The MRO is scanned in order; the first class that lives in a
    ``changedetectionio.model.*`` module decides the type, which is that
    module's last dotted component in lower case
    ("changedetectionio.model.Watch" -> "watch"). Results are cached per class.

    Args:
        cls: The class to inspect

    Returns:
        str: Entity type ('watch', 'tag', etc.)

    Raises:
        ValueError: If no class in the MRO comes from changedetectionio.model
    """
    model_package_prefix = 'changedetectionio.model.'

    owning_module = next(
        (
            klass.__module__
            for klass in inspect.getmro(cls)
            if klass.__module__.startswith(model_package_prefix)
        ),
        None,
    )

    if owning_module is None:
        raise ValueError(
            f"Cannot determine entity type for {cls.__module__}.{cls.__name__}. "
            f"Entity must inherit from a class in changedetectionio.model (Watch or Tag)."
        )

    # Keep only the part after the last dot
    return owning_module.rpartition('.')[2].lower()
@functools.cache
def _resolve_readonly_fields(schema_name):
    """
    Collect the readOnly field names of an OpenAPI schema.

    When the schema is composed with ``allOf``, each entry contributes the
    readOnly properties of the schema it ``$ref``-erences (one level) and of
    its own inline ``properties``. Otherwise the schema's direct properties
    are used. An unknown schema name yields an empty set.

    Args:
        schema_name: Name of the schema (e.g., 'Watch', 'Tag')

    Returns:
        frozenset: All readOnly field names found
    """

    def _readonly_names(node):
        # Names of the properties in `node` explicitly flagged readOnly: true
        if 'properties' not in node:
            return set()
        return {
            field_name
            for field_name, field_def in node['properties'].items()
            if field_def.get('readOnly') is True
        }

    all_schemas = get_openapi_schema_dict()['components']['schemas']
    schema = all_schemas.get(schema_name, {})

    # No inheritance: only the schema's own properties matter
    if 'allOf' not in schema:
        return frozenset(_readonly_names(schema))

    collected = set()
    for part in schema['allOf']:
        if '$ref' in part:
            # "#/components/schemas/WatchBase" -> "WatchBase"
            parent_name = part['$ref'].split('/')[-1]
            collected |= _readonly_names(all_schemas.get(parent_name, {}))
        # Properties declared inline in this allOf entry
        collected |= _readonly_names(part)

    return frozenset(collected)
Used by: - model/watch_base.py: Track when writable fields are edited - api/Watch.py: Filter readonly fields from PUT requests """ return _resolve_readonly_fields('Watch') @functools.cache def get_readonly_tag_fields(): """ Extract readOnly field names from Tag schema in OpenAPI spec. Returns readOnly fields from WatchBase (uuid, date_created) + Tag-specific readOnly fields. """ return _resolve_readonly_fields('Tag') ================================================ FILE: changedetectionio/notification/__init__.py ================================================ from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH default_notification_format = 'htmlcolor' default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n' default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}' # The values (markdown etc) are from apprise NotifyFormat, # But to avoid importing the whole heavy module just use the same strings here. valid_notification_formats = { 'text': 'Plain Text', 'html': 'HTML', 'htmlcolor': 'HTML Color', 'markdown': 'Markdown to HTML', # Used only for editing a watch (not for global) USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH } ================================================ FILE: changedetectionio/notification/apprise_plugin/__init__.py ================================================ ================================================ FILE: changedetectionio/notification/apprise_plugin/assets.py ================================================ from apprise import AppriseAsset # Refer to: # https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object APPRISE_APP_ID = "changedetection.io" APPRISE_APP_DESC = "ChangeDetection.io best and simplest website monitoring and change detection" APPRISE_APP_URL = "https://changedetection.io" APPRISE_AVATAR_URL = 
"https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png" apprise_asset = AppriseAsset( app_id=APPRISE_APP_ID, app_desc=APPRISE_APP_DESC, app_url=APPRISE_APP_URL, image_url_logo=APPRISE_AVATAR_URL, ) ================================================ FILE: changedetectionio/notification/apprise_plugin/custom_handlers.py ================================================ """ Custom Apprise HTTP Handlers with format= Parameter Support IMPORTANT: This module works around a limitation in Apprise's @notify decorator. THE PROBLEM: ------------- When using Apprise's @notify decorator to create custom notification handlers, the decorator creates a CustomNotifyPlugin that uses parse_url(..., simple=True) to parse URLs. This simple parsing mode does NOT extract the format= query parameter from the URL and set it as a top-level parameter that NotifyBase.__init__ can use to set notify_format. As a result: 1. URL: post://example.com/webhook?format=html 2. Apprise parses this and sees format=html in qsd (query string dictionary) 3. But it does NOT extract it and pass it to NotifyBase.__init__ 4. NotifyBase defaults to notify_format=TEXT 5. When you call apobj.notify(body="<html>...", body_format="html"): - Apprise sees: input format = html, output format (notify_format) = text - Apprise calls convert_between("html", "text", body) - This strips all HTML tags, leaving only plain text 6. Your custom handler receives stripped plain text instead of HTML THE SOLUTION: ------------- Instead of using the @notify decorator directly, we: 1. Manually register custom plugins using plugins.N_MGR.add() 2. Create a CustomHTTPHandler class that extends CustomNotifyPlugin 3. Override __init__ to extract format= from qsd and set it as kwargs['format'] 4. Call NotifyBase.__init__ which properly sets notify_format from kwargs['format'] 5. 
Set up _default_args like CustomNotifyPlugin does for compatibility This ensures that when format=html is in the URL: - notify_format is set to HTML - Apprise sees: input format = html, output format = html - No conversion happens (convert_between returns content unchanged) - Your custom handler receives the original HTML intact TESTING: -------- To verify this works: >>> apobj = apprise.Apprise() >>> apobj.add('post://localhost:5005/test?format=html') >>> for server in apobj: ... print(server.notify_format) # Should print: html (not text) >>> apobj.notify(body='<span>Test</span>', body_format='html') # Your handler should receive '<span>Test</span>' not 'Test' """ import json import re from urllib.parse import unquote_plus import requests from apprise import plugins from apprise.decorators.base import CustomNotifyPlugin from apprise.utils.parse import parse_url as apprise_parse_url, url_assembly from apprise.utils.logic import dict_full_update from loguru import logger from requests.structures import CaseInsensitiveDict SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"} def notify_supported_methods(func): """Register custom HTTP method handlers that properly support format= parameter.""" for method in SUPPORTED_HTTP_METHODS: _register_http_handler(method, func) _register_http_handler(f"{method}s", func) return func def _register_http_handler(schema, send_func): """Register a custom HTTP handler that extracts format= from URL query parameters.""" # Parse base URL base_url = f"{schema}://" base_args = apprise_parse_url(base_url, default_schema=schema, verify_host=False, simple=True) class CustomHTTPHandler(CustomNotifyPlugin): secure_protocol = schema service_name = f"Custom HTTP - {schema.upper()}" _base_args = base_args def __init__(self, **kwargs): # Extract format from qsd and set it as a top-level kwarg # This allows NotifyBase.__init__ to properly set notify_format if 'qsd' in kwargs and 'format' in kwargs['qsd']: kwargs['format'] = 
kwargs['qsd']['format'] # Call NotifyBase.__init__ (skip CustomNotifyPlugin.__init__) super(CustomNotifyPlugin, self).__init__(**kwargs) # Set up _default_args like CustomNotifyPlugin does self._default_args = {} kwargs.pop("secure", None) dict_full_update(self._default_args, self._base_args) dict_full_update(self._default_args, kwargs) self._default_args["url"] = url_assembly(**self._default_args) __send = staticmethod(send_func) def send(self, body, title="", notify_type="info", *args, **kwargs): """Call the custom send function.""" try: result = self.__send( body, title, notify_type, *args, meta=self._default_args, **kwargs ) return True if result is None else bool(result) except Exception as e: self.logger.warning(f"Exception in custom HTTP handler: {e}") return False # Register the plugin plugins.N_MGR.add( plugin=CustomHTTPHandler, schemas=schema, send_func=send_func, url=base_url, ) def _get_auth(parsed_url: dict) -> str | tuple[str, str]: user: str | None = parsed_url.get("user") password: str | None = parsed_url.get("password") if user is not None and password is not None: return (unquote_plus(user), unquote_plus(password)) if user is not None: return unquote_plus(user) return "" def _get_headers(parsed_url: dict, body: str) -> CaseInsensitiveDict: headers = CaseInsensitiveDict( {unquote_plus(k).title(): unquote_plus(v) for k, v in parsed_url["qsd+"].items()} ) # If Content-Type is not specified, guess if the body is a valid JSON if headers.get("Content-Type") is None: try: json.loads(body) headers["Content-Type"] = "application/json; charset=utf-8" except Exception: pass return headers def _get_params(parsed_url: dict) -> CaseInsensitiveDict: # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation # In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise # but here we are making straight requests, so we need todo convert this against apprise's logic params = 
@notify_supported_methods
def apprise_http_custom_handler(
    body: str,
    title: str,
    notify_type: str,
    meta: dict,
    body_format: str = None,
    *args,
    **kwargs,
) -> bool:
    """
    Send a notification as a plain HTTP request (get://, posts://, ...).

    The HTTP method and the http/https transport are derived from the custom
    schema in ``meta`` (a trailing "s" selects https). Headers, query
    parameters and credentials are taken from the notification URL.

    Args:
        body: Payload to send as the request body.
        title: Notification title (unused by this handler).
        notify_type: Apprise notification type (unused by this handler).
        meta: Apprise handler metadata; 'url' and 'schema' are read.
        body_format: Format of ``body`` (unused by this handler).

    Returns:
        True when the request succeeded, False when the URL cannot be parsed.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx response (via ``raise_for_status``).
    """
    # (connect, read) seconds. Without a timeout requests waits forever, so a
    # stalled endpoint would block the notification sender indefinitely.
    request_timeout = (10, 60)

    url: str = meta.get("url")
    schema: str = meta.get("schema")
    # "posts" -> "POST": the trailing "s" only selects TLS
    method: str = re.sub(r"s$", "", schema).upper()

    # Convert /foobar?+some-header=hello to proper header dictionary
    parsed_url: dict[str, str | dict | None] | None = apprise_parse_url(url)
    if parsed_url is None:
        return False

    auth = _get_auth(parsed_url=parsed_url)
    headers = _get_headers(parsed_url=parsed_url, body=body)
    params = _get_params(parsed_url=parsed_url)

    # Swap the custom schema for the real transport scheme
    transport = "https" if schema.endswith("s") else "http"
    url = re.sub(rf"^{re.escape(schema)}", transport, parsed_url.get("url"))

    response = requests.request(
        method=method,
        url=url,
        auth=auth,
        headers=headers,
        params=params,
        data=body.encode("utf-8") if isinstance(body, str) else body,
        timeout=request_timeout,
    )

    response.raise_for_status()

    logger.info(f"Successfully sent custom notification to {url}")
    return True
# Gray (#808080) DISCORD_COLOR_REMOVED = 16711680 # Red (#FF0000) DISCORD_COLOR_ADDED = 65280 # Green (#00FF00) DISCORD_COLOR_CHANGED = 16753920 # Orange (#FFA500) DISCORD_COLOR_CHANGED_INTO = 3447003 # Blue (#5865F2 - Discord blue) DISCORD_COLOR_WARNING = 16776960 # Yellow (#FFFF00) class NotifyDiscordCustom(NotifyDiscord): """ Custom Discord notification handler that supports multiple colored embeds for showing removed (red) and added (green) content separately. """ def send(self, body, title="", notify_type=None, attach=None, **kwargs): """ Override send method to create custom embeds with red/green colors for removed/added content when placeholders are present. """ # Check if body contains our diff placeholders has_removed = REMOVED_PLACEMARKER_OPEN in body has_added = ADDED_PLACEMARKER_OPEN in body has_changed = CHANGED_PLACEMARKER_OPEN in body has_changed_into = CHANGED_INTO_PLACEMARKER_OPEN in body # If we have diff placeholders and we're in markdown/html format, create custom embeds if (has_removed or has_added or has_changed or has_changed_into) and self.notify_format in (NotifyFormat.MARKDOWN, NotifyFormat.HTML): return self._send_with_colored_embeds(body, title, notify_type, attach, **kwargs) # Otherwise, use the parent class's default behavior return super().send(body, title, notify_type, attach, **kwargs) def _send_with_colored_embeds(self, body, title, notify_type, attach, **kwargs): """ Send Discord message with embeds in the original diff order. Preserves the sequence: unchanged -> removed -> added -> unchanged, etc. 
""" from datetime import datetime, timezone payload = { "tts": self.tts, "wait": self.tts is False, } if self.flags: payload["flags"] = self.flags # Acquire image_url image_url = self.image_url(notify_type) if self.avatar and (image_url or self.avatar_url): payload["avatar_url"] = self.avatar_url if self.avatar_url else image_url if self.user: payload["username"] = self.user # Associate our thread_id with our message params = {"thread_id": self.thread_id} if self.thread_id else None # Build embeds array preserving order embeds = [] # Add title as plain bold text in message content (not an embed) if title: payload["content"] = f"**{title}**" # Parse the body into ordered chunks chunks = self._parse_body_into_chunks(body) # Discord limits: # - Max 10 embeds per message # - Max 6000 characters total across all embeds # - Max 4096 characters per embed description max_embeds = 10 max_total_chars = 6000 max_embed_description = 4096 # All 10 embed slots are available for content max_content_embeds = max_embeds # Start character count total_chars = 0 # Create embeds from chunks in order (no titles, just color coding) for chunk_type, content in chunks: if not content.strip(): continue # Truncate individual embed description if needed if len(content) > max_embed_description: content = content[:max_embed_description - 3] + "..." 
# Check if we're approaching the embed count limit # We need room for the warning embed, so stop at max_content_embeds - 1 current_content_embeds = len(embeds) if current_content_embeds >= max_content_embeds - 1: # Add a truncation notice (this will be the 10th embed) embeds.append({ "description": "⚠️ Content truncated (Discord 10 embed limit reached) - Tip: Select 'Plain Text' or 'HTML' format for longer diffs", "color": DISCORD_COLOR_WARNING, }) break # Check if adding this embed would exceed total character limit if total_chars + len(content) > max_total_chars: # Add a truncation notice remaining_chars = max_total_chars - total_chars if remaining_chars > 100: # Add partial content if we have room truncated_content = content[:remaining_chars - 100] + "..." embeds.append({ "description": truncated_content, "color": (DISCORD_COLOR_UNCHANGED if chunk_type == "unchanged" else DISCORD_COLOR_REMOVED if chunk_type == "removed" else DISCORD_COLOR_ADDED), }) embeds.append({ "description": "⚠️ Content truncated (Discord 6000 char limit reached)\nTip: Select 'Plain Text' or 'HTML' format for longer diffs", "color": DISCORD_COLOR_WARNING, }) break if chunk_type == "unchanged": embeds.append({ "description": content, "color": DISCORD_COLOR_UNCHANGED, }) elif chunk_type == "removed": embeds.append({ "description": content, "color": DISCORD_COLOR_REMOVED, }) elif chunk_type == "added": embeds.append({ "description": content, "color": DISCORD_COLOR_ADDED, }) elif chunk_type == "changed": # Changed (old value) - use orange to distinguish from pure removal embeds.append({ "description": content, "color": DISCORD_COLOR_CHANGED, }) elif chunk_type == "changed_into": # Changed into (new value) - use blue to distinguish from pure addition embeds.append({ "description": content, "color": DISCORD_COLOR_CHANGED_INTO, }) total_chars += len(content) if embeds: payload["embeds"] = embeds # Send the payload using parent's _send method if not self._send(payload, params=params): return False 
# Handle attachments if present if attach and self.attachment_support: payload.update({ "tts": False, "wait": True, }) payload.pop("embeds", None) payload.pop("content", None) payload.pop("allow_mentions", None) for attachment in attach: self.logger.info(f"Posting Discord Attachment {attachment.name}") if not self._send(payload, params=params, attach=attachment): return False return True def _parse_body_into_chunks(self, body): """ Parse the body into ordered chunks of (type, content) tuples. Types: "unchanged", "removed", "added", "changed", "changed_into" Preserves the original order of the diff. """ chunks = [] position = 0 while position < len(body): # Find the next marker next_removed = body.find(REMOVED_PLACEMARKER_OPEN, position) next_added = body.find(ADDED_PLACEMARKER_OPEN, position) next_changed = body.find(CHANGED_PLACEMARKER_OPEN, position) next_changed_into = body.find(CHANGED_INTO_PLACEMARKER_OPEN, position) # Determine which marker comes first if next_removed == -1 and next_added == -1 and next_changed == -1 and next_changed_into == -1: # No more markers, rest is unchanged if position < len(body): chunks.append(("unchanged", body[position:])) break # Find the earliest marker next_marker_pos = None next_marker_type = None # Compare all marker positions to find the earliest markers = [] if next_removed != -1: markers.append((next_removed, "removed")) if next_added != -1: markers.append((next_added, "added")) if next_changed != -1: markers.append((next_changed, "changed")) if next_changed_into != -1: markers.append((next_changed_into, "changed_into")) if markers: next_marker_pos, next_marker_type = min(markers, key=lambda x: x[0]) # Add unchanged content before the marker if next_marker_pos > position: chunks.append(("unchanged", body[position:next_marker_pos])) # Find the closing marker if next_marker_type == "removed": open_marker = REMOVED_PLACEMARKER_OPEN close_marker = REMOVED_PLACEMARKER_CLOSED elif next_marker_type == "added": open_marker = 
# Register the custom Discord handler with Apprise
# This will override the built-in discord:// handler
@notify(on="discord")
def discord_custom_wrapper(body, title, notify_type, meta, body_format=None, *args, **kwargs):
    """
    Wrapper function to make the custom Discord handler work with Apprise's decorator system.

    The function itself sends nothing: it only writes an info log line and
    reports success. None of its arguments are used.

    Note: This decorator approach may not work for overriding built-in plugins.
    The class-based approach above is the proper way to extend NotifyDiscord.

    :param body: Notification body (unused)
    :param title: Notification title (unused)
    :param notify_type: Apprise notification type (unused)
    :param meta: Metadata supplied by the decorator system (unused)
    :param body_format: Format of the body (unused)
    :return: Always True
    """
    logger.info("Custom Discord handler called")
    # This is here for potential future use with decorator-based registration
    return True
""" # All line feed types should be removed and then this function should only be fed <br>'s # Then it works with our <pre> styling without double linefeeds content = content.translate(str.maketrans('', '', '\r\n')) if title: import html title = html.escape(title) else: title = '' # 2. Full email-safe HTML html_email = f"""<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <meta name="x-apple-disable-message-reformatting"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <!--[if mso]> <style> body, div, pre, td {{ font-family: "Courier New", Courier, monospace !important; }} </style> <![endif]--> <title>{title}
{content}
""" return html_email ================================================ FILE: changedetectionio/notification/handler.py ================================================ import time import re import apprise from apprise import NotifyFormat from loguru import logger from urllib.parse import urlparse from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL from .email_helpers import as_monospaced_html_email from ..diff import HTML_REMOVED_STYLE, REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, ADDED_PLACEMARKER_OPEN, HTML_ADDED_STYLE, \ ADDED_PLACEMARKER_CLOSED, CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, CHANGED_PLACEMARKER_OPEN, \ CHANGED_PLACEMARKER_CLOSED, HTML_CHANGED_STYLE, HTML_CHANGED_INTO_STYLE import re from ..notification_service import NotificationContextData, add_rendered_diff_to_notification_vars newline_re = re.compile(r'\r\n|\r|\n') def markup_text_links_to_html(body): """ Convert plaintext to HTML with clickable links. Uses Jinja2's escape and Markup for XSS safety. """ from linkify_it import LinkifyIt from markupsafe import Markup, escape linkify = LinkifyIt() # Match URLs in the ORIGINAL text (before escaping) matches = linkify.match(body) if not matches: # No URLs, just escape everything return Markup(escape(body)) result = [] last_index = 0 # Process each URL match for match in matches: # Add escaped text before the URL if match.index > last_index: text_part = body[last_index:match.index] result.append(escape(text_part)) # Add the link with escaped URL (both in href and display) url = match.url result.append(Markup(f'{escape(url)}')) last_index = match.last_index # Add remaining escaped text if last_index < len(body): result.append(escape(body[last_index:])) # Join all parts return str(Markup(''.join(str(part) for part in result))) def notification_format_align_with_apprise(n_format : str): """ Correctly align changedetection's formats with apprise's formats Probably these are the same - but good to be sure. 
def notification_format_align_with_apprise(n_format : str):
    """
    Map one of changedetection's notification format names onto the value of
    the matching Apprise ``NotifyFormat`` member.

    Matching is by prefix, so variants such as 'htmlcolor' collapse to
    Apprise's plain HTML format. Anything that is neither an 'html...' nor a
    'markdown...' name (including 'text...' and unknown names) becomes the
    TEXT format.

    These set the expected OUTPUT format type

    :param n_format: changedetection notification format name
    :return: The ``.value`` of the corresponding ``NotifyFormat`` member
    """
    # Apprise only knows 'html' not 'htmlcolor' etc, which shouldnt matter here
    prefix_to_format = (
        ('html', NotifyFormat.HTML),
        ('markdown', NotifyFormat.MARKDOWN),
    )
    for prefix, apprise_format in prefix_to_format:
        if n_format.startswith(prefix):
            return apprise_format.value

    # 'text...' and every unrecognised name fall back to plain text
    return NotifyFormat.TEXT.value
def apply_discord_markdown_to_body(n_body):
    """
    Turn diff placemarkers into Discord-flavoured markdown.

    Removed and changed (old value) spans are wrapped in ``~~`` and added and
    changed-into (new value) spans are wrapped in ``**``.

    Whitespace directly inside a placemarker pair is moved outside the
    markdown markers, because markdown emphasis breaks when the marker is
    adjacent to whitespace.

    :param n_body: Text containing diff placemarkers
    :return: Text with each placemarker pair replaced by markdown markers
    """
    import re

    # (opening placemarker, closing placemarker, markdown wrapper)
    marker_to_markdown = (
        (REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, '~~'),
        (ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED, '**'),
        (CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED, '~~'),
        (CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, '**'),
    )

    for opener, closer, md in marker_to_markdown:
        # opening tag, leading whitespace, content, trailing whitespace, closing tag
        span_re = re.compile(
            re.escape(opener) + r'(\s*)(.*?)?(\s*)' + re.escape(closer),
            flags=re.DOTALL
        )

        def _wrap(match, md=md):
            leading, inner, trailing = match.group(1), match.group(2), match.group(3)
            return f"{leading}{md}{inner}{md}{trailing}"

        n_body = span_re.sub(_wrap, n_body)

    return n_body
:return: Processed text with placemarkers replaced """ if not text: return text if url.startswith('tgram://'): # Telegram only supports a limited subset of HTML # Use strikethrough for removed content, bold for added content text = text.replace(REMOVED_PLACEMARKER_OPEN, '') text = text.replace(REMOVED_PLACEMARKER_CLOSED, '') text = text.replace(ADDED_PLACEMARKER_OPEN, '') text = text.replace(ADDED_PLACEMARKER_CLOSED, '') # Handle changed/replaced lines (old → new) text = text.replace(CHANGED_PLACEMARKER_OPEN, '') text = text.replace(CHANGED_PLACEMARKER_CLOSED, '') text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, '') text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, '') elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') or url.startswith('https://discord.com/api')) and requested_output_format == 'html': # Discord doesn't support HTML, use Discord markdown text = apply_discord_markdown_to_body(n_body=text) elif requested_output_format == 'htmlcolor': # https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050 text = text.replace(REMOVED_PLACEMARKER_OPEN, f'') text = text.replace(REMOVED_PLACEMARKER_CLOSED, f'') text = text.replace(ADDED_PLACEMARKER_OPEN, f'') text = text.replace(ADDED_PLACEMARKER_CLOSED, f'') # Handle changed/replaced lines (old → new) text = text.replace(CHANGED_PLACEMARKER_OPEN, f'') text = text.replace(CHANGED_PLACEMARKER_CLOSED, f'') text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'') text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'') elif requested_output_format == 'markdown': # Markdown to HTML - Apprise will convert this to HTML text = apply_standard_markdown_to_body(n_body=text) else: # plaintext, html, and default - use simple text markers text = text.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ') text = text.replace(REMOVED_PLACEMARKER_CLOSED, '') text = text.replace(ADDED_PLACEMARKER_OPEN, '(added) ') text = text.replace(ADDED_PLACEMARKER_CLOSED, '') text = 
text.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ') text = text.replace(CHANGED_PLACEMARKER_CLOSED, f'') text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ') text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'') return text def apply_service_tweaks(url, n_body, n_title, requested_output_format): logger.debug(f"Applying markup in '{requested_output_format}' mode") # Re 323 - Limit discord length to their 2000 char limit total or it wont send. # Because different notifications may require different pre-processing, run each sequentially :( # 2000 bytes minus - # 200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers # Length of URL - Incase they specify a longer custom avatar_url if not n_body or not n_body.strip(): return url, n_body, n_title # Normalize URL scheme to lowercase to prevent case-sensitivity issues # e.g., "Discord://webhook" -> "discord://webhook", "TGRAM://bot123" -> "tgram://bot123" scheme_separator_pos = url.find('://') if scheme_separator_pos > 0: url = url[:scheme_separator_pos].lower() + url[scheme_separator_pos:] # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload parsed = urlparse(url) k = '?' if not parsed.query else '&' if url and not 'avatar_url' in url \ and not url.startswith('mail') \ and not url.startswith('post') \ and not url.startswith('get') \ and not url.startswith('delete') \ and not url.startswith('put'): url += k + f"avatar_url={APPRISE_AVATAR_URL}" # Replace placemarkers in title first (this was the missing piece causing the bug) # Titles are ALWAYS plain text across all notification services (Discord embeds, Slack attachments, # email Subject headers, etc.), so we always use 'text' format for title placemarker replacement # Looking over apprise library it seems that all plugins only expect plain-text. 
n_title = replace_placemarkers_in_text(n_title, url, 'text') if url.startswith('tgram://'): # Telegram only supports a limit subset of HTML, remove the '
' we place in. # re https://github.com/dgtlmoon/changedetection.io/issues/555 # @todo re-use an existing library we have already imported to strip all non-allowed tags n_body = n_body.replace('
', '\n') n_body = n_body.replace('
', '\n') n_body = newline_re.sub('\n', n_body) # Replace placemarkers for body n_body = replace_placemarkers_in_text(n_body, url, requested_output_format) # real limit is 4096, but minus some for extra metadata payload_max_size = 3600 body_limit = max(0, payload_max_size - len(n_title)) n_title = n_title[0:payload_max_size] n_body = n_body[0:body_limit] elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') or url.startswith('https://discord.com/api'))\ and 'html' in requested_output_format: # Discord doesn't support HTML, replace
with newlines n_body = n_body.strip().replace('
', '\n') n_body = n_body.replace('
', '\n') n_body = newline_re.sub('\n', n_body) # Don't replace placeholders or truncate here - let the custom Discord plugin handle it # The plugin will use embeds (6000 char limit across all embeds) if placeholders are present, # or plain content (2000 char limit) otherwise # Only do placeholder replacement if NOT using htmlcolor (which triggers embeds in custom plugin) if requested_output_format == 'html': # No diff placeholders, use Discord markdown for any other formatting # Use Discord markdown: strikethrough for removed, bold for added n_body = replace_placemarkers_in_text(n_body, url, requested_output_format) # Apply 2000 char limit for plain content payload_max_size = 1700 body_limit = max(0, payload_max_size - len(n_title)) n_title = n_title[0:payload_max_size] n_body = n_body[0:body_limit] # else: our custom Discord plugin will convert any placeholders left over into embeds with color bars # Is not discord/tgram and they want htmlcolor elif requested_output_format == 'htmlcolor': n_body = replace_placemarkers_in_text(n_body, url, requested_output_format) n_body = newline_re.sub('
\n', n_body) elif requested_output_format == 'html': n_body = replace_placemarkers_in_text(n_body, url, requested_output_format) n_body = newline_re.sub('
\n', n_body) elif requested_output_format == 'markdown': # Markdown to HTML - Apprise will convert this to HTML n_body = replace_placemarkers_in_text(n_body, url, requested_output_format) else: #plaintext etc default n_body = replace_placemarkers_in_text(n_body, url, requested_output_format) return url, n_body, n_title def process_notification(n_object: NotificationContextData, datastore): from changedetectionio.jinja2_custom import render as jinja_render from . import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, default_notification_format, valid_notification_formats # be sure its registered from .apprise_plugin.custom_handlers import apprise_http_custom_handler # Register custom Discord plugin from .apprise_plugin.discord import NotifyDiscordCustom if not isinstance(n_object, NotificationContextData): raise TypeError(f"Expected NotificationContextData, got {type(n_object)}") now = time.time() if n_object.get('notification_timestamp'): logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s") # Insert variables into the notification content notification_parameters = create_notification_parameters(n_object, datastore) requested_output_format = n_object.get('notification_format', default_notification_format) logger.debug(f"Requested notification output format: '{requested_output_format}'") # If we arrived with 'System default' then look it up if requested_output_format == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: # Initially text or whatever requested_output_format = datastore.data['settings']['application'].get('notification_format', default_notification_format) requested_output_format_original = requested_output_format # Now clean it up so it fits perfectly with apprise requested_output_format = notification_format_align_with_apprise(n_format=requested_output_format) logger.trace(f"Complete notification body including Jinja and placeholders calculated in {time.time() - now:.2f}s") # 
https://github.com/caronc/apprise/wiki/Development_LogCapture # Anything higher than or equal to WARNING (which covers things like Connection errors) # raise it as an exception sent_objs = [] if 'as_async' in n_object: apprise_asset.async_mode = n_object.get('as_async') apobj = apprise.Apprise(debug=True, asset=apprise_asset) # Override Apprise's built-in Discord plugin with our custom one # This allows us to use colored embeds for diff content # First remove the built-in discord plugin, then add our custom one apprise.plugins.N_MGR.remove('discord') apprise.plugins.N_MGR.add(NotifyDiscordCustom, schemas='discord') if not n_object.get('notification_urls'): return None n_object.update(add_rendered_diff_to_notification_vars( notification_scan_text=n_object.get('notification_body', '')+n_object.get('notification_title', ''), current_snapshot=n_object.get('current_snapshot'), prev_snapshot=n_object.get('prev_snapshot'), # Should always be false for 'text' mode or its too hard to read # But otherwise, this could be some setting word_diff=False if requested_output_format_original == 'text' else True, ) ) with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs): for url in n_object['notification_urls']: n_body = jinja_render(template_str=n_object.get('notification_body', ''), **notification_parameters) n_title = jinja_render(template_str=n_object.get('notification_title', ''), **notification_parameters) if n_object.get('markup_text_links_to_html_links'): n_body = markup_text_links_to_html(body=n_body) url = url.strip() if not url or url.startswith('#'): logger.debug(f"Skipping commented out or empty notification URL - '{url}'") continue logger.info(f">> Process Notification: AppRise start notifying '{url}'") url = jinja_render(template_str=url, **notification_parameters) # If it's a plaintext document, and they want HTML type email/alerts, so it needs to be escaped watch_mime_type = n_object.get('watch_mime_type') if watch_mime_type and 'text/' in 
watch_mime_type.lower() and not 'html' in watch_mime_type.lower(): if 'html' in requested_output_format: from markupsafe import escape n_body = str(escape(n_body)) if 'html' in requested_output_format: # Since the n_body is always some kind of text from the 'diff' engine, attempt to preserve whitespaces that get sent to the HTML output # But only where its more than 1 consecutive whitespace, otherwise "and this" becomes "and this" etc which is too much. n_body = n_body.replace(' ', '  ') (url, n_body, n_title) = apply_service_tweaks(url=url, n_body=n_body, n_title=n_title, requested_output_format=requested_output_format_original) apprise_input_format = "NO-THANKS-WE-WILL-MANAGE-ALL-OF-THIS" if not 'format=' in url: parsed_url = urlparse(url) prefix_add_to_url = '?' if not parsed_url.query else '&' # THIS IS THE TRICK HOW TO DISABLE APPRISE DOING WEIRD AUTO-CONVERSION WITH BREAKING BR TAGS ETC if 'html' in requested_output_format: url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}" apprise_input_format = NotifyFormat.HTML.value elif 'text' in requested_output_format: url = f"{url}{prefix_add_to_url}format={NotifyFormat.TEXT.value}" apprise_input_format = NotifyFormat.TEXT.value elif requested_output_format == NotifyFormat.MARKDOWN.value: # Convert markdown to HTML ourselves since not all plugins do this from apprise.conversion import markdown_to_html # Make sure there are paragraph breaks around horizontal rules n_body = n_body.replace('---', '\n\n---\n\n') n_body = markdown_to_html(n_body) url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}" requested_output_format = NotifyFormat.HTML.value apprise_input_format = NotifyFormat.HTML.value # Changed from MARKDOWN to HTML else: # ?format was IN the apprise URL, they are kind of on their own here, we will try our best if 'format=html' in url: n_body = newline_re.sub('
\r\n', n_body) # This will also prevent apprise from doing conversion apprise_input_format = NotifyFormat.HTML.value requested_output_format = NotifyFormat.HTML.value elif 'format=text' in url: apprise_input_format = NotifyFormat.TEXT.value requested_output_format = NotifyFormat.TEXT.value #@todo on null:// (only if its a 1 url with null) probably doesnt need to actually .add/setup/etc sent_objs.append({'title': n_title, 'body': n_body, 'url': url, # So that we can do a null:// call and get back exactly what would have been sent 'original_context': n_object }) if not url.startswith('null://'): apobj.add(url) # Since the output is always based on the plaintext of the 'diff' engine, wrap it nicely. # It should always be similar to the 'history' part of the UI. if url.startswith('mail') and 'html' in requested_output_format: if not ' Tag settings > Global settings """ from changedetectionio.notification import ( USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, default_notification_body, default_notification_title ) # Would be better if this was some kind of Object where Watch can reference the parent datastore etc v = watch.get(var_name) if v and not watch.get('notification_muted'): if var_name == 'notification_format' and v == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: return datastore.data['settings']['application'].get('notification_format') return v tags = datastore.get_all_tags_for_watch(uuid=watch.get('uuid')) if tags: for tag_uuid, tag in tags.items(): v = tag.get(var_name) if v and not tag.get('notification_muted'): return v if datastore.data['settings']['application'].get(var_name): return datastore.data['settings']['application'].get(var_name) # Otherwise could be defaults if var_name == 'notification_format': return USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH if var_name == 'notification_body': return default_notification_body if var_name == 'notification_title': return default_notification_title return None class FormattableTimestamp(str): """ 
class FormattableTimestamp(str):
    """
    A str subclass representing a formatted datetime. As a plain string it renders
    with the default format, but can also be called with a custom format argument
    in Jinja2 templates:

        {{ change_datetime }}                        → '2024-01-15 10:30:00 UTC'
        {{ change_datetime(format='%Y') }}           → '2024'
        {{ change_datetime(format='%A') }}           → 'Monday'
        {{ change_datetime(format='%Y-%m-%d') }}     → '2024-01-15'

    The timestamp is rendered in the system's local timezone, using the UTC
    offset that applies at the instant of the timestamp itself (so a summer
    timestamp viewed in winter still shows the summer offset).

    Being a str subclass means it is natively JSON serializable.
    """
    _DEFAULT_FORMAT = '%Y-%m-%d %H:%M:%S %Z'

    def __new__(cls, timestamp):
        """
        :param timestamp: Unix epoch seconds (anything ``int()`` accepts)
        """
        dt_utc = datetime.datetime.fromtimestamp(int(timestamp), tz=datetime.timezone.utc)
        # astimezone() without arguments resolves the local offset for *this*
        # instant. Borrowing the tzinfo of "now" would pin today's fixed offset
        # onto every timestamp and be an hour off across a DST boundary.
        dt_local = dt_utc.astimezone()
        try:
            formatted = dt_local.strftime(cls._DEFAULT_FORMAT)
        except Exception:
            # Best effort: fall back to ISO 8601 rather than failing
            formatted = dt_local.isoformat()
        instance = super().__new__(cls, formatted)
        instance._dt = dt_local
        return instance

    def __call__(self, format=_DEFAULT_FORMAT):
        """
        Render the timestamp with a custom strftime format.

        :param format: strftime format string (name kept for template compatibility)
        :return: The formatted string, or the ISO 8601 form if formatting fails
        """
        try:
            return self._dt.strftime(format)
        except Exception:
            # Templates are user supplied; a bad format should not break the notification
            return self._dt.isoformat()
class FormattableDiff(str):
    """
    A str subclass holding a rendered diff between two snapshots.

    Used as a plain string it is the diff rendered with the options given at
    construction time. It can also be called from a Jinja2 template to
    re-render the same two snapshots with extra options:

        {{ diff }}                           → default diff output
        {{ diff(lines=5) }}                  → truncate to 5 lines
        {{ diff(added_only=true) }}          → only show added lines
        {{ diff(removed_only=true) }}        → only show removed lines
        {{ diff(context=3) }}                → 3 lines of context around changes
        {{ diff(word_diff=false) }}          → line-level diff instead of word-level
        {{ diff(lines=10, added_only=true) }} → combine args
        {{ diff_added(lines=5) }}            → works on any diff_* variant too

    Being a str subclass means it is natively JSON serializable.
    """

    def __new__(cls, prev_snapshot, current_snapshot, **base_kwargs):
        """
        :param prev_snapshot: Older content
        :param current_snapshot: Newer content
        :param base_kwargs: Default options forwarded to ``render_diff``
        """
        rendered = ''
        # Nothing to compare, so skip the renderer (and its import) entirely
        if prev_snapshot or current_snapshot:
            from changedetectionio import diff as diff_module
            rendered = diff_module.render_diff(prev_snapshot, current_snapshot, **base_kwargs)

        obj = super().__new__(cls, rendered)
        obj._prev = prev_snapshot
        obj._current = current_snapshot
        obj._base_kwargs = base_kwargs
        return obj

    def __call__(self, lines=None, added_only=False, removed_only=False, context=0,
                 word_diff=None, case_insensitive=False, ignore_junk=False):
        """
        Re-render the diff with per-call options layered on the stored defaults.

        :param lines: Keep only this many leading lines of the output (None keeps all)
        :param added_only: Leave removed content out
        :param removed_only: Leave added content out
        :param context: Number of context lines around changes (falsy keeps the default)
        :param word_diff: Force word-level (True) or line-level (False) diffing; None keeps the default
        :param case_insensitive: Compare ignoring case
        :param ignore_junk: Ask the renderer to ignore junk
        :return: The rendered diff as a plain str
        """
        from changedetectionio import diff as diff_module

        options = {**self._base_kwargs}
        if added_only:
            options['include_removed'] = False
        if removed_only:
            options['include_added'] = False
        if context:
            options['context_lines'] = int(context)
        if word_diff is not None:
            options['word_diff'] = bool(word_diff)
        if case_insensitive:
            options['case_insensitive'] = True
        if ignore_junk:
            options['ignore_junk'] = True

        rendered = diff_module.render_diff(self._prev or '', self._current or '', **options)
        if lines is None:
            return rendered

        kept = rendered.splitlines()[:int(lines)]
        return '\n'.join(kept)
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
class NotificationContextData(dict):
    """
    Dict of every variable available to a notification template.

    The keys set up in ``__init__`` double as the list of valid ``{{ tokens }}``.
    Any 'notification_format' value is checked against
    ``valid_notification_formats``, both at construction time and on later
    item assignment.
    """

    def __init__(self, initial_data=None, **kwargs):
        """
        :param initial_data: Optional mapping applied on top of the defaults
        :param kwargs: Optional extra values applied after ``initial_data``
        :raises ValueError: If the resulting 'notification_format' is set and not valid
        """
        # ValidateJinja2Template() validates against the keynames of this dict to check for valid tokens in the body (user submission)
        super().__init__({
            'base_url': None,
            'change_datetime': FormattableTimestamp(time.time()),
            'current_snapshot': None,
            'diff': FormattableDiff('', ''),
            'diff_clean': FormattableDiff('', '', include_change_type_prefix=False),
            'diff_added': FormattableDiff('', '', include_removed=False),
            'diff_added_clean': FormattableDiff('', '', include_removed=False, include_change_type_prefix=False),
            'diff_full': FormattableDiff('', '', include_equal=True),
            'diff_full_clean': FormattableDiff('', '', include_equal=True, include_change_type_prefix=False),
            'diff_patch': FormattableDiff('', '', patch_format=True),
            'diff_removed': FormattableDiff('', '', include_added=False),
            'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
            'diff_url': None,
            'markup_text_links_to_html_links': False,  # If automatic conversion of plaintext to HTML should happen
            'notification_timestamp': time.time(),
            'prev_snapshot': None,
            'preview_url': None,
            'screenshot': None,
            'timestamp_from': None,
            'timestamp_to': None,
            'triggered_text': None,
            'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX',  # Converted to 'watch_uuid' in create_notification_parameters
            'watch_mime_type': None,
            'watch_tag': None,
            'watch_title': None,
            'watch_url': 'https://WATCH-PLACE-HOLDER/',
            'watch_uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX',  # Converted to 'watch_uuid' in create_notification_parameters
        })

        # Mirror the placeholder 'uuid' into 'watch_uuid' before caller data is applied
        self.update({'watch_uuid': self.get('uuid')})

        # Apply any initial data passed in
        if initial_data:
            self.update(initial_data)

        # Apply any keyword arguments
        if kwargs:
            self.update(kwargs)

        # dict.update() does not go through __setitem__, so the format that
        # ended up in the dict is validated explicitly here.
        n_format = self.get('notification_format')
        if n_format and not valid_notification_formats.get(n_format):
            raise ValueError(f'Invalid notification format: "{n_format}"')

    def set_random_for_validation(self):
        """Randomly fills all dict keys with random strings (for validation/testing).
        So we can test the output in the notification body
        """
        import random
        import string

        for key in self.keys():
            # Identifiers, timestamps and every 'diff*' key keep their current values
            if key in ['uuid', 'time', 'watch_uuid', 'change_datetime'] or key.startswith('diff'):
                continue
            rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
            self[key] = rand_str

    def __setitem__(self, key, value):
        """
        Set an item, rejecting an unknown 'notification_format'.

        Placeholders written by ``set_random_for_validation`` (prefixed
        'RANDOM-PLACEHOLDER-') and non-string values skip the check.

        :raises ValueError: If a string 'notification_format' is not valid
        """
        if key == 'notification_format' and isinstance(value, str) and not value.startswith('RANDOM-PLACEHOLDER-'):
            if not valid_notification_formats.get(value):
                raise ValueError(f'Invalid notification format: "{value}"')

        super().__setitem__(key, value)
Args: notification_scan_text: The notification template text to scan for placeholders prev_snapshot: Previous version of content for diff comparison current_snapshot: Current version of content for diff comparison word_diff: Whether to use word-level (True) or line-level (False) diffing Returns: dict: Only the diff placeholders that were found in notification_scan_text, with rendered content """ import re now = time.time() # Define base kwargs for each diff variant — these become the stored defaults # on the FormattableDiff object, so {{ diff(lines=5) }} overrides on top of them diff_specs = { 'diff': {'word_diff': word_diff}, 'diff_clean': {'word_diff': word_diff, 'include_change_type_prefix': False}, 'diff_added': {'word_diff': word_diff, 'include_removed': False}, 'diff_added_clean': {'word_diff': word_diff, 'include_removed': False, 'include_change_type_prefix': False}, 'diff_full': {'word_diff': word_diff, 'include_equal': True}, 'diff_full_clean': {'word_diff': word_diff, 'include_equal': True, 'include_change_type_prefix': False}, 'diff_patch': {'word_diff': word_diff, 'patch_format': True}, 'diff_removed': {'word_diff': word_diff, 'include_added': False}, 'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False}, } ret = {} rendered_count = 0 # Only create FormattableDiff objects for diff keys actually used in the notification text for key in NotificationContextData().keys(): if key.startswith('diff') and key in diff_specs: # Check if this placeholder is actually used in the notification text pattern = rf"(? 
class NotificationService:
    """
    Standalone notification service that handles all notification functionality
    previously embedded in the update_worker class.

    The service holds a reference to the application datastore (read for watch
    and settings data) and to a notification queue onto which prepared
    notification objects are put.
    """

    def __init__(self, datastore, notification_q):
        """
        Args:
            datastore: Object exposing ``data`` (with 'watching' and 'settings' keys)
                and ``update_watch()``.
            notification_q: Queue-like object with ``put()``, or a falsy value
                to disable queueing.
        """
        self.datastore = datastore
        self.notification_q = notification_q

    # The annotation is quoted so it is not evaluated at definition time.
    def queue_notification_for_watch(self, n_object: "NotificationContextData", watch, date_index_from=-2, date_index_to=-1):
        """
        Queue a notification for a watch with full diff rendering and template variables.

        Args:
            n_object: NotificationContextData to populate; updated in place.
            watch: The watch to notify about (may be falsy, e.g. for a test notification).
            date_index_from: Index into the watch history keys used as the "previous" snapshot.
            date_index_to: Index into the watch history keys used as the "current" snapshot.

        Returns:
            The populated n_object.

        Raises:
            TypeError: If n_object is not a NotificationContextData.
        """
        from changedetectionio.notification import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH

        if not isinstance(n_object, NotificationContextData):
            raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")

        dates = []
        trigger_text = ''

        if watch:
            watch_history = watch.history
            dates = list(watch_history.keys())
            trigger_text = watch.get('trigger_text', [])

        # Add text that was triggered
        if len(dates):
            snapshot_contents = watch.get_history_snapshot(timestamp=dates[-1])
        else:
            snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."

        # If we ended up here with "System default"
        if n_object.get('notification_format') == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
            n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format')

        triggered_text = ''
        if len(trigger_text):
            from . import html_tools
            triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
            if triggered_text:
                triggered_text = '\n'.join(triggered_text)

        # Could be called as a 'test notification' with only 1 snapshot available
        prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
        current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"

        if len(dates) > 1:
            prev_snapshot = watch.get_history_snapshot(timestamp=dates[date_index_from])
            current_snapshot = watch.get_history_snapshot(timestamp=dates[date_index_to])

        # With no history at all there is nothing to index; previously this raised
        # IndexError even though the "no snapshot" fallback above anticipates that case.
        # NOTE(review): assumes set_basic_notification_vars tolerates timestamp_changed=None - confirm.
        timestamp_changed = dates[date_index_to] if dates else None

        n_object.update(set_basic_notification_vars(current_snapshot=current_snapshot,
                                                    prev_snapshot=prev_snapshot,
                                                    watch=watch,
                                                    triggered_text=triggered_text,
                                                    timestamp_changed=timestamp_changed))

        if self.notification_q:
            logger.debug("Queued notification for sending")
            self.notification_q.put(n_object)
        else:
            logger.debug("Not queued, no queue defined. Just returning processed data")

        return n_object

    def send_content_changed_notification(self, watch_uuid):
        """
        Send notification when content changes are detected.

        Args:
            watch_uuid: UUID key of the watch in the datastore.

        Returns:
            True if a notification was queued, False if no notification URLs
            resolved, or None if the watch does not exist.

        Raises:
            ValueError: If the watch history contains exactly one entry.
        """
        n_object = NotificationContextData()

        watch = self.datastore.data['watching'].get(watch_uuid)
        if not watch:
            return

        watch_history = watch.history
        dates = list(watch_history.keys())
        # Theoretically it's possible that this could be just 1 long,
        # - In the case that the timestamp key was not unique
        if len(dates) == 1:
            raise ValueError(
                "History index had 2 or more, but only 1 date loaded, timestamps were not unique? maybe two of the same timestamps got written, needs more delay?"
            )

        # Should be a better parent getter in the model object
        # Prefer - Individual watch settings > Tag settings > Global settings (in that order)
        for var_name in ('notification_urls', 'notification_title', 'notification_body', 'notification_format'):
            n_object[var_name] = _check_cascading_vars(self.datastore, var_name, watch)

        # (Individual watch) Only prepare to notify if the rules above matched
        queued = False
        if n_object and n_object.get('notification_urls'):
            queued = True

            count = watch.get('notification_alert_count', 0) + 1
            self.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count})

            self.queue_notification_for_watch(n_object=n_object, watch=watch)

        return queued

    def send_filter_failure_notification(self, watch_uuid):
        """
        Send notification when CSS/XPath filters fail consecutively.

        Does nothing if the watch does not exist. The notification is only put on
        the queue when the watch, or failing that the global settings, has
        notification URLs.

        Args:
            watch_uuid: UUID key of the watch in the datastore.
        """
        threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
        watch = self.datastore.data['watching'].get(watch_uuid)
        if not watch:
            return

        filter_list = ", ".join(watch['include_filters'])
        # @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed
        # {{{{ }}}} because this will be Jinja2 {{ }} tokens
        body = f"""Hello,

Your configured CSS/xPath filters of '{filter_list}' for {{{{watch_url}}}} did not appear on the page after {threshold} attempts.

It's possible the page changed layout and the filter needs updating ( Try the 'Visual Selector' tab )

Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}

Thanks - Your omniscient changedetection.io installation.
"""

        n_object = NotificationContextData({
            'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
            'notification_body': body,
            'notification_format': _check_cascading_vars(self.datastore, 'notification_format', watch),
        })
        n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')

        # Watch-level URLs take precedence over the global application URLs
        if len(watch['notification_urls']):
            n_object['notification_urls'] = watch['notification_urls']
        elif len(self.datastore.data['settings']['application']['notification_urls']):
            n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']

        # Only prepare to notify if the rules above matched
        if 'notification_urls' in n_object:
            n_object.update({
                'watch_url': watch['url'],
                'uuid': watch_uuid,
                'screenshot': None
            })
            self.notification_q.put(n_object)
            logger.debug(f"Sent filter not found notification for {watch_uuid}")
        else:
            logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs")

    def send_step_failure_notification(self, watch_uuid, step_n):
        """
        Send notification when browser steps fail consecutively.

        Does nothing if the watch does not exist. The notification is only put on
        the queue when the watch, or failing that the global settings, has
        notification URLs.

        Args:
            watch_uuid: UUID key of the watch in the datastore.
            step_n: Zero-based index of the failing browser step (reported to the user as step_n + 1).
        """
        watch = self.datastore.data['watching'].get(watch_uuid, False)
        if not watch:
            return

        threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')

        step = step_n + 1
        # @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed
        # {{{{ }}}} because this will be Jinja2 {{ }} tokens
        body = f"""Hello,

Your configured browser step at position {step} for the web page watch {{{{watch_url}}}} did not appear on the page after {threshold} attempts, did the page change layout?

The element may have moved and needs editing, or does it need a delay added?

Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}

Thanks - Your omniscient changedetection.io installation.
"""

        n_object = NotificationContextData({
            'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run",
            'notification_body': body,
            # The cascading lookup is a module-level helper taking the datastore explicitly
            # (same call shape as the other methods of this class); there is no such method on the class.
            'notification_format': _check_cascading_vars(self.datastore, 'notification_format', watch),
        })
        n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')

        # Watch-level URLs take precedence over the global application URLs
        if len(watch['notification_urls']):
            n_object['notification_urls'] = watch['notification_urls']
        elif len(self.datastore.data['settings']['application']['notification_urls']):
            n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']

        # Only prepare to notify if the rules above matched
        if 'notification_urls' in n_object:
            n_object.update({
                'watch_url': watch['url'],
                'uuid': watch_uuid
            })
            self.notification_q.put(n_object)
            logger.error(f"Sent step not found notification for {watch_uuid}")
The fetcher_name should start with 'html_' and the fetcher_class should inherit from changedetectionio.content_fetchers.base.Fetcher Returns: tuple: (str: fetcher_name, class: fetcher_class) """ pass @hookspec def fetcher_status_icon(fetcher_name): """Return status icon HTML attributes for a content fetcher. Args: fetcher_name: The name of the fetcher (e.g., 'html_webdriver', 'html_js_zyte') Returns: str: HTML string containing tags or other status icon elements Empty string if no custom status icon is needed """ pass @hookspec def plugin_static_path(self): """Return the path to the plugin's static files directory. Returns: str: Absolute path to the plugin's static directory, or None if no static files """ pass @hookspec def get_itemprop_availability_override(self, content, fetcher_name, fetcher_instance, url): """Provide custom implementation of get_itemprop_availability for a specific fetcher. This hook allows plugins to provide their own product availability detection when their fetcher is being used. This is called as a fallback when the built-in method doesn't find good data. Args: content: The HTML/text content to parse fetcher_name: The name of the fetcher being used (e.g., 'html_js_zyte') fetcher_instance: The fetcher instance that generated the content url: The URL being watched/checked Returns: dict or None: Dictionary with availability data: { 'price': float or None, 'availability': str or None, # e.g., 'in stock', 'out of stock' 'currency': str or None, # e.g., 'USD', 'EUR' } Or None if this plugin doesn't handle this fetcher or couldn't extract data """ pass @hookspec def plugin_settings_tab(self): """Return settings tab information for this plugin. This hook allows plugins to add their own settings tab to the settings page. Settings will be saved to a separate JSON file in the datastore directory. 
Returns: dict or None: Dictionary with settings tab information: { 'plugin_id': str, # Unique identifier (e.g., 'zyte_fetcher') 'tab_label': str, # Display name for tab (e.g., 'Zyte Fetcher') 'form_class': Form, # WTForms Form class for the settings 'template_path': str, # Optional: path to Jinja2 template (relative to plugin) # If not provided, a default form renderer will be used } Or None if this plugin doesn't provide settings """ pass @hookspec def register_processor(self): """Register an external processor plugin. External packages can implement this hook to register custom processors that will be discovered alongside built-in processors. Returns: dict or None: Dictionary with processor information: { 'processor_name': str, # Machine name (e.g., 'osint_recon') 'processor_module': module, # Module containing processor.py 'processor_class': class, # The perform_site_check class 'metadata': { # Optional metadata 'name': str, # Display name 'description': str, # Description 'processor_weight': int,# Sort weight (lower = higher priority) 'list_badge_text': str, # Badge text for UI } } Return None if this plugin doesn't provide a processor """ pass @hookspec def update_handler_alter(update_handler, watch, datastore): """Modify or wrap the update_handler before it processes a watch. This hook is called after the update_handler (perform_site_check instance) is created but before it calls call_browser() and run_changedetection(). Plugins can use this to: - Wrap the handler to add logging/metrics - Modify handler configuration - Add custom preprocessing logic Args: update_handler: The perform_site_check instance that will process the watch watch: The watch dict being processed datastore: The application datastore Returns: object or None: Return a modified/wrapped handler, or None to keep the original. If multiple plugins return handlers, they are chained in registration order. 
def load_plugins_from_directories():
    """Import and register every plugin module found in the built-in plugin directories.

    Each ``*.py`` file (except ``__init__.py``) inside a configured directory is
    imported as ``changedetectionio.<dir_name>.plugins.<module_name>`` and registered
    with the module-level ``plugin_manager`` under its module name.

    Import and attribute errors for an individual plugin are logged and do not stop
    the remaining plugins from loading.
    """
    # Dictionary of directories to scan for plugins
    plugin_dirs = {
        'conditions': os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'),
        # Add more plugin directories here as needed
    }

    # Note: Removed the direct import of example_word_count_plugin as it's now in the conditions/plugins directory

    for dir_name, dir_path in plugin_dirs.items():
        # isdir (not exists) so that a stray file at this path is skipped instead of
        # making os.listdir() raise
        if not os.path.isdir(dir_path):
            continue

        # Get all Python files (excluding __init__.py)
        for filename in os.listdir(dir_path):
            if filename.endswith(".py") and filename != "__init__.py":
                module_name = filename[:-3]  # Remove .py extension
                module_path = f"changedetectionio.{dir_name}.plugins.{module_name}"

                try:
                    module = importlib.import_module(module_path)
                    # Register the plugin with pluggy
                    plugin_manager.register(module, module_name)
                except (ImportError, AttributeError) as e:
                    # Use the module logger (as the rest of this file does) rather than print()
                    logger.error(f"Error loading plugin {module_name}: {e}")
def collect_fetcher_status_icons(fetcher_name):
    """Ask every plugin for a status icon for the given fetcher and return the first usable one.

    Args:
        fetcher_name: The name of the fetcher (e.g., 'html_webdriver', 'html_js_zyte')

    Returns:
        dict or None: The first hook result that is a non-empty dict, or None when no
        plugin supplied one.

    NOTE(review): the ``fetcher_status_icon`` hookspec in this file documents a ``str``
    return value, but only dict results are accepted here - confirm which contract is
    intended.
    """
    hook_results = plugin_manager.hook.fetcher_status_icon(fetcher_name=fetcher_name)

    # First truthy dict wins; anything else (None, empty, non-dict) is ignored
    return next(
        (icon for icon in (hook_results or ()) if icon and isinstance(icon, dict)),
        None,
    )
# Capability attributes looked up on a fetcher class
_FETCHER_CAPABILITY_FLAGS = (
    'supports_browser_steps',
    'supports_screenshots',
    'supports_xpath_element_data',
)


def _fetcher_capability_flags(fetcher_class):
    """Read each capability attribute off *fetcher_class*, defaulting to False when absent."""
    return {flag: getattr(fetcher_class, flag, False) for flag in _FETCHER_CAPABILITY_FLAGS}


def get_fetcher_capabilities(watch, datastore):
    """Get capability flags for a watch's fetcher.

    The fetcher is looked up first as an attribute of ``changedetectionio.content_fetchers``
    and then among the fetchers returned by the ``register_content_fetcher`` plugin hook.

    Args:
        watch: The watch object/dict
        datastore: The datastore to resolve 'system' fetcher

    Returns:
        dict: Dictionary with capability flags:
            {
                'supports_browser_steps': bool,
                'supports_screenshots': bool,
                'supports_xpath_element_data': bool
            }
        All flags are False when the fetcher cannot be found.
    """
    # Get the fetcher name from watch; an empty/None value is treated as 'system'
    # (hasattr() below requires a string name)
    fetcher_name = watch.get('fetch_backend', 'system') or 'system'

    # Resolve 'system' to actual fetcher
    if fetcher_name == 'system':
        fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')

    # Get the fetcher class
    from changedetectionio import content_fetchers

    # Try to get from built-in fetchers first
    if hasattr(content_fetchers, fetcher_name):
        return _fetcher_capability_flags(getattr(content_fetchers, fetcher_name))

    # Try to get from plugin-provided fetchers
    # Query all plugins for registered fetchers
    for fetcher_registration in plugin_manager.hook.register_content_fetcher():
        if fetcher_registration:
            name, fetcher_class = fetcher_registration
            if name == fetcher_name:
                return _fetcher_capability_flags(fetcher_class)

    # Default: no capabilities
    return dict.fromkeys(_FETCHER_CAPABILITY_FLAGS, False)
def load_plugin_settings(datastore_path, plugin_id):
    """Read a plugin's saved settings from ``<datastore_path>/<plugin_id>.json``.

    Args:
        datastore_path: Path to the datastore directory
        plugin_id: Unique identifier for the plugin (e.g., 'zyte_fetcher')

    Returns:
        The decoded JSON content of the settings file, or an empty dict when the
        file does not exist or cannot be read/parsed (the failure is logged).
    """
    import json

    settings_file = os.path.join(datastore_path, f"{plugin_id}.json")

    if os.path.exists(settings_file):
        try:
            with open(settings_file, 'r', encoding='utf-8') as handle:
                return json.loads(handle.read())
        except Exception as e:
            logger.error(f"Failed to load settings for plugin '{plugin_id}': {e}")

    # Missing file, or the read/parse above failed
    return {}
def apply_update_handler_alter(update_handler, watch, datastore):
    """Run the update_handler_alter hook and pick the handler to use for this watch.

    The hook is invoked once; every implementing plugin is called with the same,
    original ``update_handler``. Of the values returned, the last non-None one
    replaces the handler. If no plugin returns a handler, the original is kept.

    NOTE(review): plugins do not receive each other's output here, so wrappers from
    several plugins are not stacked - only one returned handler is used. Which plugin
    ends up "last" depends on pluggy's result ordering; confirm against the pluggy docs.

    Args:
        update_handler: The perform_site_check instance to potentially modify
        watch: The watch dict being processed
        datastore: The application datastore

    Returns:
        object: The (potentially modified/wrapped) update_handler
    """
    hook_results = plugin_manager.hook.update_handler_alter(
        update_handler=update_handler,
        watch=watch,
        datastore=datastore
    )

    chosen_handler = update_handler
    for candidate in (hook_results or ()):
        if candidate is None:
            continue
        logger.debug(f"Plugin modified update_handler for watch {watch.get('uuid')}")
        chosen_handler = candidate

    return chosen_handler
Args: update_handler: The perform_site_check instance (may be None) watch: The watch dict that was processed (may be None) datastore: The application datastore processing_exception: The exception from processing, or None if successful Returns: None """ try: # Call all plugins that implement the update_finalize hook plugin_manager.hook.update_finalize( update_handler=update_handler, watch=watch, datastore=datastore, processing_exception=processing_exception ) except Exception as e: # Don't let plugin errors crash the worker logger.error(f"Error in update_finalize hook: {e}") logger.exception(f"update_finalize hook exception details:") ================================================ FILE: changedetectionio/processors/README.md ================================================ # Change detection post-processors The concept here is to be able to switch between different domain specific problems to solve. - `text_json_diff` The traditional text and JSON comparison handler - `restock_diff` Only cares about detecting if a product looks like it has some text that suggests that it's out of stock, otherwise assumes that it's in stock. Some suggestions for the future - `graphical` ## API schema extension (`api.yaml`) A processor can extend the Watch/Tag API schema by placing an `api.yaml` alongside its `__init__.py`. Define a `components.schemas.processor_config_` entry and it will be merged into `WatchBase` at startup, making `processor_config_` a valid field on all watch create/update API calls. The fully merged spec is served live at `/api/v1/full-spec`. See `restock_diff/api.yaml` for a working example. 
def find_sub_packages(package_name):
    """
    List the names of the packages located directly inside the given package.

    :param package_name: Importable name of the base package to scan.
    :return: A list of sub-package names (plain modules are not included).
    """
    base_package = importlib.import_module(package_name)

    sub_packages = []
    for module_info in pkgutil.iter_modules(base_package.__path__):
        if module_info.ispkg:
            sub_packages.append(module_info.name)
    return sub_packages
def get_parent_module(module):
    """
    Return the package that contains the given module.

    Args:
        module: An imported module object.

    Returns:
        module or None: The parent package, or None when the module is top-level
        (no '.' in its name) or the parent cannot be imported.
    """
    module_name = module.__name__
    if '.' not in module_name:
        return None  # Top-level module has no parent

    parent_module_name = module_name.rsplit('.', 1)[0]
    try:
        return importlib.import_module(parent_module_name)
    except Exception as e:
        # Best-effort lookup: report the failure instead of swallowing it silently,
        # and return None (falsy, like the previous False) so both "no parent"
        # cases look the same to callers.
        logger.debug(f"Could not import parent module '{parent_module_name}' of '{module_name}': {e}")
        return None
@lru_cache(maxsize=1)
def get_plugin_processor_metadata():
    """Collect the 'metadata' entry of every processor registered through pluggy.

    The result is cached by ``lru_cache``, so the register_processor hook is only
    queried on the first call.

    Returns:
        dict: Maps each plugin 'processor_name' to its 'metadata' value ({} when the
        plugin supplied none). Errors while querying plugins are logged as a warning
        and whatever was collected up to that point is returned.
    """
    collected = {}
    try:
        from changedetectionio.pluggy_interface import plugin_manager

        for registration in plugin_manager.hook.register_processor():
            # Ignore empty or non-dict registrations
            if not (registration and isinstance(registration, dict)):
                continue
            name = registration.get('processor_name')
            meta = registration.get('metadata', {})
            if name:
                collected[name] = meta
    except Exception as e:
        logger.warning(f"Error getting plugin processor metadata: {e}")
    return collected
@lru_cache(maxsize=32)
def _available_processors_cached(locale_str):
    """
    Build the processor choice list; cached per locale string.

    The locale is part of the cache key so translated descriptions are
    cached per-language instead of globally.

    :param locale_str: The locale string (e.g., 'en', 'it', 'zh')
    :return: A list of tuples (processor_name, translated_description), ordered by weight
    """
    processor_classes = find_processors()

    # Comma-separated list; when the variable is unset 'image_ssim_diff' is disabled
    raw_disabled = os.getenv('DISABLED_PROCESSORS', 'image_ssim_diff').strip()
    disabled_processors = []
    if raw_disabled:
        disabled_processors = [item.strip() for item in raw_disabled.split(',') if item.strip()]
        logger.info(f"DISABLED_PROCESSORS set, disabling: {disabled_processors}")

    plugin_metadata = get_plugin_processor_metadata()

    weighted = []
    for module, sub_package_name in processor_classes:
        if sub_package_name in disabled_processors:
            logger.debug(f"Skipping processor '{sub_package_name}' (in DISABLED_PROCESSORS)")
            continue

        if sub_package_name in plugin_metadata:
            meta = plugin_metadata[sub_package_name]
            description = gettext(meta.get('name', sub_package_name))
            # Plugin weights are offset by 100 so they sort after built-in processors
            weight = 100 + meta.get('processor_weight', 0)
        else:
            # Description: module.name, then the package's processor_description,
            # then a readable version of the machine name
            if hasattr(module, 'name'):
                description = gettext(module.name)
            else:
                package = get_parent_module(module)
                if package and hasattr(package, 'processor_description'):
                    description = gettext(package.processor_description)
                else:
                    description = sub_package_name.replace('_', ' ').title()

            # Weight: module.processor_weight, then the package's, otherwise 0
            if hasattr(module, 'processor_weight'):
                weight = module.processor_weight
            else:
                weight = 0
                package = get_parent_module(module)
                if package and hasattr(package, 'processor_weight'):
                    weight = package.processor_weight

        weighted.append((sub_package_name, description, weight))

    # Lower weight appears first; the sort is stable so ties keep discovery order
    ranked = sorted(weighted, key=lambda row: row[2])

    # The weight is dropped from the result (for backwards compatibility)
    return [(row[0], row[1]) for row in ranked]


def available_processors():
    """
    Get a list of processors by name and description for the UI elements.

    Can be filtered via the DISABLED_PROCESSORS environment variable
    (comma-separated list). Delegates to the locale-keyed cached builder so
    translations are cached per-language.

    :return: A list of tuples (processor_name, translated_description)
    """
    # A Babel Locale object is converted to its string form to act as the cache key
    current = get_locale()
    return _available_processors_cached(str(current) if current else 'en')


def get_default_processor():
    """
    Get the processor to use when none is specified.

    This is the first entry of available_processors() (lowest weight), so forms
    auto-select a valid processor even when DISABLED_PROCESSORS filters the list.

    :return: The processor name string (e.g., 'text_json_diff')
    """
    choices = available_processors()
    if not choices:
        # Fallback if somehow no processors are available
        return 'text_json_diff'
    return choices[0][0]
def get_processor_badge_texts():
    """
    Map processor names to their translated ``list_badge_text``.

    The attribute is read from the processor module when present, otherwise
    from its parent package; processors that define it in neither place are
    left out of the result.

    :return: A dict mapping processor name to badge text (e.g., {'text_json_diff': 'Text', 'restock_diff': 'Restock'})
    """
    badge_texts = {}
    for module, sub_package_name in find_processors():
        source = module
        if not hasattr(module, 'list_badge_text'):
            # Fall back to the package's __init__.py
            source = get_parent_module(module)
            if not (source and hasattr(source, 'list_badge_text')):
                continue
        badge_texts[sub_package_name] = gettext(source.list_badge_text)
    return badge_texts


def _describe_processor(module, sub_package_name):
    """Resolve one processor's translated description, most specific source first."""
    if hasattr(module, 'name'):
        return gettext(module.name)
    if hasattr(module, 'description'):
        return gettext(module.description)

    # Fall back to the package's __init__.py
    package = get_parent_module(module)
    if package and hasattr(package, 'processor_description'):
        return gettext(package.processor_description)
    if package and hasattr(package, 'name'):
        return gettext(package.name)

    # Final fallback to a readable name
    return sub_package_name.replace('_', ' ').title()


def get_processor_descriptions():
    """
    Map processor names to their translated description/name.

    :return: A dict mapping processor name to description (e.g., {'text_json_diff': 'Webpage Text/HTML, JSON and PDF changes'})
    """
    descriptions = {}
    for module, sub_package_name in find_processors():
        descriptions[sub_package_name] = _describe_processor(module, sub_package_name)
    return descriptions
def generate_processor_badge_colors(processor_name):
    """
    Derive a stable badge colour scheme from a processor's name.

    The first 8 hex digits of the MD5 of the name seed every component, so
    the same name always yields the same colours.

    :param processor_name: The processor name (e.g., 'text_json_diff')
    :return: A dict with 'light' and 'dark' color schemes, each containing 'bg' and 'color'
    """
    import hashlib

    digest = hashlib.md5(processor_name.encode('utf-8')).hexdigest()
    seed = int(digest[:8], 16)

    # Hue on the colour wheel (0-359)
    hue = seed % 360

    # Light mode: pastel background (saturation 60-84%, lightness 85-94%)
    # with dark text (lightness 25-39%)
    light_scheme = {
        'bg': f'hsl({hue}, {60 + (seed % 25)}%, {85 + (seed % 10)}%)',
        'color': f'hsl({hue}, 50%, {25 + (seed % 15)}%)',
    }

    # Dark mode: solid background (saturation 55-74%, lightness 45-59%) with white text
    dark_scheme = {
        'bg': f'hsl({hue}, {55 + (seed % 20)}%, {45 + (seed % 15)}%)',
        'color': '#fff',
    }

    return {'light': light_scheme, 'dark': dark_scheme}
def save_processor_config(datastore, watch_uuid, config_data):
    """
    Persist processor-specific configuration for a watch as a JSON file.

    Shared by the UI edit form and the API endpoints so both store processor
    configuration the same way.

    Args:
        datastore: The application datastore instance
        watch_uuid: UUID of the watch
        config_data: Dictionary of configuration data to save (with processor_config_* prefix removed)

    Returns:
        bool: True if saved successfully (or nothing needed saving), False otherwise
    """
    # Nothing to store counts as success
    if not config_data:
        return True

    try:
        from changedetectionio.processors.base import difference_detection_processor

        watch = datastore.data['watching'].get(watch_uuid)
        if not watch:
            logger.error(f"Cannot save processor config: watch {watch_uuid} not found")
            return False

        processor_name = watch.get('processor', 'text_json_diff')

        # The base processor instance is only needed for its config helper methods
        helper = difference_detection_processor(datastore, watch_uuid)

        # One file per processor, so each processor keeps its own config
        config_filename = f'{processor_name}.json'
        helper.update_extra_watch_config(config_filename, config_data)

        logger.debug(f"Saved processor config to {config_filename}: {config_data}")
        return True
    except Exception as e:
        logger.error(f"Failed to save processor config: {e}")
        return False
def extract_processor_config_from_form_data(form_data):
    """
    Extract processor_config_* fields from form data and return them as a separate dict.

    Shared by the UI edit form and the API endpoints so both extract processor
    configuration the same way.

    IMPORTANT: This function modifies form_data in-place by removing the
    processor_config_* fields.

    Args:
        form_data: Dictionary of form data (will be modified in-place)

    Returns:
        dict: Processor config data keyed by field name with the leading
            'processor_config_' prefix removed
    """
    prefix = 'processor_config_'
    processor_config_data = {}

    # Iterate over a copy of the keys since entries are deleted along the way
    for field_name in list(form_data.keys()):
        if not field_name.startswith(prefix):
            continue

        # Strip only the leading prefix: str.replace() would also remove any later
        # occurrence inside the key and silently rename the setting
        config_key = field_name[len(prefix):]

        # Empty values are kept on purpose so a setting can be cleared explicitly
        processor_config_data[config_key] = form_data[field_name]

        # Removed so the field never reaches the datastore
        del form_data[field_name]

    return processor_config_data
def update_last_raw_content_checksum(self, checksum):
    """
    Persist the raw content MD5 checksum to 'last-checksum.txt' in the watch data directory.

    The stored value feeds the skip logic that avoids reprocessing when the
    raw HTML is unchanged. Nothing happens when the checksum is empty, the
    watch is no longer in the datastore, or the watch has no data directory.
    """
    if not checksum:
        return

    live_watch = self.datastore.data['watching'].get(self.watch_uuid)
    target_dir = live_watch.data_dir if live_watch else None
    if not target_dir:
        return

    live_watch.ensure_data_dir_exists()
    target_path = os.path.join(target_dir, 'last-checksum.txt')

    try:
        with open(target_path, 'w', encoding='utf-8') as handle:
            handle.write(checksum)
            # The in-memory copy is only updated once the write call succeeded
            self.last_raw_content_checksum = checksum
    except IOError as e:
        logger.warning(f"Failed to write checksum file for {self.watch_uuid}: {e}")
""" watch = self.datastore.data['watching'].get(self.watch_uuid) if not watch: self.last_raw_content_checksum = None return data_dir = watch.data_dir if not data_dir: self.last_raw_content_checksum = None return checksum_file = os.path.join(data_dir, 'last-checksum.txt') if not os.path.isfile(checksum_file): self.last_raw_content_checksum = None return try: with open(checksum_file, 'r', encoding='utf-8') as f: self.last_raw_content_checksum = f.read().strip() except IOError as e: logger.warning(f"Failed to read checksum file for {self.watch_uuid}: {e}") self.last_raw_content_checksum = None async def validate_iana_url(self): """Pre-flight SSRF check — runs DNS lookup in executor to avoid blocking the event loop. Covers all fetchers (requests, playwright, puppeteer, plugins) since every fetch goes through call_browser(). """ if strtobool(os.getenv('ALLOW_IANA_RESTRICTED_ADDRESSES', 'false')): return parsed = urlparse(self.watch.link) if not parsed.hostname: return loop = asyncio.get_running_loop() if await loop.run_in_executor(None, is_private_hostname, parsed.hostname): raise Exception( f"Fetch blocked: '{self.watch.link}' resolves to a private/reserved IP address. " f"Set ALLOW_IANA_RESTRICTED_ADDRESSES=true to allow." ) async def call_browser(self, preferred_proxy_id=None): from requests.structures import CaseInsensitiveDict url = self.watch.link # Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended. if re.search(r'^file:', url.strip(), re.IGNORECASE): if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')): raise Exception( "file:// type access is denied for security reasons." 
async def call_browser(self, preferred_proxy_id=None):
    """
    Fetch the watch URL with the configured fetcher and leave the result on ``self.fetcher``.

    Steps, in order: refuse file: URLs unless ALLOW_FILE_URI is truthy, run the
    SSRF pre-flight check, choose the fetch backend and proxy, build the request
    headers/body (rendered through Jinja), run the fetcher, quit it, and finally
    sanitise string content so it can always be encoded as UTF-8.

    Args:
        preferred_proxy_id: Optional proxy key; when falsy the datastore's
            preferred proxy for this watch is used instead

    Raises:
        Exception: for file: URLs when ALLOW_FILE_URI is not enabled, plus
            whatever validate_iana_url() and the fetcher itself raise
    """

    from requests.structures import CaseInsensitiveDict

    url = self.watch.link

    # Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended.
    if re.search(r'^file:', url.strip(), re.IGNORECASE):
        if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
            raise Exception(
                "file:// type access is denied for security reasons."
            )

    await self.validate_iana_url()

    # Requests, playwright, other browser via wss:// etc, fetch_extra_something
    prefer_fetch_backend = self.watch.get('fetch_backend', 'system')

    # Proxy ID "key"
    preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(
        uuid=self.watch.get('uuid'))

    # Pluggable content self.fetcher
    # 'system' (or an empty value) means "use the application-wide fetch_backend setting"
    if not prefer_fetch_backend or prefer_fetch_backend == 'system':
        prefer_fetch_backend = self.datastore.data['settings']['application'].get('fetch_backend')

    # In the case that the preferred fetcher was a browser config with custom connection URL..
    # @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..)
    # NOTE(review): if the application setting is absent, prefer_fetch_backend is None here and
    # .startswith() raises AttributeError - confirm the setting is always populated.
    custom_browser_connection_url = None
    if prefer_fetch_backend.startswith('extra_browser_'):
        # NOTE(review): the two-value unpack assumes 'extra_browser_' occurs exactly once in the value
        (t, key) = prefer_fetch_backend.split('extra_browser_')
        connection = list(
            filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', [])))
        if connection:
            prefer_fetch_backend = 'html_webdriver'
            custom_browser_connection_url = connection[0].get('browser_connection_url')

    # PDF should be html_requests because playwright will serve it up (so far) in a embedded page
    # @todo https://github.com/dgtlmoon/changedetection.io/issues/2019
    # @todo needs test to or a fix
    if self.watch.is_pdf:
        prefer_fetch_backend = "html_requests"

    # Grab the right kind of 'fetcher', (playwright, requests, etc)
    from changedetectionio import content_fetchers
    if hasattr(content_fetchers, prefer_fetch_backend):
        # @todo TEMPORARY HACK - SWITCH BACK TO PLAYWRIGHT FOR BROWSERSTEPS
        if prefer_fetch_backend == 'html_webdriver' and self.watch.has_browser_steps:
            # This is never supported in selenium anyway
            logger.warning(
                "Using playwright fetcher override for possible puppeteer request in browsersteps, because puppetteer:browser steps is incomplete.")
            from changedetectionio.content_fetchers.playwright import fetcher as playwright_fetcher
            fetcher_obj = playwright_fetcher
        else:
            fetcher_obj = getattr(content_fetchers, prefer_fetch_backend)
    else:
        # What it referenced doesnt exist, Just use a default
        fetcher_obj = getattr(content_fetchers, "html_requests")

    proxy_url = None
    if preferred_proxy_id:
        # Custom browser endpoints should NOT have a proxy added
        # NOTE(review): when a custom connection matched above, prefer_fetch_backend was already
        # rewritten to 'html_webdriver', so this check only sees 'extra_browser_*' names that had
        # no matching connection - confirm that is the intended condition.
        if not prefer_fetch_backend.startswith('extra_browser_'):
            # NOTE(review): proxy_list.get() returning None for an unknown key would raise AttributeError here
            proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
            logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
        else:
            logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")

    logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}")

    # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
    # When browser_connection_url is None, the fetcher should work out the best defaults itself (os env vars etc)
    self.fetcher = fetcher_obj(proxy_override=proxy_url,
                               custom_browser_connection_url=custom_browser_connection_url,
                               screenshot_format=self.screenshot_format
                               )

    if self.watch.has_browser_steps:
        self.fetcher.browser_steps = browser_steps_get_valid_steps(self.watch.get('browser_steps', []))
        self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))

    # Tweak the base config with the per-watch ones
    from changedetectionio.jinja2_custom import render as jinja_render
    request_headers = CaseInsensitiveDict()

    # Header precedence (later update() wins): default UA < watch headers < base headers < per-watch text file headers
    ua = self.datastore.data['settings']['requests'].get('default_ua')
    if ua and ua.get(prefer_fetch_backend):
        request_headers.update({'User-Agent': ua.get(prefer_fetch_backend)})

    request_headers.update(self.watch.get('headers', {}))
    request_headers.update(self.datastore.get_all_base_headers())
    request_headers.update(self.datastore.get_all_headers_in_textfile_for_watch(uuid=self.watch.get('uuid')))

    # https://github.com/psf/requests/issues/4525
    # Requests doesnt yet support brotli encoding, so don't put 'br' here, be totally sure that the user cannot
    # do this by accident.
    # NOTE(review): only the exact substring ', br' is removed; a value such as 'br' or 'br, gzip' is left unchanged.
    if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
        request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')

    # Every header value is passed through the Jinja renderer
    for header_name in request_headers:
        request_headers.update({header_name: jinja_render(template_str=request_headers.get(header_name))})

    timeout = self.datastore.data['settings']['requests'].get('timeout')

    request_body = self.watch.get('body')
    if request_body:
        request_body = jinja_render(template_str=self.watch.get('body'))

    request_method = self.watch.get('method')
    ignore_status_codes = self.watch.get('ignore_status_codes', False)

    # Configurable per-watch or global extra delay before extracting text (for webDriver types)
    # A truthy per-watch value wins; otherwise the application-wide value is used when it is not None
    system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
    if self.watch.get('webdriver_delay'):
        self.fetcher.render_extract_delay = self.watch.get('webdriver_delay')
    elif system_webdriver_delay is not None:
        self.fetcher.render_extract_delay = system_webdriver_delay

    if self.watch.get('webdriver_js_execute_code') is not None and self.watch.get('webdriver_js_execute_code').strip():
        self.fetcher.webdriver_js_execute_code = self.watch.get('webdriver_js_execute_code')

    # Requests for PDF's, images etc should be passed the is_binary flag
    is_binary = self.watch.is_pdf

    # And here we go! call the right browser with browser-specific settings
    empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
    # All fetchers are now async
    await self.fetcher.run(
        current_include_filters=self.watch.get('include_filters'),
        empty_pages_are_a_change=empty_pages_are_a_change,
        fetch_favicon=self.watch.favicon_is_expired(),
        ignore_status_codes=ignore_status_codes,
        is_binary=is_binary,
        request_body=request_body,
        request_headers=request_headers,
        request_method=request_method,
        screenshot_format=self.screenshot_format,
        timeout=timeout,
        url=url,
        watch_uuid=self.watch_uuid,
    )

    # @todo .quit here could go on close object, so we can run JS if change-detected
    await self.fetcher.quit(watch=self.watch)

    # Sanitize lone surrogates - these can appear when servers return malformed/mixed-encoding
    # content that gets decoded into surrogate characters (e.g. \udcad). Without this,
    # encode('utf-8') raises UnicodeEncodeError downstream in checksums, diffs, file writes, etc.
    # Covers all fetchers (requests, playwright, puppeteer, selenium) in one place.
    # Also note: By this point we SHOULD know the original encoding so it can safely convert to utf-8 for the rest of the app.
    # See: https://github.com/dgtlmoon/changedetection.io/issues/3952
    if self.fetcher.content and isinstance(self.fetcher.content, str):
        self.fetcher.content = self.fetcher.content.encode('utf-8', errors='replace').decode('utf-8')

    # After init, call run_changedetection() which will do the actual change-detection
def get_extra_watch_config(self, filename):
    """
    Read a processor-specific JSON config file from the watch data directory.

    Args:
        filename: Name of JSON file (e.g., "visual_ssim_score.json")

    Returns:
        dict: Parsed JSON data, or an empty dict when the watch no longer
            exists, has no data directory, or the file is missing/unreadable
    """
    import json
    import os

    watch = self.datastore.data['watching'].get(self.watch_uuid)
    # The watch can disappear from the datastore while a processor instance is alive;
    # treat that like "no config" instead of failing on None.data_dir
    data_dir = watch.data_dir if watch else None
    if not data_dir:
        return {}

    filepath = os.path.join(data_dir, filename)
    if not os.path.isfile(filepath):
        return {}

    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (json.JSONDecodeError, IOError) as e:
        logger.warning(f"Failed to read extra watch config {filename}: {e}")
        return {}


def update_extra_watch_config(self, filename, data, merge=True):
    """
    Write a processor-specific JSON config file to the watch data directory.

    Args:
        filename: Name of JSON file (e.g., "visual_ssim_score.json")
        data: Dictionary to serialize as JSON
        merge: If True, merge with existing data; if False, overwrite completely
    """
    import json
    import os

    watch = self.datastore.data['watching'].get(self.watch_uuid)
    if not watch:
        # Same guard as the checksum helpers: a deleted watch must not crash the save
        logger.warning(f"Cannot save extra watch config {filename}: watch {self.watch_uuid} not found")
        return

    data_dir = watch.data_dir
    if not data_dir:
        logger.warning(f"Cannot save extra watch config {filename}: no data_dir")
        return

    # Ensure directory exists
    watch.ensure_data_dir_exists()

    filepath = os.path.join(data_dir, filename)

    data_to_save = data
    if merge:
        existing_data = {}
        if os.path.isfile(filepath):
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    loaded = json.load(f)
                if isinstance(loaded, dict):
                    existing_data = loaded
                else:
                    # A non-dict document cannot be merged into; start from scratch
                    logger.warning(f"Failed to read existing config for merge: {filename} does not contain a JSON object")
            except (json.JSONDecodeError, IOError) as e:
                logger.warning(f"Failed to read existing config for merge: {e}")

        # New values win over what was stored before
        existing_data.update(data)
        data_to_save = existing_data

    # Serialize before opening the file so a serialization error cannot leave
    # a truncated config behind
    payload = json.dumps(data_to_save, indent=2)

    try:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(payload)
    except IOError as e:
        logger.error(f"Failed to write extra watch config {filename}: {e}")


def get_raw_document_checksum(self):
    """
    Return the MD5 hex digest of the fetched content, or None when there is no content.

    Text content is hashed as UTF-8; bytes-like content is hashed as-is.
    """
    content = self.fetcher.content
    if not content:
        return None

    raw = content.encode('utf-8') if isinstance(content, str) else content
    return hashlib.md5(raw).hexdigest()
class ProcessorException(Exception):
    """
    Error raised by a processor, carrying context about the failed check.

    Attributes mirror the constructor arguments: message, status_code, url,
    screenshot, has_filters, html_content and xpath_data.
    """

    def __init__(self, message=None, status_code=None, url=None, screenshot=None, has_filters=False, html_content='', xpath_data=None):
        # Hand the message to Exception so str(exc) and exc.args are meaningful
        # however the arguments were passed (positionally or by keyword);
        # with no message the exception still renders as an empty string.
        if message is None:
            super().__init__()
        else:
            super().__init__(message)

        self.message = message
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        self.has_filters = has_filters
        self.html_content = html_content
        self.xpath_data = xpath_data
def render_form(watch, datastore, request, url_for, render_template, flash, redirect, extract_form=None):
    """
    Render the data extraction form for a watch.

    Args:
        watch: The watch object
        datastore: The ChangeDetectionStore instance
        request: Flask request object
        url_for: Flask url_for function
        render_template: Flask render_template function
        flash: Flask flash function
        redirect: Flask redirect function
        extract_form: Optional pre-built extract form (for error cases)

    Returns:
        Rendered HTML response with the extraction form
    """
    from changedetectionio import forms

    uuid = watch.get('uuid')

    # Build a form from the submitted data unless the caller supplied one
    if extract_form is None:
        submitted_regex = request.form.get('extract_regex', '')
        extract_form = forms.extractDataForm(
            formdata=request.form,
            data={'extract_regex': submitted_regex}
        )

    screenshot_url = watch.get_screenshot()
    is_html_webdriver = watch.fetcher_supports_screenshots

    # With a password configured (in settings or via SALTED_PASS), the flag is
    # the negation of the 'shared_diff_access' setting
    app_settings = datastore.data['settings']['application']
    password_enabled_and_share_is_off = False
    if app_settings.get('password') or os.getenv("SALTED_PASS", False):
        password_enabled_and_share_is_off = not app_settings.get('shared_diff_access')

    # Shared default template from processors/templates/; a processor can ship
    # its own extract.py with custom template logic instead
    template_context = {
        'uuid': uuid,
        'extract_form': extract_form,
        'watch_a': watch,
        'last_error': watch['last_error'],
        'last_error_screenshot': watch.get_error_snapshot(),
        'last_error_text': watch.get_error_text(),
        'screenshot': screenshot_url,
        'is_html_webdriver': is_html_webdriver,
        'password_enabled_and_share_is_off': password_enabled_and_share_is_off,
        'extra_title': f" - {watch.label} - Extract Data",
        'extra_stylesheets': [url_for('static_content', group='styles', filename='diff.css')],
        'pure_menu_fixed': False,
    }
    return render_template("extract.html", **template_context)
def process_extraction(watch, datastore, request, url_for, make_response, send_from_directory, flash, redirect, extract_form=None):
    """
    Run the extraction regex over the watch history and return the result as a CSV download.

    Args:
        watch: The watch object
        datastore: The ChangeDetectionStore instance
        request: Flask request object
        url_for: Flask url_for function
        make_response: Flask make_response function
        send_from_directory: Flask send_from_directory function
        flash: Flask flash function
        redirect: Flask redirect function
        extract_form: Optional pre-built extract form

    Returns:
        CSV file download response, the re-rendered form when validation
        fails, or a redirect back to the form when nothing matched
    """
    from changedetectionio import forms

    uuid = watch.get('uuid')

    # Build a form from the submitted data unless the caller supplied one
    if extract_form is None:
        submitted_regex = request.form.get('extract_regex', '')
        extract_form = forms.extractDataForm(
            formdata=request.form,
            data={'extract_regex': submitted_regex}
        )

    if not extract_form.validate():
        flash(gettext("An error occurred, please see below."), "error")
        # This function receives no render_template argument, so Flask's own is imported here
        from flask import render_template as flask_render_template
        return render_form(
            watch=watch,
            datastore=datastore,
            request=request,
            url_for=url_for,
            render_template=flask_render_template,
            flash=flash,
            redirect=redirect,
            extract_form=extract_form
        )

    extract_regex = request.form.get('extract_regex', '').strip()
    output = watch.extract_regex_from_all_history(extract_regex)

    if not output:
        flash(gettext('No matches found while scanning all of the watch history for that RegEx.'), 'error')
        return redirect(url_for('ui.ui_diff.diff_history_page_extract_GET', uuid=uuid))

    watch_dir = os.path.join(datastore.datastore_path, uuid)
    response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))

    # Serve as CSV and keep browsers/proxies from caching the export
    download_headers = {
        'Content-type': 'text/csv',
        'Cache-Control': 'no-cache, no-store, must-revalidate',
        'Pragma': 'no-cache',
        'Expires': "0",
    }
    for header_name, header_value in download_headers.items():
        response.headers[header_name] = header_value
    return response
ultra-fast image comparison algorithms. ## Overview This processor uses **OpenCV** by default for screenshot comparison, providing **50-100x faster** performance compared to the previous SSIM implementation while still detecting meaningful visual changes. ## Current Features - **Ultra-fast OpenCV comparison**: cv2.absdiff with Gaussian blur for noise reduction - **MD5 pre-check**: Fast identical image detection before expensive comparison - **Configurable sensitivity**: Threshold-based change detection - **Three-panel diff view**: Previous | Current | Difference (with red highlights) - **Direct image support**: Works with browser screenshots AND direct image URLs - **Visual selector support**: Compare specific page regions using CSS/XPath selectors - **Download images**: Download any of the three comparison images directly from the diff view ## Performance - **OpenCV (default)**: 50-100x faster than SSIM - **Large screenshots**: Automatic downscaling for diff visualization (configurable via `MAX_DIFF_HEIGHT`/`MAX_DIFF_WIDTH`) - **Memory efficient**: Explicit cleanup of large objects for long-running processes - **JPEG diff images**: Smaller file sizes, faster rendering ## How It Works 1. **Fetch**: Screenshot captured via browser OR direct image URL fetched 2. **MD5 Check**: Quick hash comparison - if identical, skip comparison 3. **Region Selection** (optional): Crop to specific page region if visual selector is configured 4. **OpenCV Comparison**: Fast pixel-level difference detection with Gaussian blur 5. **Change Detection**: Percentage of changed pixels above threshold = change detected 6. 
**Visualization**: Generate diff image with red-highlighted changed regions ## Architecture ### Default Method: OpenCV The processor uses OpenCV's `cv2.absdiff()` for ultra-fast pixel-level comparison: ```python # Convert to grayscale gray_from = cv2.cvtColor(image_from, cv2.COLOR_RGB2GRAY) gray_to = cv2.cvtColor(image_to, cv2.COLOR_RGB2GRAY) # Apply Gaussian blur (reduces noise, controlled by OPENCV_BLUR_SIGMA env var) gray_from = cv2.GaussianBlur(gray_from, (0, 0), sigmaX=0.8) gray_to = cv2.GaussianBlur(gray_to, (0, 0), sigmaX=0.8) # Calculate absolute difference diff = cv2.absdiff(gray_from, gray_to) # Apply threshold (default: 30) _, thresh = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY) # Count changed pixels change_percentage = (changed_pixels / total_pixels) * 100 ``` ### Optional: Pixelmatch For users who need better anti-aliasing detection (especially for text-heavy screenshots), **pixelmatch** can be optionally installed: ```bash pip install "pybind11-pixelmatch>=0.1.3" ``` **Note**: Pixelmatch uses a C++17 implementation via pybind11 and may have build issues on some platforms (particularly Alpine/musl systems with symbolic link security restrictions). The application will automatically fall back to OpenCV if pixelmatch is not available. 
To use pixelmatch instead of OpenCV, set the environment variable: ```bash COMPARISON_METHOD=pixelmatch ``` #### When to use pixelmatch: - Screenshots with lots of text and anti-aliasing - Need to ignore minor font rendering differences between browser versions - 10-20x faster than SSIM (but slower than OpenCV) #### When to stick with OpenCV (default): - General webpage monitoring - Maximum performance (50-100x faster than SSIM) - Simple pixel-level change detection - Avoid build dependencies (Alpine/musl systems) ## Configuration ### Environment Variables ```bash # Comparison method (opencv or pixelmatch) COMPARISON_METHOD=opencv # Default # OpenCV threshold (0-255, lower = more sensitive) COMPARISON_THRESHOLD_OPENCV=30 # Default # Pixelmatch threshold (0-100, mapped to 0-1 scale) COMPARISON_THRESHOLD_PIXELMATCH=10 # Default # Gaussian blur sigma for OpenCV (0 = no blur, higher = more blur) OPENCV_BLUR_SIGMA=0.8 # Default # Minimum change percentage to trigger detection OPENCV_MIN_CHANGE_PERCENT=0.1 # Default (0.1%) PIXELMATCH_MIN_CHANGE_PERCENT=0.1 # Default # Diff visualization image size limits (pixels) MAX_DIFF_HEIGHT=8000 # Default MAX_DIFF_WIDTH=900 # Default ``` ### Per-Watch Configuration - **Comparison Threshold**: Can be configured per-watch in the edit form - Very low sensitivity (10) - Only major changes - Low sensitivity (20) - Significant changes - Medium sensitivity (30) - Moderate changes (default) - High sensitivity (50) - Small changes - Very high sensitivity (75) - Any visible change ### Visual Selector (Region Comparison) Use the "Include filters" field with CSS selectors or XPath to compare only specific page regions: ``` .content-area //div[@id='main'] ``` The processor will automatically crop both screenshots to the bounding box of the first matched element. 
## Dependencies ### Required - `opencv-python-headless>=4.8.0.76` - Fast image comparison - `Pillow (PIL)` - Image loading and manipulation - `numpy` - Array operations ### Optional - `pybind11-pixelmatch>=0.1.3` - Alternative comparison method with anti-aliasing detection ## Change Detection Interpretation - **0%** = Identical images (or below minimum change threshold) - **0.1-1%** = Minor differences (anti-aliasing, slight rendering differences) - **1-5%** = Noticeable changes (text updates, small content changes) - **5-20%** = Significant changes (layout shifts, content additions) - **>20%** = Major differences (page redesign, large content changes) ## Technical Notes ### Memory Management ```python # Explicit cleanup for long-running processes img.close() # Close PIL Images buffer.close() # Close BytesIO buffers del large_array # Mark numpy arrays for GC ``` ### Diff Image Generation - Format: JPEG (quality=85, optimized) - Highlight: Red overlay (50% blend with original) - Auto-downscaling: Large screenshots downscaled for faster rendering - Base64 embedded: For direct template rendering ### OpenCV Blur Parameters The Gaussian blur reduces sensitivity to: - Font rendering differences - Anti-aliasing variations - JPEG compression artifacts - Minor pixel shifts (1-2 pixels) Increase `OPENCV_BLUR_SIGMA` to make comparison more tolerant of these differences. 
## Comparison: OpenCV vs Pixelmatch vs SSIM | Feature | OpenCV | Pixelmatch | SSIM (old) | |---------|--------|------------|------------| | **Speed** | 50-100x faster | 10-20x faster | Baseline | | **Anti-aliasing** | Via blur | Built-in detection | Built-in | | **Text sensitivity** | High | Medium (AA-aware) | Medium | | **Dependencies** | opencv-python-headless | pybind11-pixelmatch + C++ compiler | scikit-image | | **Alpine/musl support** | ✅ Yes | ⚠️ Build issues | ✅ Yes | | **Memory usage** | Low | Low | High | | **Best for** | General use, max speed | Text-heavy screenshots | Deprecated | ## Migration from SSIM If you're upgrading from the old SSIM-based processor: 1. **Thresholds are different**: SSIM used 0-1 scale (higher = more similar), OpenCV uses 0-255 pixel difference (lower = more similar) 2. **Default threshold**: Start with 30 for OpenCV, adjust based on your needs 3. **Performance**: Expect dramatically faster comparisons, especially for large screenshots 4. **Accuracy**: OpenCV is more sensitive to pixel-level changes; increase `OPENCV_BLUR_SIGMA` if you're getting false positives ## Future Enhancements Potential features for future consideration: - **Change region detection**: Highlight specific areas that changed with bounding boxes - **Perceptual hashing**: Pre-screening filter for even faster checks - **Ignore regions**: Exclude specific page areas (ads, timestamps) from comparison - **Text extraction**: OCR-based text comparison for semantic changes - **Adaptive thresholds**: Different sensitivity for different page regions ## Resources - [OpenCV Documentation](https://docs.opencv.org/) - [pybind11-pixelmatch GitHub](https://github.com/whtsky/pybind11-pixelmatch) - [Pixelmatch (original JS library)](https://github.com/mapbox/pixelmatch) ================================================ FILE: changedetectionio/processors/image_ssim_diff/__init__.py ================================================ """ Visual/screenshot change detection using 
fast image comparison algorithms. This processor compares screenshots using OpenCV (cv2.absdiff), which is 10-100x faster than SSIM while still detecting meaningful visual changes. """ import os from pathlib import Path processor_description = "Visual/Screenshot change detection (Fast)" processor_name = "image_ssim_diff" processor_weight = 2 # Lower weight = appears at top, heavier weight = appears lower (bottom) # Processor capabilities supports_visual_selector = True supports_browser_steps = True supports_text_filters_and_triggers = False supports_text_filters_and_triggers_elements = False supports_request_type = True PROCESSOR_CONFIG_NAME = f"{Path(__file__).parent.name}.json" # Subprocess timeout settings # Maximum time to wait for subprocess operations (seconds) POLL_TIMEOUT_ABSOLUTE = int(os.getenv('OPENCV_SUBPROCESS_TIMEOUT', '20')) # Template tracking filename CROPPED_IMAGE_TEMPLATE_FILENAME = 'cropped_image_template.png' SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS = [ ('200', 'Low sensitivity (only major changes)'), ('80', 'Medium sensitivity (moderate changes - recommended)'), ('20', 'High sensitivity (small changes)'), ('0', 'Very high sensitivity (any change)') ] SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT=0.999 OPENCV_BLUR_SIGMA=float(os.getenv("OPENCV_BLUR_SIGMA", "3.0")) ================================================ FILE: changedetectionio/processors/image_ssim_diff/difference.py ================================================ """ Screenshot diff visualization for fast image comparison processor. All image operations now use ImageDiffHandler abstraction for clean separation of concerns and easy backend swapping (LibVIPS, OpenCV, PIL, etc.). 
""" import os import json import time from flask_babel import gettext from loguru import logger from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT, PROCESSOR_CONFIG_NAME, \ OPENCV_BLUR_SIGMA # All image operations now use OpenCV via isolated_opencv subprocess handler # No direct handler imports needed - subprocess isolation handles everything # Maximum dimensions for diff visualization (can be overridden via environment variable) # Large screenshots don't need full resolution for visual inspection # Reduced defaults to minimize memory usage - 2000px height is plenty for diff viewing MAX_DIFF_HEIGHT = int(os.getenv('MAX_DIFF_HEIGHT', '8000')) MAX_DIFF_WIDTH = int(os.getenv('MAX_DIFF_WIDTH', '900')) def get_asset(asset_name, watch, datastore, request): """ Get processor-specific binary assets for streaming. Uses ImageDiffHandler for all image operations - no more multiprocessing needed as LibVIPS handles threading/memory internally. Supported assets: - 'before': The previous/from screenshot - 'after': The current/to screenshot - 'rendered_diff': The generated diff visualization with red highlights Args: asset_name: Name of the asset to retrieve ('before', 'after', 'rendered_diff') watch: Watch object datastore: Datastore object request: Flask request (for from_version/to_version query params) Returns: tuple: (binary_data, content_type, cache_control_header) or None if not found """ # Get version parameters from query string versions = list(watch.history.keys()) if len(versions) < 2: return None from_version = request.args.get('from_version', versions[-2] if len(versions) >= 2 else versions[0]) to_version = request.args.get('to_version', versions[-1]) # Validate versions exist if from_version not in versions: from_version = versions[-2] if len(versions) >= 2 else versions[0] if to_version not in versions: to_version = versions[-1] try: if asset_name == 'before': # Return the 'from' screenshot with bounding box if 
configured img_bytes = watch.get_history_snapshot(timestamp=from_version) img_bytes = _draw_bounding_box_if_configured(img_bytes, watch, datastore) mime_type = _detect_mime_type(img_bytes) return (img_bytes, mime_type, 'public, max-age=3600') elif asset_name == 'after': # Return the 'to' screenshot with bounding box if configured img_bytes = watch.get_history_snapshot(timestamp=to_version) img_bytes = _draw_bounding_box_if_configured(img_bytes, watch, datastore) mime_type = _detect_mime_type(img_bytes) return (img_bytes, mime_type, 'public, max-age=3600') elif asset_name == 'rendered_diff': # Generate diff in isolated subprocess to prevent memory leaks # Subprocess provides complete memory isolation from .image_handler import isolated_opencv as process_screenshot_handler img_bytes_from = watch.get_history_snapshot(timestamp=from_version) img_bytes_to = watch.get_history_snapshot(timestamp=to_version) # Get pixel difference threshold sensitivity (per-watch > global) # This controls how different a pixel must be (0-255 scale) to count as "changed" from changedetectionio import processors processor_instance = processors.difference_detection_processor(datastore, watch.get('uuid')) processor_config = processor_instance.get_extra_watch_config(PROCESSOR_CONFIG_NAME) pixel_difference_threshold_sensitivity = processor_config.get('pixel_difference_threshold_sensitivity') if not pixel_difference_threshold_sensitivity: pixel_difference_threshold_sensitivity = datastore.data['settings']['application'].get( 'pixel_difference_threshold_sensitivity', SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT) try: pixel_difference_threshold_sensitivity = int(pixel_difference_threshold_sensitivity) except (ValueError, TypeError): logger.warning( f"Invalid pixel_difference_threshold_sensitivity value '{pixel_difference_threshold_sensitivity}', using default") pixel_difference_threshold_sensitivity = SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT logger.debug(f"Pixel difference threshold 
sensitivity is {pixel_difference_threshold_sensitivity}") # Generate diff in isolated subprocess (async-safe) import asyncio import threading # Async-safe wrapper: runs coroutine in new thread with its own event loop def run_async_in_thread(): return asyncio.run( process_screenshot_handler.generate_diff_isolated( img_bytes_from, img_bytes_to, pixel_difference_threshold=int(pixel_difference_threshold_sensitivity), blur_sigma=OPENCV_BLUR_SIGMA, max_width=MAX_DIFF_WIDTH, max_height=MAX_DIFF_HEIGHT ) ) # Run in thread to avoid blocking event loop if called from async context result_container = [None] exception_container = [None] def thread_target(): try: result_container[0] = run_async_in_thread() except Exception as e: exception_container[0] = e thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-Asset") thread.start() thread.join(timeout=60) if exception_container[0]: raise exception_container[0] diff_image_bytes = result_container[0] if diff_image_bytes: # Note: Bounding box drawing on diff not yet implemented return (diff_image_bytes, 'image/jpeg', 'public, max-age=300') else: logger.error("Failed to generate diff in subprocess") return None else: # Unknown asset return None except Exception as e: logger.error(f"Failed to get asset '{asset_name}': {e}") import traceback logger.error(traceback.format_exc()) return None def _detect_mime_type(img_bytes): """ Detect MIME type using puremagic (same as Watch.py). 
Args: img_bytes: Image bytes Returns: str: MIME type (e.g., 'image/png', 'image/jpeg') """ try: import puremagic detections = puremagic.magic_string(img_bytes[:2048]) if detections: mime_type = detections[0].mime_type logger.trace(f"Detected MIME type: {mime_type}") return mime_type else: logger.trace("No MIME type detected, using 'image/png' fallback") return 'image/png' except Exception as e: logger.warning(f"puremagic detection failed: {e}, using 'image/png' fallback") return 'image/png' def _draw_bounding_box_if_configured(img_bytes, watch, datastore): """ Draw blue bounding box on image if configured in processor settings. Uses isolated subprocess to prevent memory leaks from large images. Supports two modes: - "Select by element": Use include_filter to find xpath element bbox - "Draw area": Use manually drawn bounding_box from config Args: img_bytes: Image bytes (PNG) watch: Watch object datastore: Datastore object Returns: Image bytes (possibly with bounding box drawn) """ try: # Get processor configuration from changedetectionio import processors processor_instance = processors.difference_detection_processor(datastore, watch.get('uuid')) processor_name = watch.get('processor', 'default') config_filename = f'{processor_name}.json' processor_config = processor_instance.get_extra_watch_config(config_filename) if not processor_config: return img_bytes selection_mode = processor_config.get('selection_mode', 'draw') x, y, width, height = None, None, None, None # Mode 1: Select by element (use include_filter + xpath_data) if selection_mode == 'element': include_filters = watch.get('include_filters', []) if include_filters and len(include_filters) > 0: first_filter = include_filters[0].strip() # Get xpath_data from watch history history_keys = list(watch.history.keys()) if history_keys: latest_snapshot = watch.get_history_snapshot(timestamp=history_keys[-1]) xpath_data_path = watch.get_xpath_data_filepath(timestamp=history_keys[-1]) try: import gzip with 
gzip.open(xpath_data_path, 'rt') as f: xpath_data = json.load(f) # Find matching element for element in xpath_data.get('size_pos', []): if element.get('xpath') == first_filter and element.get('highlight_as_custom_filter'): x = element.get('left', 0) y = element.get('top', 0) width = element.get('width', 0) height = element.get('height', 0) logger.debug(f"Found element bbox for filter '{first_filter}': x={x}, y={y}, w={width}, h={height}") break except Exception as e: logger.warning(f"Failed to load xpath_data for element selection: {e}") # Mode 2: Draw area (use manually configured bbox) else: bounding_box = processor_config.get('bounding_box') if bounding_box: # Parse bounding box: "x,y,width,height" parts = [int(p.strip()) for p in bounding_box.split(',')] if len(parts) == 4: x, y, width, height = parts else: logger.warning(f"Invalid bounding box format: {bounding_box}") # If no bbox found, return original image if x is None or y is None or width is None or height is None: return img_bytes # Use isolated subprocess to prevent memory leaks from large images from .image_handler import isolated_opencv import asyncio import threading # Async-safe wrapper: runs coroutine in new thread with its own event loop # This prevents blocking when called from async context (update worker) def run_async_in_thread(): return asyncio.run( isolated_opencv.draw_bounding_box_isolated( img_bytes, x, y, width, height, color=(255, 0, 0), # Blue in BGR format thickness=3 ) ) # Always run in thread to avoid blocking event loop if called from async context result_container = [None] exception_container = [None] def thread_target(): try: result_container[0] = run_async_in_thread() except Exception as e: exception_container[0] = e thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-BoundingBox") thread.start() thread.join(timeout=15) if exception_container[0]: raise exception_container[0] result = result_container[0] # Return result or original if subprocess failed 
return result if result else img_bytes except Exception as e: logger.warning(f"Failed to draw bounding box: {e}") import traceback logger.debug(traceback.format_exc()) return img_bytes def render(watch, datastore, request, url_for, render_template, flash, redirect): """ Render the screenshot comparison diff page. Uses ImageDiffHandler for all image operations. Args: watch: Watch object datastore: Datastore object request: Flask request url_for: Flask url_for function render_template: Flask render_template function flash: Flask flash function redirect: Flask redirect function Returns: Rendered template or redirect """ # Get version parameters (from_version, to_version) versions = list(watch.history.keys()) if len(versions) < 2: flash(gettext("Not enough history to compare. Need at least 2 snapshots."), "error") return redirect(url_for('watchlist.index')) # Default: compare latest two versions from_version = request.args.get('from_version', versions[-2] if len(versions) >= 2 else versions[0]) to_version = request.args.get('to_version', versions[-1]) # Validate versions exist if from_version not in versions: from_version = versions[-2] if len(versions) >= 2 else versions[0] if to_version not in versions: to_version = versions[-1] # Get pixel difference threshold sensitivity (per-watch > global > env default) pixel_difference_threshold_sensitivity = watch.get('pixel_difference_threshold_sensitivity') if not pixel_difference_threshold_sensitivity or pixel_difference_threshold_sensitivity == '': pixel_difference_threshold_sensitivity = datastore.data['settings']['application'].get('pixel_difference_threshold_sensitivity', SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT) # Convert to appropriate type try: pixel_difference_threshold_sensitivity = float(pixel_difference_threshold_sensitivity) except (ValueError, TypeError): logger.warning(f"Invalid pixel_difference_threshold_sensitivity value '{pixel_difference_threshold_sensitivity}', using default") 
pixel_difference_threshold_sensitivity = 30.0 # Get blur sigma blur_sigma = OPENCV_BLUR_SIGMA # Load screenshots from history try: img_bytes_from = watch.get_history_snapshot(timestamp=from_version) img_bytes_to = watch.get_history_snapshot(timestamp=to_version) except Exception as e: logger.error(f"Failed to load screenshots: {e}") flash(gettext("Failed to load screenshots: {}").format(e), "error") return redirect(url_for('watchlist.index')) # Calculate change percentage using isolated subprocess to prevent memory leaks (async-safe) now = time.time() try: from .image_handler import isolated_opencv as process_screenshot_handler import asyncio import threading # Async-safe wrapper: runs coroutine in new thread with its own event loop def run_async_in_thread(): return asyncio.run( process_screenshot_handler.calculate_change_percentage_isolated( img_bytes_from, img_bytes_to, pixel_difference_threshold=int(pixel_difference_threshold_sensitivity), blur_sigma=blur_sigma, max_width=MAX_DIFF_WIDTH, max_height=MAX_DIFF_HEIGHT ) ) # Run in thread to avoid blocking event loop if called from async context result_container = [None] exception_container = [None] def thread_target(): try: result_container[0] = run_async_in_thread() except Exception as e: exception_container[0] = e thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-ChangePercentage") thread.start() thread.join(timeout=60) if exception_container[0]: raise exception_container[0] change_percentage = result_container[0] method_display = f"{process_screenshot_handler.IMPLEMENTATION_NAME} (pixel_diff_threshold: {pixel_difference_threshold_sensitivity:.0f})" logger.debug(f"Done change percentage calculation in {time.time() - now:.2f}s") except Exception as e: logger.error(f"Failed to calculate change percentage: {e}") import traceback logger.error(traceback.format_exc()) flash(gettext("Failed to calculate diff: {}").format(e), "error") return redirect(url_for('watchlist.index')) # Load historical 
data if available (for charts/visualization) comparison_data = {} comparison_config_path = os.path.join(watch.data_dir, "visual_comparison_data.json") if os.path.isfile(comparison_config_path): try: with open(comparison_config_path, 'r') as f: comparison_data = json.load(f) except Exception as e: logger.warning(f"Failed to load comparison history data: {e}") # Render custom template # Template path is namespaced to avoid conflicts with other processors # Images are now served via separate /processor-asset/ endpoints instead of base64 return render_template( 'image_ssim_diff/diff.html', change_percentage=change_percentage, comparison_data=comparison_data, # Full history for charts/visualization comparison_method=method_display, current_diff_url=watch['url'], from_version=from_version, percentage_different=change_percentage, threshold=pixel_difference_threshold_sensitivity, to_version=to_version, uuid=watch.get('uuid'), versions=versions, watch=watch, ) ================================================ FILE: changedetectionio/processors/image_ssim_diff/edit_hook.py ================================================ """ Optional hook called when processor settings are saved in edit page. This hook analyzes the selected region to determine if template matching should be enabled for tracking content movement. Template matching is controlled via ENABLE_TEMPLATE_TRACKING env var (default: False). """ import io import os from loguru import logger from changedetectionio import strtobool from . import CROPPED_IMAGE_TEMPLATE_FILENAME # Template matching controlled via environment variable (default: disabled) # Set ENABLE_TEMPLATE_TRACKING=True to enable TEMPLATE_MATCHING_ENABLED = strtobool(os.getenv('ENABLE_TEMPLATE_TRACKING', 'False')) IMPORT_ERROR = "Template matching disabled (set ENABLE_TEMPLATE_TRACKING=True to enable)" def on_config_save(watch, processor_config, datastore): """ Called after processor config is saved in edit page. 
Analyzes the bounding box region to determine if it has enough visual features (texture/edges) to enable template matching for tracking content movement when page layout shifts. Args: watch: Watch object processor_config: Dict of processor-specific config datastore: Datastore object Returns: dict: Updated processor_config with auto_track_region setting """ # Check if template matching is globally enabled via ENV var if not TEMPLATE_MATCHING_ENABLED: logger.debug("Template tracking disabled via ENABLE_TEMPLATE_TRACKING env var") processor_config['auto_track_region'] = False return processor_config bounding_box = processor_config.get('bounding_box') if not bounding_box: # No bounding box, disable tracking processor_config['auto_track_region'] = False logger.debug("No bounding box set, disabled auto-tracking") return processor_config try: # Get the latest screenshot from watch history history_keys = list(watch.history.keys()) if len(history_keys) == 0: logger.warning("No screenshot history available yet, cannot analyze for tracking") processor_config['auto_track_region'] = False return processor_config # Get latest screenshot latest_timestamp = history_keys[-1] screenshot_bytes = watch.get_history_snapshot(timestamp=latest_timestamp) if not screenshot_bytes: logger.warning("Could not load screenshot for analysis") processor_config['auto_track_region'] = False return processor_config # Parse bounding box parts = [int(p.strip()) for p in bounding_box.split(',')] if len(parts) != 4: logger.warning("Invalid bounding box format") processor_config['auto_track_region'] = False return processor_config x, y, width, height = parts # Analyze the region for features/texture has_enough_features = analyze_region_features(screenshot_bytes, x, y, width, height) if has_enough_features: logger.info(f"Region has sufficient features for tracking - enabling auto_track_region") processor_config['auto_track_region'] = True # Save the template as cropped.jpg in watch data directory 
save_template_to_file(watch, screenshot_bytes, x, y, width, height) else: logger.info(f"Region lacks distinctive features - disabling auto_track_region") processor_config['auto_track_region'] = False # Remove old template file if exists template_path = os.path.join(watch.data_dir, CROPPED_IMAGE_TEMPLATE_FILENAME) if os.path.exists(template_path): os.remove(template_path) logger.debug(f"Removed old template file: {template_path}") return processor_config except Exception as e: logger.error(f"Error analyzing region for tracking: {e}") processor_config['auto_track_region'] = False return processor_config def analyze_region_features(screenshot_bytes, x, y, width, height): """ Analyze if a region has enough visual features for template matching. Uses OpenCV to detect corners/edges. If the region has distinctive features, template matching can reliably track it when it moves. Args: screenshot_bytes: Full screenshot as bytes x, y, width, height: Bounding box coordinates Returns: bool: True if region has enough features, False otherwise """ # Template matching disabled - would need OpenCV implementation for region analysis if not TEMPLATE_MATCHING_ENABLED: logger.warning(f"Cannot analyze region features: {IMPORT_ERROR}") return False # Note: Original implementation used LibVIPS handler to crop region, then OpenCV # for feature detection (goodFeaturesToTrack, Canny edge detection, variance). # If re-implementing, use OpenCV directly for both cropping and analysis. # Feature detection would use: cv2.goodFeaturesToTrack, cv2.Canny, np.var return False def save_template_to_file(watch, screenshot_bytes, x, y, width, height): """ Extract the template region and save as cropped_image_template.png in watch data directory. This is a convenience wrapper around handler.save_template() that handles watch directory setup and path construction. 
Args: watch: Watch object screenshot_bytes: Full screenshot as bytes x, y, width, height: Bounding box coordinates """ # Template matching disabled - would need OpenCV implementation for template saving if not TEMPLATE_MATCHING_ENABLED: logger.warning(f"Cannot save template: {IMPORT_ERROR}") return # Note: Original implementation used LibVIPS handler to crop and save region. # If re-implementing, use OpenCV (cv2.imdecode, crop with array slicing, cv2.imwrite). return ================================================ FILE: changedetectionio/processors/image_ssim_diff/forms.py ================================================ """ Configuration forms for fast screenshot comparison processor. """ from wtforms import SelectField, StringField, validators, ValidationError, IntegerField from flask_babel import lazy_gettext as _l from changedetectionio.forms import processor_text_json_diff_form import re from changedetectionio.processors.image_ssim_diff import SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS def validate_bounding_box(form, field): """Validate bounding box format: x,y,width,height with integers.""" if not field.data: return # Optional field if len(field.data) > 100: raise ValidationError(_l('Bounding box value is too long')) # Should be comma-separated integers if not re.match(r'^\d+,\d+,\d+,\d+$', field.data): raise ValidationError(_l('Bounding box must be in format: x,y,width,height (integers only)')) # Validate values are reasonable (not negative, not ridiculously large) parts = [int(p) for p in field.data.split(',')] for part in parts: if part < 0: raise ValidationError(_l('Bounding box values must be non-negative')) if part > 10000: # Reasonable max screen dimension raise ValidationError(_l('Bounding box values are too large')) def validate_selection_mode(form, field): """Validate selection mode value.""" if not field.data: return # Optional field if field.data not in ['element', 'draw']: raise ValidationError(_l('Selection mode must be either "element" or 
"draw"')) class processor_settings_form(processor_text_json_diff_form): """Form for fast image comparison processor settings.""" processor_config_min_change_percentage = IntegerField( _l('Minimum Change Percentage'), validators=[ validators.Optional(), validators.NumberRange(min=1, max=100, message=_l('Must be between 0 and 100')) ], render_kw={"placeholder": "Use global default (0.1)"} ) processor_config_pixel_difference_threshold_sensitivity = SelectField( _l('Pixel Difference Sensitivity'), choices=[ ('', _l('Use global default')) ] + SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS, validators=[validators.Optional()], default='' ) # Processor-specific config fields (stored in separate JSON file) processor_config_bounding_box = StringField( _l('Bounding Box'), validators=[ validators.Optional(), validators.Length(max=100, message=_l('Bounding box value is too long')), validate_bounding_box ], render_kw={"style": "display: none;", "id": "bounding_box"} ) processor_config_selection_mode = StringField( _l('Selection Mode'), validators=[ validators.Optional(), validators.Length(max=20, message=_l('Selection mode value is too long')), validate_selection_mode ], render_kw={"style": "display: none;", "id": "selection_mode"} ) def extra_tab_content(self): """Tab label for processor-specific settings.""" return _l('Screenshot Comparison') def extra_form_content(self): """Render processor-specific form fields. @NOTE: prepend processor_config_* to the field name so it will save into its own datadir/uuid/image_ssim_diff.json and be read at process time """ return ''' {% from '_helpers.html' import render_field %}
Screenshot Comparison Settings
{{ render_field(form.processor_config_min_change_percentage) }} What percentage of pixels must change to trigger a detection?
For example, 0.1% means if 0.1% or more of the pixels change, it counts as a change.
Lower values = more sensitive (detect smaller changes).
Higher values = less sensitive (only detect larger changes).
Leave blank to use global default (0.1%).
{{ render_field(form.processor_config_pixel_difference_threshold_sensitivity) }} How different must an individual pixel be to count as "changed"?
Low sensitivity (200) = Only count pixels that changed significantly (0-255 scale).
High sensitivity (20) = Count pixels with small changes as different.
Very high (0) = Any pixel change counts.
Select "Use global default" to inherit the system-wide setting.
class ImageDiffHandler(ABC):
    """
    Backend-neutral contract for the image_ssim_diff image operations.

    A concrete handler supplies every operation needed for screenshot
    comparison (loading, cropping, resizing, diffing and overlays) so that
    different backends (libvips, OpenCV, PIL, etc.) can be swapped in.
    Image and mask values are opaque, handler-specific objects.
    """

    @abstractmethod
    def load_from_bytes(self, img_bytes: bytes) -> Any:
        """
        Decode an encoded image.

        Args:
            img_bytes: Encoded image data (PNG, JPEG, etc.)

        Returns:
            Handler-specific image object
        """

    @abstractmethod
    def save_to_bytes(self, img: Any, format: str = 'png', quality: int = 85) -> bytes:
        """
        Encode an image.

        Args:
            img: Handler-specific image object
            format: Output format ('png' or 'jpeg')
            quality: JPEG quality (1-100)

        Returns:
            Encoded image data
        """

    @abstractmethod
    def crop(self, img: Any, left: int, top: int, right: int, bottom: int) -> Any:
        """
        Cut a rectangular region out of an image.

        Args:
            img: Handler-specific image object
            left: Left coordinate
            top: Top coordinate
            right: Right coordinate
            bottom: Bottom coordinate

        Returns:
            Cropped image object
        """

    @abstractmethod
    def resize(self, img: Any, max_width: int, max_height: int) -> Any:
        """
        Scale an image while keeping its aspect ratio.

        Args:
            img: Handler-specific image object
            max_width: Maximum width in pixels
            max_height: Maximum height in pixels

        Returns:
            Resized image object
        """

    @abstractmethod
    def get_dimensions(self, img: Any) -> Tuple[int, int]:
        """
        Report the size of an image.

        Args:
            img: Handler-specific image object

        Returns:
            Tuple of (width, height)
        """

    @abstractmethod
    def to_grayscale(self, img: Any) -> Any:
        """
        Produce a grayscale version of an image.

        Args:
            img: Handler-specific image object

        Returns:
            Grayscale image object
        """

    @abstractmethod
    def gaussian_blur(self, img: Any, sigma: float) -> Any:
        """
        Blur an image with a Gaussian kernel.

        Args:
            img: Handler-specific image object
            sigma: Blur sigma value (0 = no blur)

        Returns:
            Blurred image object
        """

    @abstractmethod
    def absolute_difference(self, img1: Any, img2: Any) -> Any:
        """
        Compute the absolute difference of two images.

        Args:
            img1: First image (handler-specific object)
            img2: Second image (handler-specific object)

        Returns:
            Difference image object
        """

    @abstractmethod
    def threshold(self, img: Any, threshold_value: int) -> Tuple[float, Any]:
        """
        Threshold an image and measure how much of it changed.

        Args:
            img: Handler-specific image object (typically grayscale difference)
            threshold_value: Threshold value (0-255)

        Returns:
            Tuple of (change_percentage, binary_mask)
            - change_percentage: Percentage of pixels above threshold (0-100)
            - binary_mask: Handler-specific binary mask object
        """

    @abstractmethod
    def apply_red_overlay(self, img: Any, mask: Any) -> bytes:
        """
        Tint an image red wherever the mask is set.

        Args:
            img: Handler-specific image object (color)
            mask: Handler-specific binary mask object

        Returns:
            JPEG bytes with red overlay applied
        """

    @abstractmethod
    def close(self, img: Any) -> None:
        """
        Release any resources held by an image, if the backend needs it.

        Args:
            img: Handler-specific image object
        """

    @abstractmethod
    def find_template(
        self,
        img: Any,
        template_img: Any,
        original_bbox: Tuple[int, int, int, int],
        search_tolerance: float = 0.2
    ) -> Optional[Tuple[int, int, int, int]]:
        """
        Locate a template inside an image using template matching.

        Args:
            img: Handler-specific image object to search in
            template_img: Handler-specific template image object to find
            original_bbox: Original bounding box (left, top, right, bottom)
            search_tolerance: How far to search (0.2 = ±20% of region size)

        Returns:
            New bounding box (left, top, right, bottom) or None if not found
        """

    @abstractmethod
    def save_template(
        self,
        img: Any,
        bbox: Tuple[int, int, int, int],
        output_path: str
    ) -> bool:
        """
        Write a cropped region to disk as a template file.

        Args:
            img: Handler-specific image object
            bbox: Bounding box to crop (left, top, right, bottom)
            output_path: Where to save the template PNG

        Returns:
            True if successful, False otherwise
        """

    @abstractmethod
    def draw_bounding_box(
        self,
        img_bytes: bytes,
        x: int,
        y: int,
        width: int,
        height: int,
        color: Tuple[int, int, int] = (255, 0, 0),
        thickness: int = 3
    ) -> bytes:
        """
        Draw a rectangle outline on an encoded image.

        Args:
            img_bytes: Image data as bytes
            x: Left coordinate
            y: Top coordinate
            width: Box width
            height: Box height
            color: BGR color tuple (default: blue)
            thickness: Line thickness in pixels

        Returns:
            Image bytes with bounding box drawn
        """
def _worker_generate_diff(conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
    """
    Worker: Generate diff visualization using LibvipsImageDiffHandler in isolated subprocess.

    This runs in a separate process for complete memory isolation.
    Uses print() instead of loguru to avoid forking issues.

    Args:
        conn: Pipe connection; receives the diff image bytes, or None on any
            failure. It is always closed before the worker returns.
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        threshold: Pixel difference threshold (converted with int())
        blur_sigma: Accepted for signature parity; blur is intentionally not
            applied in this worker
        max_width: Maximum width for diff
        max_height: Maximum height for diff
    """
    try:
        # Import handler inside worker so the parent never loads libvips for this call
        from .libvips_handler import LibvipsImageDiffHandler

        print("[Worker] Initializing handler", flush=True)
        handler = LibvipsImageDiffHandler()

        # Load images using handler
        img_from = handler.load_from_bytes(img_bytes_from)
        img_to = handler.load_from_bytes(img_bytes_to)

        # Ensure same size
        w1, h1 = handler.get_dimensions(img_from)
        w2, h2 = handler.get_dimensions(img_to)
        if (w1, h1) != (w2, h2):
            img_from = handler.resize(img_from, w2, h2)

        # Downscale for faster diff visualization
        img_from = handler.resize(img_from, max_width, max_height)
        img_to = handler.resize(img_to, max_width, max_height)

        # Convert to grayscale
        gray_from = handler.to_grayscale(img_from)
        gray_to = handler.to_grayscale(img_to)

        # NOTE: Gaussian blur is deliberately skipped here (LibVIPS threading
        # issues were observed with gaussblur in subprocesses).

        # Calculate difference
        diff = handler.absolute_difference(gray_from, gray_to)

        # Threshold to get mask (the percentage is not needed for the visualization)
        _, diff_mask = handler.threshold(diff, int(threshold))

        # Generate diff image with red overlay
        diff_image_bytes = handler.apply_red_overlay(img_to, diff_mask)

        print(f"[Worker] Generated diff ({len(diff_image_bytes)} bytes)", flush=True)
        conn.send(diff_image_bytes)

    except Exception as e:
        print(f"[Worker] Error: {e}", flush=True)
        import traceback
        traceback.print_exc()
        conn.send(None)
    finally:
        conn.close()


def generate_diff_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
    """
    Generate diff visualization in isolated subprocess for memory leak prevention.

    Args:
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        threshold: Pixel difference threshold
        blur_sigma: Gaussian blur sigma
        max_width: Maximum width for diff
        max_height: Maximum height for diff

    Returns:
        bytes: JPEG diff image or None on failure (worker error, worker exit
        without a result, or no result within 30 seconds)
    """
    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    p = ctx.Process(
        target=_worker_generate_diff,
        args=(child_conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height)
    )
    p.start()

    # The child holds its own copy of this end. Dropping ours means a worker
    # that dies without sending anything is seen as EOF immediately instead of
    # stalling the caller for the full poll timeout.
    child_conn.close()

    result = None
    try:
        # Wait for result (30 second timeout)
        if parent_conn.poll(30):
            result = parent_conn.recv()
    except EOFError:
        print("[Parent] Worker exited without sending a result", flush=True)
    except Exception as e:
        print(f"[Parent] Error receiving result: {e}", flush=True)
    finally:
        # Always close pipe first
        try:
            parent_conn.close()
        except OSError:
            pass

        # Try graceful shutdown
        p.join(timeout=5)
        if p.is_alive():
            print("[Parent] Process didn't exit gracefully, terminating", flush=True)
            p.terminate()
            p.join(timeout=3)

            # Force kill if still alive
            if p.is_alive():
                print("[Parent] Process didn't terminate, killing", flush=True)
                p.kill()
                p.join(timeout=1)

    return result
def _worker_calculate_change_percentage(conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
    """
    Worker: compute the change percentage inside the isolated subprocess.

    Defined at module level because the 'spawn' start method pickles the
    Process target, and functions nested inside another function cannot be
    pickled.

    Args:
        conn: Pipe connection; receives the percentage as a float, or 0.0 on
            any failure. It is always closed before the worker returns.
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        threshold: Pixel difference threshold (converted with int())
        blur_sigma: Gaussian blur sigma passed to the handler
        max_width: Maximum width used when downscaling
        max_height: Maximum height used when downscaling
    """
    try:
        # Import handler inside worker
        from .libvips_handler import LibvipsImageDiffHandler

        handler = LibvipsImageDiffHandler()

        # Load images
        img_from = handler.load_from_bytes(img_bytes_from)
        img_to = handler.load_from_bytes(img_bytes_to)

        # Ensure same size
        w1, h1 = handler.get_dimensions(img_from)
        w2, h2 = handler.get_dimensions(img_to)
        if (w1, h1) != (w2, h2):
            img_from = handler.resize(img_from, w2, h2)

        # Downscale
        img_from = handler.resize(img_from, max_width, max_height)
        img_to = handler.resize(img_to, max_width, max_height)

        # Convert to grayscale
        gray_from = handler.to_grayscale(img_from)
        gray_to = handler.to_grayscale(img_to)

        # Optional blur
        gray_from = handler.gaussian_blur(gray_from, blur_sigma)
        gray_to = handler.gaussian_blur(gray_to, blur_sigma)

        # Calculate difference
        diff = handler.absolute_difference(gray_from, gray_to)

        # Threshold and get percentage (the mask itself is not needed here)
        change_percentage, _ = handler.threshold(diff, int(threshold))

        conn.send(float(change_percentage))

    except Exception as e:
        print(f"[Worker] Calculate error: {e}", flush=True)
        conn.send(0.0)
    finally:
        conn.close()


def calculate_change_percentage_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height):
    """
    Calculate change percentage in isolated subprocess using handler.

    Args:
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        threshold: Pixel difference threshold
        blur_sigma: Gaussian blur sigma
        max_width: Maximum width used when downscaling
        max_height: Maximum height used when downscaling

    Returns:
        float: Change percentage; 0.0 when the worker fails, exits without a
        result, or does not answer within 30 seconds
    """
    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    # The target must be a module-level function: 'spawn' pickles it.
    p = ctx.Process(
        target=_worker_calculate_change_percentage,
        args=(child_conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, max_width, max_height)
    )
    p.start()

    # Drop the parent's copy of the child end so a crashed worker shows up as
    # EOF right away rather than after the full poll timeout.
    child_conn.close()

    result = 0.0
    try:
        if parent_conn.poll(30):
            result = parent_conn.recv()
    except EOFError:
        print("[Parent] Calculate worker exited without sending a result", flush=True)
    except Exception as e:
        print(f"[Parent] Calculate error receiving result: {e}", flush=True)
    finally:
        # Always close pipe first
        try:
            parent_conn.close()
        except OSError:
            pass

        # Try graceful shutdown
        p.join(timeout=5)
        if p.is_alive():
            print("[Parent] Calculate process didn't exit gracefully, terminating", flush=True)
            p.terminate()
            p.join(timeout=3)

            # Force kill if still alive
            if p.is_alive():
                print("[Parent] Calculate process didn't terminate, killing", flush=True)
                p.kill()
                p.join(timeout=1)

    return result
def _worker_compare_images(conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, min_change_percentage, crop_region):
    """
    Worker: compare two screenshots inside the isolated subprocess.

    Defined at module level because the 'spawn' start method pickles the
    Process target, and functions nested inside another function cannot be
    pickled.

    Args:
        conn: Pipe connection; receives (changed_detected, change_percentage),
            or (False, 0.0) on any failure. Always closed before returning.
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        threshold: Pixel difference threshold (converted with int())
        blur_sigma: Gaussian blur sigma; only logged, blur is skipped here
        min_change_percentage: Minimum percentage to trigger change detection
        crop_region: Optional tuple (left, top, right, bottom) for cropping both images
    """
    try:
        print("[Worker] Compare worker starting", flush=True)
        # Import handler inside worker
        from .libvips_handler import LibvipsImageDiffHandler

        print("[Worker] Initializing handler", flush=True)
        handler = LibvipsImageDiffHandler()

        # Load images
        print(f"[Worker] Loading images (from={len(img_bytes_from)} bytes, to={len(img_bytes_to)} bytes)", flush=True)
        img_from = handler.load_from_bytes(img_bytes_from)
        img_to = handler.load_from_bytes(img_bytes_to)
        print("[Worker] Images loaded", flush=True)

        # Crop if region specified
        if crop_region:
            print(f"[Worker] Cropping to region {crop_region}", flush=True)
            left, top, right, bottom = crop_region
            img_from = handler.crop(img_from, left, top, right, bottom)
            img_to = handler.crop(img_to, left, top, right, bottom)
            print("[Worker] Cropping completed", flush=True)

        # Ensure same size
        w1, h1 = handler.get_dimensions(img_from)
        w2, h2 = handler.get_dimensions(img_to)
        print(f"[Worker] Image dimensions: from={w1}x{h1}, to={w2}x{h2}", flush=True)
        if (w1, h1) != (w2, h2):
            print("[Worker] Resizing to match dimensions", flush=True)
            img_from = handler.resize(img_from, w2, h2)

        # Convert to grayscale
        print("[Worker] Converting to grayscale", flush=True)
        gray_from = handler.to_grayscale(img_from)
        gray_to = handler.to_grayscale(img_to)

        # NOTE: gaussblur can hang in subprocesses due to LibVIPS threading.
        # Blur is skipped as a workaround - sigma=0.8 is subtle and the
        # comparison works without it.
        if blur_sigma > 0:
            print(f"[Worker] Skipping blur (sigma={blur_sigma}) due to LibVIPS threading issues in fork", flush=True)

        # Calculate difference
        print("[Worker] Calculating difference", flush=True)
        diff = handler.absolute_difference(gray_from, gray_to)

        # Threshold and get percentage
        print(f"[Worker] Applying threshold ({threshold})", flush=True)
        change_percentage, _ = handler.threshold(diff, int(threshold))

        # Determine if change detected; bool() keeps the payload a plain Python value
        changed_detected = bool(change_percentage > min_change_percentage)

        print(f"[Worker] Comparison complete: changed={changed_detected}, percentage={change_percentage:.2f}%", flush=True)
        conn.send((changed_detected, float(change_percentage)))

    except Exception as e:
        print(f"[Worker] Compare error: {e}", flush=True)
        import traceback
        traceback.print_exc()
        conn.send((False, 0.0))
    finally:
        conn.close()


def compare_images_isolated(img_bytes_from, img_bytes_to, threshold, blur_sigma, min_change_percentage, crop_region=None):
    """
    Compare images in isolated subprocess for change detection.

    Args:
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        threshold: Pixel difference threshold
        blur_sigma: Gaussian blur sigma
        min_change_percentage: Minimum percentage to trigger change detection
        crop_region: Optional tuple (left, top, right, bottom) for cropping both images

    Returns:
        tuple: (changed_detected, change_percentage); (False, 0.0) when the
        worker fails, exits without a result, or does not answer within 30 seconds
    """
    import time

    print("[Parent] Starting compare_images_isolated subprocess", flush=True)
    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    # The target must be a module-level function: 'spawn' pickles it.
    p = ctx.Process(
        target=_worker_compare_images,
        args=(child_conn, img_bytes_from, img_bytes_to, threshold, blur_sigma, min_change_percentage, crop_region)
    )
    print("[Parent] Starting subprocess (pid will be assigned)", flush=True)
    p.start()
    print(f"[Parent] Subprocess started (pid={p.pid}), waiting for result (30s timeout)", flush=True)

    # Drop the parent's copy of the child end so a crashed worker shows up as
    # EOF right away rather than after the full poll timeout.
    child_conn.close()

    result = (False, 0.0)
    try:
        if parent_conn.poll(30):
            print("[Parent] Result available, receiving", flush=True)
            result = parent_conn.recv()
            print(f"[Parent] Result received: {result}", flush=True)
        else:
            print("[Parent] Timeout waiting for result after 30s", flush=True)
    except EOFError:
        print("[Parent] Compare worker exited without sending a result", flush=True)
    except Exception as e:
        print(f"[Parent] Compare error receiving result: {e}", flush=True)
    finally:
        # Always close pipe first
        try:
            parent_conn.close()
        except OSError:
            pass

        # Try graceful shutdown
        print("[Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
        join_start = time.time()
        p.join(timeout=5)
        join_elapsed = time.time() - join_start
        print(f"[Parent] First join took {join_elapsed:.2f}s", flush=True)

        if p.is_alive():
            print("[Parent] Compare process didn't exit gracefully, terminating", flush=True)
            term_start = time.time()
            p.terminate()
            p.join(timeout=3)
            term_elapsed = time.time() - term_start
            print(f"[Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)

            # Force kill if still alive
            if p.is_alive():
                print("[Parent] Compare process didn't terminate, killing", flush=True)
                kill_start = time.time()
                p.kill()
                p.join(timeout=1)
                kill_elapsed = time.time() - kill_start
                print(f"[Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)

        print("[Parent] Subprocess cleanup complete, returning result", flush=True)

    return result
def _worker_compare(conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, crop_region):
    """
    Subprocess entry point that scores how much two screenshots differ.

    Kept at module level so it can be pickled by the 'spawn' start method.

    Args:
        conn: Pipe connection for sending results. Receives the change
            percentage as a float, or a dict with 'error' and 'traceback'
            keys when anything fails. Always closed before returning.
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        pixel_difference_threshold: Pixel-level sensitivity (0-255) - how
            different must a pixel be to count as changed
        blur_sigma: Gaussian blur sigma (blur is applied only when > 0)
        crop_region: Optional (left, top, right, bottom) crop coordinates
    """
    import time
    import traceback

    try:
        import cv2

        # One OpenCV thread per subprocess: otherwise every worker would start
        # as many threads as there are CPU cores.
        cv2.setNumThreads(1)

        print(f"[{time.time():.3f}] [Worker] Compare worker starting (threads=1 for memory optimization)", flush=True)

        print(f"[{time.time():.3f}] [Worker] Loading images (from={len(img_bytes_from)} bytes, to={len(img_bytes_to)} bytes)", flush=True)
        before = cv2.imdecode(np.frombuffer(img_bytes_from, np.uint8), cv2.IMREAD_COLOR)
        after = cv2.imdecode(np.frombuffer(img_bytes_to, np.uint8), cv2.IMREAD_COLOR)

        # imdecode signals failure by returning None
        if before is None:
            raise ValueError("Failed to decode 'from' image - may be corrupt or unsupported format")
        if after is None:
            raise ValueError("Failed to decode 'to' image - may be corrupt or unsupported format")

        print(f"[{time.time():.3f}] [Worker] Images loaded: from={before.shape}, to={after.shape}", flush=True)

        if crop_region:
            print(f"[{time.time():.3f}] [Worker] Cropping to region {crop_region}", flush=True)
            left, top, right, bottom = crop_region
            # Rows are the vertical axis, columns the horizontal one
            window = (slice(top, bottom), slice(left, right))
            before = before[window]
            after = after[window]
            print(f"[{time.time():.3f}] [Worker] Cropped: from={before.shape}, to={after.shape}", flush=True)

        if before.shape != after.shape:
            print(f"[{time.time():.3f}] [Worker] Resizing to match dimensions", flush=True)
            target_height, target_width = after.shape[0], after.shape[1]
            before = cv2.resize(before, (target_width, target_height))

        print(f"[{time.time():.3f}] [Worker] Converting to grayscale", flush=True)
        gray_before = cv2.cvtColor(before, cv2.COLOR_BGR2GRAY)
        gray_after = cv2.cvtColor(after, cv2.COLOR_BGR2GRAY)

        if blur_sigma > 0:
            print(f"[{time.time():.3f}] [Worker] Applying Gaussian blur (sigma={blur_sigma})", flush=True)
            # Kernel size derived from sigma: 2 * round(3*sigma) + 1, forced odd
            ksize = (int(2 * round(3 * blur_sigma)) + 1) | 1
            kernel = (ksize, ksize)
            gray_before = cv2.GaussianBlur(gray_before, kernel, blur_sigma)
            gray_after = cv2.GaussianBlur(gray_after, kernel, blur_sigma)
            print(f"[{time.time():.3f}] [Worker] Blur applied (kernel={ksize}x{ksize})", flush=True)

        print(f"[{time.time():.3f}] [Worker] Calculating absolute difference", flush=True)
        delta = cv2.absdiff(gray_before, gray_after)

        print(f"[{time.time():.3f}] [Worker] Applying pixel difference threshold ({pixel_difference_threshold})", flush=True)
        _, binary = cv2.threshold(delta, int(pixel_difference_threshold), 255, cv2.THRESH_BINARY)

        # Share of pixels that ended up above the threshold
        changed_pixels = np.count_nonzero(binary)
        change_percentage = (changed_pixels / binary.size) * 100.0

        print(f"[{time.time():.3f}] [Worker] Comparison complete: percentage={change_percentage:.2f}%", flush=True)

        # Only the score is reported; the caller decides what counts as a change
        conn.send(float(change_percentage))

    except Exception as e:
        print(f"[{time.time():.3f}] [Worker] Error: {e}", flush=True)
        traceback.print_exc()
        # Ship the failure to the parent as a dict so it can re-raise
        conn.send({'error': str(e), 'traceback': traceback.format_exc()})
    finally:
        conn.close()
async def compare_images_isolated(img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, crop_region=None):
    """
    Compare images in isolated subprocess using OpenCV (async-safe).

    Args:
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        pixel_difference_threshold: Pixel-level sensitivity (0-255) - how different must a pixel be to count as changed
        blur_sigma: Gaussian blur sigma
        crop_region: Optional (left, top, right, bottom) crop coordinates

    Returns:
        float: Change percentage (0-100)

    Raises:
        RuntimeError: The worker reported an error.
        TimeoutError: No result arrived within POLL_TIMEOUT_ABSOLUTE seconds.
    """
    import time
    import asyncio

    print(f"[{time.time():.3f}] [Parent] Starting OpenCV comparison subprocess", flush=True)

    # Use spawn method for clean process (no fork issues)
    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    p = ctx.Process(
        target=_worker_compare,
        args=(child_conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, crop_region)
    )

    print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
    p.start()
    print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)

    result = 0.0
    try:
        # Async-friendly polling: check in small intervals without blocking event loop.
        # The deadline uses the monotonic clock so wall-clock adjustments cannot
        # shorten or stretch the timeout; log timestamps stay wall-clock.
        deadline = time.monotonic() + POLL_TIMEOUT_ABSOLUTE
        while time.monotonic() < deadline:
            # Run poll() in thread to avoid blocking event loop
            has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
            if has_data:
                print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
                result = await asyncio.to_thread(parent_conn.recv)
                # The worker reports failures as a dict carrying an 'error' key
                if isinstance(result, dict) and 'error' in result:
                    raise RuntimeError(f"Image comparison failed: {result['error']}")
                print(f"[{time.time():.3f}] [Parent] Result received: {result:.2f}%", flush=True)
                break
            await asyncio.sleep(0)  # Yield control to event loop
        else:
            # Loop ran out without a break: nothing arrived before the deadline
            from loguru import logger
            logger.critical(f"[OpenCV subprocess] Timeout waiting for compare_images result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
            print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
            raise TimeoutError(f"Image comparison subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")

    except Exception as e:
        print(f"[{time.time():.3f}] [Parent] Error receiving result: {e}", flush=True)
        raise
    finally:
        # Always close pipe first; a failure to close must not mask the real outcome
        try:
            parent_conn.close()
        except OSError:
            pass

        # Try graceful shutdown (async-safe)
        print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
        join_start = time.monotonic()
        await asyncio.to_thread(p.join, 5)
        join_elapsed = time.monotonic() - join_start
        print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)

        if p.is_alive():
            print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
            term_start = time.monotonic()
            p.terminate()
            await asyncio.to_thread(p.join, 3)
            term_elapsed = time.monotonic() - term_start
            print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)

            # Force kill if still alive
            if p.is_alive():
                print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
                kill_start = time.monotonic()
                p.kill()
                await asyncio.to_thread(p.join, 1)
                kill_elapsed = time.monotonic() - kill_start
                print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)

        print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)

    return result
def _worker_generate_diff(conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
    """
    Worker function for generating visual diff with red overlay.

    Args:
        conn: Pipe connection for sending results. Receives the JPEG bytes, or
            a dict with 'error' and 'traceback' keys when anything fails.
            Always closed before returning.
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        pixel_difference_threshold: Pixel-level sensitivity (0-255)
        blur_sigma: Gaussian blur sigma (blur is applied only when > 0)
        max_width: Images wider than this are downscaled
        max_height: Images taller than this are downscaled
    """
    import time
    try:
        import cv2
        cv2.setNumThreads(1)

        print(f"[{time.time():.3f}] [Worker] Generate diff worker starting", flush=True)

        # Decode images
        img_from = cv2.imdecode(np.frombuffer(img_bytes_from, np.uint8), cv2.IMREAD_COLOR)
        img_to = cv2.imdecode(np.frombuffer(img_bytes_to, np.uint8), cv2.IMREAD_COLOR)

        # imdecode returns None on failure; report that clearly instead of
        # failing later with an attribute error on None
        if img_from is None:
            raise ValueError("Failed to decode 'from' image - may be corrupt or unsupported format")
        if img_to is None:
            raise ValueError("Failed to decode 'to' image - may be corrupt or unsupported format")

        # Resize if needed to match dimensions
        if img_from.shape != img_to.shape:
            img_from = cv2.resize(img_from, (img_to.shape[1], img_to.shape[0]))

        # Downscale to max dimensions for faster processing
        h, w = img_to.shape[:2]
        if w > max_width or h > max_height:
            scale = min(max_width / w, max_height / h)
            # Clamp to 1px: a very elongated image must not round down to a
            # zero-sized target, which cv2.resize rejects
            new_w = max(1, int(w * scale))
            new_h = max(1, int(h * scale))
            img_from = cv2.resize(img_from, (new_w, new_h))
            img_to = cv2.resize(img_to, (new_w, new_h))

        # Convert to grayscale
        gray_from = cv2.cvtColor(img_from, cv2.COLOR_BGR2GRAY)
        gray_to = cv2.cvtColor(img_to, cv2.COLOR_BGR2GRAY)

        # Optional blur
        if blur_sigma > 0:
            # 2 * round(3*sigma) + 1 is always odd, as GaussianBlur requires
            ksize = int(2 * round(3 * blur_sigma)) + 1
            gray_from = cv2.GaussianBlur(gray_from, (ksize, ksize), blur_sigma)
            gray_to = cv2.GaussianBlur(gray_to, (ksize, ksize), blur_sigma)

        # Calculate difference
        diff = cv2.absdiff(gray_from, gray_to)

        # Apply threshold to get mask
        _, mask = cv2.threshold(diff, int(pixel_difference_threshold), 255, cv2.THRESH_BINARY)

        # Create red overlay on original 'to' image
        # Where mask is 255 (changed), blend 50% red: channel 2 is red in BGR
        overlay = img_to.copy()
        overlay[:, :, 2] = np.where(mask > 0,
                                    np.clip(overlay[:, :, 2] * 0.5 + 127, 0, 255).astype(np.uint8),
                                    overlay[:, :, 2])
        # Halve blue and green under the mask so the red tint stands out
        overlay[:, :, 0:2] = np.where(mask[:, :, np.newaxis] > 0,
                                      (overlay[:, :, 0:2] * 0.5).astype(np.uint8),
                                      overlay[:, :, 0:2])

        # Encode as JPEG
        _, encoded = cv2.imencode('.jpg', overlay, [cv2.IMWRITE_JPEG_QUALITY, 85])
        diff_bytes = encoded.tobytes()

        print(f"[{time.time():.3f}] [Worker] Generated diff ({len(diff_bytes)} bytes)", flush=True)
        conn.send(diff_bytes)

    except Exception as e:
        print(f"[{time.time():.3f}] [Worker] Generate diff error: {e}", flush=True)
        import traceback
        traceback.print_exc()
        # Send error info as dict so parent can re-raise
        conn.send({'error': str(e), 'traceback': traceback.format_exc()})
    finally:
        conn.close()
async def generate_diff_isolated(img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
    """
    Generate visual diff with red overlay in isolated subprocess (async-safe).

    Args:
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        pixel_difference_threshold: Pixel-level sensitivity (0-255)
        blur_sigma: Gaussian blur sigma
        max_width: Maximum width for the diff image
        max_height: Maximum height for the diff image

    Returns:
        bytes: JPEG diff image or None on failure

    Raises:
        RuntimeError: The worker reported an error.
        TimeoutError: No result arrived within POLL_TIMEOUT_ABSOLUTE seconds.
    """
    import time
    import asyncio

    print(f"[{time.time():.3f}] [Parent] Starting generate_diff subprocess", flush=True)

    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    p = ctx.Process(
        target=_worker_generate_diff,
        args=(child_conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height)
    )

    print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
    p.start()
    print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)

    result = None
    try:
        # Async-friendly polling: check in small intervals without blocking event loop.
        # The deadline uses the monotonic clock so wall-clock adjustments cannot
        # shorten or stretch the timeout; log timestamps stay wall-clock.
        deadline = time.monotonic() + POLL_TIMEOUT_ABSOLUTE
        while time.monotonic() < deadline:
            # Run poll() in thread to avoid blocking event loop
            has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
            if has_data:
                print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
                result = await asyncio.to_thread(parent_conn.recv)
                # The worker reports failures as a dict carrying an 'error' key
                if isinstance(result, dict) and 'error' in result:
                    raise RuntimeError(f"Generate diff failed: {result['error']}")
                print(f"[{time.time():.3f}] [Parent] Result received ({len(result) if result else 0} bytes)", flush=True)
                break
            await asyncio.sleep(0)  # Yield control to event loop
        else:
            # Loop ran out without a break: nothing arrived before the deadline
            from loguru import logger
            logger.critical(f"[OpenCV subprocess] Timeout waiting for generate_diff result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
            print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
            raise TimeoutError(f"Generate diff subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")

    except Exception as e:
        print(f"[{time.time():.3f}] [Parent] Error receiving diff: {e}", flush=True)
        raise
    finally:
        # Always close pipe first; a failure to close must not mask the real outcome
        try:
            parent_conn.close()
        except OSError:
            pass

        # Try graceful shutdown (async-safe)
        print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
        join_start = time.monotonic()
        await asyncio.to_thread(p.join, 5)
        join_elapsed = time.monotonic() - join_start
        print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)

        if p.is_alive():
            print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
            term_start = time.monotonic()
            p.terminate()
            await asyncio.to_thread(p.join, 3)
            term_elapsed = time.monotonic() - term_start
            print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)

            # Last resort when terminate was not enough
            if p.is_alive():
                print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
                kill_start = time.monotonic()
                p.kill()
                await asyncio.to_thread(p.join, 1)
                kill_elapsed = time.monotonic() - kill_start
                print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)

        print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)

    return result
def _worker_draw_bounding_box(conn, img_bytes, x, y, width, height, color, thickness):
    """
    Subprocess entry point that outlines a rectangle on an encoded image.

    Args:
        conn: Pipe connection for sending results. Receives the PNG bytes,
            None when the image cannot be decoded, or a dict with 'error' and
            'traceback' keys when an exception occurs. Always closed before
            returning.
        img_bytes: Image data as bytes
        x: Left coordinate
        y: Top coordinate
        width: Box width
        height: Box height
        color: Color tuple handed to cv2.rectangle (BGR order)
        thickness: Line thickness in pixels
    """
    import time
    import traceback

    try:
        import cv2

        cv2.setNumThreads(1)

        print(f"[{time.time():.3f}] [Worker] Draw bounding box worker starting", flush=True)

        canvas = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_COLOR)
        if canvas is not None:
            # Rectangle is given by its top-left and bottom-right corners
            top_left = (x, y)
            bottom_right = (x + width, y + height)
            cv2.rectangle(canvas, top_left, bottom_right, color, thickness)

            # Re-encode as PNG
            _, png = cv2.imencode('.png', canvas)
            payload = png.tobytes()

            print(f"[{time.time():.3f}] [Worker] Bounding box drawn ({len(payload)} bytes)", flush=True)
            conn.send(payload)
        else:
            # Undecodable input is reported as None rather than as an error dict
            print(f"[{time.time():.3f}] [Worker] Failed to decode image", flush=True)
            conn.send(None)

    except Exception as e:
        print(f"[{time.time():.3f}] [Worker] Draw bounding box error: {e}", flush=True)
        traceback.print_exc()
        # Ship the failure to the parent as a dict so it can re-raise
        conn.send({'error': str(e), 'traceback': traceback.format_exc()})
    finally:
        conn.close()
async def draw_bounding_box_isolated(img_bytes, x, y, width, height, color=(255, 0, 0), thickness=3):
    """
    Draw bounding box on image in isolated subprocess (async-safe).

    Args:
        img_bytes: Image data as bytes
        x: Left coordinate
        y: Top coordinate
        width: Box width
        height: Box height
        color: BGR color tuple (default: blue)
        thickness: Line thickness in pixels

    Returns:
        bytes: PNG image with bounding box or None on failure

    Raises:
        RuntimeError: The worker reported an error.
        TimeoutError: No result arrived within POLL_TIMEOUT_ABSOLUTE seconds.
    """
    import time
    import asyncio

    print(f"[{time.time():.3f}] [Parent] Starting draw_bounding_box subprocess", flush=True)

    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    p = ctx.Process(
        target=_worker_draw_bounding_box,
        args=(child_conn, img_bytes, x, y, width, height, color, thickness)
    )

    print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
    p.start()
    print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)

    result = None
    try:
        # Async-friendly polling: check in small intervals without blocking event loop.
        # The deadline uses the monotonic clock so wall-clock adjustments cannot
        # shorten or stretch the timeout; log timestamps stay wall-clock.
        deadline = time.monotonic() + POLL_TIMEOUT_ABSOLUTE
        while time.monotonic() < deadline:
            # Run poll() in thread to avoid blocking event loop
            has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
            if has_data:
                print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
                # Run recv() in thread too
                result = await asyncio.to_thread(parent_conn.recv)
                # The worker reports failures as a dict carrying an 'error' key
                if isinstance(result, dict) and 'error' in result:
                    raise RuntimeError(f"Draw bounding box failed: {result['error']}")
                print(f"[{time.time():.3f}] [Parent] Result received ({len(result) if result else 0} bytes)", flush=True)
                break
            # Yield control to event loop
            await asyncio.sleep(0)
        else:
            # Loop ran out without a break: nothing arrived before the deadline
            from loguru import logger
            logger.critical(f"[OpenCV subprocess] Timeout waiting for draw_bounding_box result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
            print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
            raise TimeoutError(f"Draw bounding box subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")

    except Exception as e:
        print(f"[{time.time():.3f}] [Parent] Error receiving result: {e}", flush=True)
        raise
    finally:
        # Always close pipe first; a failure to close must not mask the real outcome
        try:
            parent_conn.close()
        except OSError:
            pass

        # Try graceful shutdown (run join in thread to avoid blocking)
        print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (3s timeout)", flush=True)
        join_start = time.monotonic()
        await asyncio.to_thread(p.join, 3)
        join_elapsed = time.monotonic() - join_start
        print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)

        if p.is_alive():
            print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
            term_start = time.monotonic()
            p.terminate()
            await asyncio.to_thread(p.join, 2)
            term_elapsed = time.monotonic() - term_start
            print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)

            # Last resort when terminate was not enough
            if p.is_alive():
                print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
                kill_start = time.monotonic()
                p.kill()
                await asyncio.to_thread(p.join, 1)
                kill_elapsed = time.monotonic() - kill_start
                print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)

        print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)

    return result
def _worker_calculate_percentage(conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
    """
    Worker function for calculating change percentage.

    Args:
        conn: Pipe connection for sending results. Receives the percentage as
            a float, or a dict with 'error' and 'traceback' keys when anything
            fails. Always closed before returning.
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        pixel_difference_threshold: Pixel-level sensitivity (0-255)
        blur_sigma: Gaussian blur sigma (blur is applied only when > 0)
        max_width: Images wider than this are downscaled
        max_height: Images taller than this are downscaled
    """
    import time
    try:
        import cv2
        cv2.setNumThreads(1)

        # Decode images
        img_from = cv2.imdecode(np.frombuffer(img_bytes_from, np.uint8), cv2.IMREAD_COLOR)
        img_to = cv2.imdecode(np.frombuffer(img_bytes_to, np.uint8), cv2.IMREAD_COLOR)

        # imdecode returns None on failure; report that clearly instead of
        # failing later with an attribute error on None
        if img_from is None:
            raise ValueError("Failed to decode 'from' image - may be corrupt or unsupported format")
        if img_to is None:
            raise ValueError("Failed to decode 'to' image - may be corrupt or unsupported format")

        # Resize if needed
        if img_from.shape != img_to.shape:
            img_from = cv2.resize(img_from, (img_to.shape[1], img_to.shape[0]))

        # Downscale to max dimensions
        h, w = img_to.shape[:2]
        if w > max_width or h > max_height:
            scale = min(max_width / w, max_height / h)
            # Clamp to 1px: a very elongated image must not round down to a
            # zero-sized target, which cv2.resize rejects
            new_w = max(1, int(w * scale))
            new_h = max(1, int(h * scale))
            img_from = cv2.resize(img_from, (new_w, new_h))
            img_to = cv2.resize(img_to, (new_w, new_h))

        # Convert to grayscale
        gray_from = cv2.cvtColor(img_from, cv2.COLOR_BGR2GRAY)
        gray_to = cv2.cvtColor(img_to, cv2.COLOR_BGR2GRAY)

        # Optional blur
        if blur_sigma > 0:
            # 2 * round(3*sigma) + 1 is always odd, as GaussianBlur requires
            ksize = int(2 * round(3 * blur_sigma)) + 1
            gray_from = cv2.GaussianBlur(gray_from, (ksize, ksize), blur_sigma)
            gray_to = cv2.GaussianBlur(gray_to, (ksize, ksize), blur_sigma)

        # Calculate difference
        diff = cv2.absdiff(gray_from, gray_to)

        # Apply threshold
        _, thresholded = cv2.threshold(diff, int(pixel_difference_threshold), 255, cv2.THRESH_BINARY)

        # Calculate percentage of pixels above the threshold
        total_pixels = thresholded.size
        changed_pixels = np.count_nonzero(thresholded)
        change_percentage = (changed_pixels / total_pixels) * 100.0

        conn.send(float(change_percentage))

    except Exception as e:
        print(f"[{time.time():.3f}] [Worker] Calculate percentage error: {e}", flush=True)
        import traceback
        traceback.print_exc()
        # Send error info as dict so parent can re-raise
        conn.send({'error': str(e), 'traceback': traceback.format_exc()})
    finally:
        conn.close()
async def calculate_change_percentage_isolated(img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height):
    """
    Calculate change percentage in isolated subprocess (async-safe).

    Args:
        img_bytes_from: Previous screenshot bytes
        img_bytes_to: Current screenshot bytes
        pixel_difference_threshold: Pixel-level sensitivity (0-255)
        blur_sigma: Gaussian blur sigma
        max_width: Maximum width used when downscaling
        max_height: Maximum height used when downscaling

    Returns:
        float: Change percentage

    Raises:
        RuntimeError: The worker reported an error.
        TimeoutError: No result arrived within POLL_TIMEOUT_ABSOLUTE seconds.
    """
    import time
    import asyncio

    print(f"[{time.time():.3f}] [Parent] Starting calculate_percentage subprocess", flush=True)

    ctx = multiprocessing.get_context('spawn')
    parent_conn, child_conn = ctx.Pipe()

    p = ctx.Process(
        target=_worker_calculate_percentage,
        args=(child_conn, img_bytes_from, img_bytes_to, pixel_difference_threshold, blur_sigma, max_width, max_height)
    )

    print(f"[{time.time():.3f}] [Parent] Starting subprocess", flush=True)
    p.start()
    print(f"[{time.time():.3f}] [Parent] Subprocess started (pid={p.pid}), waiting for result ({POLL_TIMEOUT_ABSOLUTE}s timeout)", flush=True)

    result = 0.0
    try:
        # Async-friendly polling: check in small intervals without blocking event loop.
        # The deadline uses the monotonic clock so wall-clock adjustments cannot
        # shorten or stretch the timeout; log timestamps stay wall-clock.
        deadline = time.monotonic() + POLL_TIMEOUT_ABSOLUTE
        while time.monotonic() < deadline:
            # Run poll() in thread to avoid blocking event loop
            has_data = await asyncio.to_thread(parent_conn.poll, 0.1)
            if has_data:
                print(f"[{time.time():.3f}] [Parent] Result available, receiving", flush=True)
                result = await asyncio.to_thread(parent_conn.recv)
                # The worker reports failures as a dict carrying an 'error' key
                if isinstance(result, dict) and 'error' in result:
                    raise RuntimeError(f"Calculate change percentage failed: {result['error']}")
                print(f"[{time.time():.3f}] [Parent] Result received: {result:.2f}%", flush=True)
                break
            await asyncio.sleep(0)  # Yield control to event loop
        else:
            # Loop ran out without a break: nothing arrived before the deadline
            from loguru import logger
            logger.critical(f"[OpenCV subprocess] Timeout waiting for calculate_change_percentage result after {POLL_TIMEOUT_ABSOLUTE}s (subprocess may be hung)")
            print(f"[{time.time():.3f}] [Parent] Timeout waiting for result after {POLL_TIMEOUT_ABSOLUTE}s", flush=True)
            raise TimeoutError(f"Calculate change percentage subprocess timeout after {POLL_TIMEOUT_ABSOLUTE}s")

    except Exception as e:
        print(f"[{time.time():.3f}] [Parent] Error receiving percentage: {e}", flush=True)
        raise
    finally:
        # Always close pipe first; a failure to close must not mask the real outcome
        try:
            parent_conn.close()
        except OSError:
            pass

        # Try graceful shutdown (async-safe)
        print(f"[{time.time():.3f}] [Parent] Waiting for subprocess to exit (5s timeout)", flush=True)
        join_start = time.monotonic()
        await asyncio.to_thread(p.join, 5)
        join_elapsed = time.monotonic() - join_start
        print(f"[{time.time():.3f}] [Parent] First join took {join_elapsed:.2f}s", flush=True)

        if p.is_alive():
            print(f"[{time.time():.3f}] [Parent] Process didn't exit gracefully, terminating", flush=True)
            term_start = time.monotonic()
            p.terminate()
            await asyncio.to_thread(p.join, 3)
            term_elapsed = time.monotonic() - term_start
            print(f"[{time.time():.3f}] [Parent] Terminate+join took {term_elapsed:.2f}s", flush=True)

            # Last resort when terminate was not enough
            if p.is_alive():
                print(f"[{time.time():.3f}] [Parent] Process didn't terminate, killing", flush=True)
                kill_start = time.monotonic()
                p.kill()
                await asyncio.to_thread(p.join, 1)
                kill_elapsed = time.monotonic() - kill_start
                print(f"[{time.time():.3f}] [Parent] Kill+join took {kill_elapsed:.2f}s", flush=True)

        print(f"[{time.time():.3f}] [Parent] Subprocess cleanup complete, returning result", flush=True)

    return result
class LibvipsImageDiffHandler(ImageDiffHandler):
    """
    LibVIPS implementation using streaming architecture.

    Benefits:
    - 3x faster than ImageMagick
    - 5x less memory than PIL
    - Automatic multi-threading
    - Streaming - processes images in chunks
    """

    def __init__(self):
        """Refuse to construct the handler when pyvips could not be imported."""
        if not PYVIPS_AVAILABLE:
            raise ImportError("pyvips is not installed. Install with: pip install pyvips")

    @staticmethod
    def _write_via_temp_file(img: pyvips.Image, suffix: str, **write_args) -> bytes:
        """
        Encode *img* through a temporary file and return the encoded bytes.

        The temp file is created with delete=False (pyvips reopens it by path),
        so it is removed here in a ``finally`` - otherwise every failed write
        would leave an orphaned file behind.
        """
        import tempfile

        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            temp_path = tmp.name
        try:
            img.write_to_file(temp_path, **write_args)
            with open(temp_path, 'rb') as f:
                return f.read()
        finally:
            try:
                os.unlink(temp_path)
            except OSError as e:
                # Cleanup is best-effort; never mask the real result/exception
                logger.debug(f"Could not remove temp file {temp_path}: {e}")

    def load_from_bytes(self, img_bytes: bytes) -> pyvips.Image:
        """Load image from bytes using libvips streaming."""
        return pyvips.Image.new_from_buffer(img_bytes, '')

    def save_to_bytes(self, img: pyvips.Image, format: str = 'png', quality: int = 85) -> bytes:
        """
        Save image to bytes using temp file.

        Note: Uses temp file instead of write_to_buffer() to avoid C memory leak.
        See: https://github.com/libvips/pyvips/issues/234

        Args:
            img: Image to encode.
            format: 'png', 'jpg' or 'jpeg' (case-insensitive).
            quality: JPEG quality (ignored for PNG).

        Returns:
            Encoded image bytes. If the temp-file path fails for any reason
            (including an unsupported format) the error is logged and
            write_to_buffer() is used instead: PNG for 'png', JPEG otherwise.
        """
        format = format.lower()

        try:
            if format == 'png':
                suffix = '.png'
                write_args = {'compression': 6}
            elif format in ['jpg', 'jpeg']:
                suffix = '.jpg'
                write_args = {'Q': quality}
            else:
                raise ValueError(f"Unsupported format: {format}")

            # Use temp file to avoid write_to_buffer() memory leak
            return self._write_via_temp_file(img, suffix, **write_args)

        except Exception as e:
            logger.error(f"Failed to save via temp file: {e}")
            # Fallback to write_to_buffer if temp file fails
            if format == 'png':
                return img.write_to_buffer('.png', compression=6)
            else:
                return img.write_to_buffer('.jpg', Q=quality)

    def crop(self, img: pyvips.Image, left: int, top: int, right: int, bottom: int) -> pyvips.Image:
        """Crop image using libvips; takes edges (left, top, right, bottom), not width/height."""
        width = right - left
        height = bottom - top
        return img.crop(left, top, width, height)

    def resize(self, img: pyvips.Image, max_width: int, max_height: int) -> pyvips.Image:
        """
        Resize image maintaining aspect ratio.

        Uses thumbnail_image for efficient downscaling with streaming.
        Images already within the limits are returned unchanged.
        """
        width, height = img.width, img.height

        if width <= max_width and height <= max_height:
            return img

        # Calculate scaling to fit within max dimensions
        width_ratio = max_width / width if width > max_width else 1.0
        height_ratio = max_height / height if height > max_height else 1.0
        ratio = min(width_ratio, height_ratio)

        # Clamp to 1px: an extreme aspect ratio could otherwise truncate a side to 0
        new_width = max(1, int(width * ratio))
        new_height = max(1, int(height * ratio))

        logger.debug(f"Resizing image: {width}x{height} -> {new_width}x{new_height}")

        # thumbnail_image is faster than resize for downscaling
        return img.thumbnail_image(new_width, height=new_height)

    def get_dimensions(self, img: pyvips.Image) -> Tuple[int, int]:
        """Get image dimensions as (width, height)."""
        return (img.width, img.height)

    def to_grayscale(self, img: pyvips.Image) -> pyvips.Image:
        """Convert to grayscale using 'b-w' colorspace."""
        return img.colourspace('b-w')

    def gaussian_blur(self, img: pyvips.Image, sigma: float) -> pyvips.Image:
        """Apply Gaussian blur; a sigma of 0 or less returns the image untouched."""
        if sigma > 0:
            return img.gaussblur(sigma)
        return img

    def absolute_difference(self, img1: pyvips.Image, img2: pyvips.Image) -> pyvips.Image:
        """
        Calculate absolute difference using operator overloading.

        LibVIPS supports arithmetic operations between images.
        """
        return (img1 - img2).abs()

    def threshold(self, img: pyvips.Image, threshold_value: int) -> Tuple[float, pyvips.Image]:
        """
        Apply threshold and calculate change percentage.

        Uses ifthenelse for efficient thresholding.

        Returns:
            (change_percentage, mask) where mask is 255 for pixels above
            threshold_value and 0 elsewhere.
        """
        # Create binary mask: pixels above threshold = 255, others = 0
        mask = (img > threshold_value).ifthenelse(255, 0)

        # Calculate percentage by averaging mask values
        # avg() returns mean pixel value (0-255)
        # Divide by 255 to get proportion, multiply by 100 for percentage
        mean_value = mask.avg()
        change_percentage = (mean_value / 255.0) * 100.0

        return float(change_percentage), mask

    def apply_red_overlay(self, img: pyvips.Image, mask: pyvips.Image) -> bytes:
        """
        Apply red overlay where mask is True (50% blend).

        Args:
            img: Color image (will be converted to RGB if needed)
            mask: Binary mask (255 where changed, 0 elsewhere)

        Returns:
            JPEG bytes with red overlay
        """
        # Ensure RGB colorspace
        if img.bands == 1:
            img = img.colourspace('srgb')

        # Normalize mask to 0-1 range for blending
        mask_normalized = mask / 255.0

        # Split into R, G, B channels
        channels = img.bandsplit()
        r, g, b = channels[0], channels[1], channels[2]

        # Apply red overlay (50% blend):
        # Where mask is 1: blend 50% original with 50% red (255)
        # Where mask is 0: keep original
        r = r * (1 - mask_normalized * 0.5) + 127.5 * mask_normalized
        g = g * (1 - mask_normalized * 0.5)
        b = b * (1 - mask_normalized * 0.5)

        # Recombine channels
        result = r.bandjoin([g, b])

        # CRITICAL: Use temp file instead of write_to_buffer()
        # write_to_buffer() leaks C memory that isn't returned to OS
        # See: https://github.com/libvips/pyvips/issues/234
        try:
            return self._write_via_temp_file(result, '.jpg', Q=85)
        except Exception as e:
            logger.error(f"Failed to write image via temp file: {e}")
            # Fallback to write_to_buffer if temp file fails
            return result.write_to_buffer('.jpg', Q=85)

    def close(self, img: pyvips.Image) -> None:
        """
        LibVIPS uses automatic reference counting.

        No explicit cleanup needed - memory freed when references drop to zero.
        """
        pass

    def find_template(
        self,
        img: pyvips.Image,
        template_img: pyvips.Image,
        original_bbox: Tuple[int, int, int, int],
        search_tolerance: float = 0.2
    ) -> Tuple[int, int, int, int] | None:
        """
        Find template in image using OpenCV template matching.

        Note: This temporarily converts to numpy for OpenCV operations since
        libvips doesn't have template matching built-in.

        Args:
            img: Image to search in.
            template_img: Template to look for.
            original_bbox: (left, top, right, bottom) where the template used to be.
            search_tolerance: Fraction of the bbox size added as a search margin
                on every side.

        Returns:
            New (left, top, right, bottom) bbox when the match confidence is at
            least 80%, otherwise None. Any error is logged and also yields None.
        """
        import cv2
        import numpy as np

        def to_gray(array):
            # Arrays built below always have shape (height, width, bands), so the
            # band count - not the number of dimensions - decides whether a colour
            # conversion is needed. Single-band data is already grayscale.
            if array.shape[2] == 1:
                return np.ascontiguousarray(array[:, :, 0])
            return cv2.cvtColor(array, cv2.COLOR_RGB2GRAY)

        try:
            left, top, right, bottom = original_bbox
            width = right - left
            height = bottom - top

            # Calculate search region
            margin_x = int(width * search_tolerance)
            margin_y = int(height * search_tolerance)

            search_left = max(0, left - margin_x)
            search_top = max(0, top - margin_y)
            search_right = min(img.width, right + margin_x)
            search_bottom = min(img.height, bottom + margin_y)

            # Crop search region
            search_region = self.crop(img, search_left, search_top, search_right, search_bottom)

            # Convert to numpy arrays for OpenCV
            search_array = np.ndarray(
                buffer=search_region.write_to_memory(),
                dtype=np.uint8,
                shape=[search_region.height, search_region.width, search_region.bands]
            )
            template_array = np.ndarray(
                buffer=template_img.write_to_memory(),
                dtype=np.uint8,
                shape=[template_img.height, template_img.width, template_img.bands]
            )

            # Convert to grayscale
            search_gray = to_gray(search_array)
            template_gray = to_gray(template_array)

            logger.debug(f"Searching for template in region: ({search_left}, {search_top}) to ({search_right}, {search_bottom})")

            # Perform template matching
            result = cv2.matchTemplate(search_gray, template_gray, cv2.TM_CCOEFF_NORMED)
            _, max_val, _, max_loc = cv2.minMaxLoc(result)

            logger.debug(f"Template matching confidence: {max_val:.2%}")

            # Check if match is good enough (80% confidence threshold)
            if max_val >= 0.8:
                # Calculate new bounding box in original image coordinates
                match_x = search_left + max_loc[0]
                match_y = search_top + max_loc[1]

                new_bbox = (match_x, match_y, match_x + width, match_y + height)

                # Calculate movement distance
                move_x = abs(match_x - left)
                move_y = abs(match_y - top)

                logger.info(f"Template found at ({match_x}, {match_y}), "
                            f"moved {move_x}px horizontally, {move_y}px vertically, "
                            f"confidence: {max_val:.2%}")

                return new_bbox
            else:
                logger.warning(f"Template match confidence too low: {max_val:.2%} (need 80%)")
                return None

        except Exception as e:
            logger.error(f"Template matching error: {e}")
            return None

    def save_template(
        self,
        img: pyvips.Image,
        bbox: Tuple[int, int, int, int],
        output_path: str
    ) -> bool:
        """
        Save a cropped region as a template file.

        Args:
            img: Source image.
            bbox: (left, top, right, bottom) region to save.
            output_path: Destination file path.

        Returns:
            True on success, False if anything failed (the error is logged).
        """
        try:
            left, top, right, bottom = bbox
            width = right - left
            height = bottom - top

            # Ensure output directory exists. A bare filename has an empty
            # dirname, and os.makedirs('') would raise, so skip it in that case.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            # Crop template region
            template = self.crop(img, left, top, right, bottom)

            # Save as PNG
            template.write_to_file(output_path, compression=6)

            logger.info(f"Saved template: {output_path} ({width}x{height}px)")
            return True

        except Exception as e:
            logger.error(f"Failed to save template: {e}")
            return False
def get_asset(asset_name, watch, datastore, request):
    """
    Return a processor-specific binary asset for the preview page.

    Images are served as their own HTTP responses rather than being embedded
    as base64 in the HTML, which keeps memory usage down for large screenshots.

    Supported assets:
    - 'screenshot': The screenshot for the specified version

    Args:
        asset_name: Name of the asset to retrieve ('screenshot')
        watch: Watch object
        datastore: Datastore object
        request: Flask request (for version query param)

    Returns:
        tuple: (binary_data, content_type, cache_control_header) or None if not found
    """
    fallback_mime = 'image/png'

    if asset_name != 'screenshot':
        return None

    known_versions = list(watch.history.keys())
    if not known_versions:
        return None

    # Honour ?version=... only when it names a snapshot we have; otherwise use the latest
    requested = request.args.get('version')
    if requested and requested in known_versions:
        timestamp = requested
    else:
        timestamp = known_versions[-1]

    try:
        screenshot_bytes = watch.get_history_snapshot(timestamp=timestamp)

        # Image snapshots must come back as raw bytes
        if not isinstance(screenshot_bytes, bytes):
            logger.error(f"Expected bytes but got {type(screenshot_bytes)} for screenshot at {timestamp}")
            return None

        # Sniff the image format with puremagic (same as Watch.py)
        mime_type = fallback_mime
        try:
            import puremagic
            matches = puremagic.magic_string(screenshot_bytes[:2048])
            if matches:
                mime_type = matches[0].mime_type
                logger.trace(f"Detected MIME type: {mime_type}")
        except Exception as e:
            logger.warning(f"puremagic detection failed: {e}, using 'image/png' fallback")
            mime_type = fallback_mime

        return (screenshot_bytes, mime_type, 'public, max-age=10')

    except Exception as e:
        logger.error(f"Failed to load screenshot for preview asset: {e}")
        return None
def render(watch, datastore, request, url_for, render_template, flash, redirect):
    """
    Build the preview page for a screenshot watch.

    Args:
        watch: Watch object
        datastore: Datastore object
        request: Flask request
        url_for: Flask url_for function
        render_template: Flask render_template function
        flash: Flask flash function
        redirect: Flask redirect function

    Returns:
        Rendered template or redirect
    """
    snapshot_versions = list(watch.history.keys())

    # Nothing captured yet: tell the user and send them back to the watch list
    if not snapshot_versions:
        flash(gettext("Preview unavailable - No snapshots captured yet"), "error")
        return redirect(url_for('watchlist.index'))

    # Show the requested version when it exists, otherwise the most recent one
    wanted = request.args.get('version')
    if wanted and wanted in snapshot_versions:
        shown_timestamp = wanted
    else:
        shown_timestamp = snapshot_versions[-1]

    # The template only receives metadata; the screenshot itself is fetched through
    # the separate /processor-asset/ endpoint instead of being embedded as base64,
    # which significantly reduces memory usage for large images.
    template_context = {
        'watch': watch,
        'uuid': watch.get('uuid'),
        'versions': snapshot_versions,
        'timestamp': shown_timestamp,
        'current_diff_url': watch['url'],
    }
    return render_template('image_ssim_diff/preview.html', **template_context)
class perform_site_check(difference_detection_processor):
    """Fast screenshot comparison processor using OpenCV."""

    # Override to use PNG format for better image comparison (JPEG compression creates noise)
    screenshot_format = SCREENSHOT_FORMAT_PNG

    def run_changedetection(self, watch, force_reprocess=False):
        """
        Perform screenshot comparison using OpenCV subprocess handler.

        Steps: take the fetcher's screenshot, short-circuit when its MD5 equals
        the stored one, resolve thresholds (per-watch config first, then global
        settings), work out an optional crop region (drawn bounding box first,
        then the first visual-selector filter), and finally compare against the
        latest history snapshot in an isolated subprocess.

        Args:
            watch: Watch object being checked.
            force_reprocess: Accepted for interface compatibility.
                NOTE(review): not consulted in this method - confirm whether a
                forced run is expected to bypass the MD5 shortcut.

        Returns:
            tuple: (changed_detected, update_obj, screenshot_bytes)

        Raises:
            ProcessorException: No screenshot is available, or the comparison failed.
            checksumFromPreviousCheckWasTheSame: Screenshot MD5 matches the previous check.
        """
        now = time.time()

        # Get the current screenshot
        if not self.fetcher.screenshot:
            raise ProcessorException(
                message="No screenshot available. Ensure the watch is configured to use a real browser.",
                url=watch.get('url')
            )
        self.screenshot = self.fetcher.screenshot
        self.xpath_data = self.fetcher.xpath_data

        # Quick MD5 check - skip expensive comparison if images are identical
        from changedetectionio.content_fetchers.exceptions import checksumFromPreviousCheckWasTheSame
        current_md5 = hashlib.md5(self.screenshot).hexdigest()
        previous_md5 = watch.get('previous_md5')
        if previous_md5 and current_md5 == previous_md5:
            logger.debug(f"UUID: {watch.get('uuid')} - Screenshot MD5 unchanged ({current_md5}), skipping comparison")
            raise checksumFromPreviousCheckWasTheSame()
        else:
            logger.debug(f"UUID: {watch.get('uuid')} - Screenshot MD5 changed")

        # Check if bounding box is set (for drawn area mode)
        # Read from processor-specific config JSON file (named after processor).
        # Normalise a missing config to {} so every .get() below is safe.
        crop_region = None
        processor_config = self.get_extra_watch_config(PROCESSOR_CONFIG_NAME) or {}
        bounding_box = processor_config.get('bounding_box')

        # Get pixel difference threshold sensitivity (per-watch > global)
        # This controls how different a pixel must be (0-255 scale) to count as "changed"
        pixel_difference_threshold_sensitivity = processor_config.get('pixel_difference_threshold_sensitivity')
        if not pixel_difference_threshold_sensitivity:
            pixel_difference_threshold_sensitivity = self.datastore.data['settings']['application'].get('pixel_difference_threshold_sensitivity', SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT)
        try:
            pixel_difference_threshold_sensitivity = int(pixel_difference_threshold_sensitivity)
        except (ValueError, TypeError):
            logger.warning(f"Invalid pixel_difference_threshold_sensitivity value '{pixel_difference_threshold_sensitivity}', using default")
            pixel_difference_threshold_sensitivity = SCREENSHOT_COMPARISON_THRESHOLD_OPTIONS_DEFAULT

        # Get minimum change percentage (per-watch > global)
        # This controls what percentage of pixels must change to trigger a detection
        min_change_percentage = processor_config.get('min_change_percentage')
        if not min_change_percentage:
            min_change_percentage = self.datastore.data['settings']['application'].get('min_change_percentage', 1)
        try:
            min_change_percentage = int(min_change_percentage)
        except (ValueError, TypeError):
            # The message states the default that is actually applied below
            logger.warning(f"Invalid min_change_percentage value '{min_change_percentage}', using default 1")
            min_change_percentage = 1

        # Template matching (auto_track_region) is temporarily disabled pending an
        # OpenCV implementation, so the crop region computed below is used as-is.

        if bounding_box:
            try:
                # Parse bounding box: "x,y,width,height"
                parts = [int(p.strip()) for p in bounding_box.split(',')]
                if len(parts) == 4:
                    x, y, width, height = parts
                    # Crop uses (left, top, right, bottom)
                    crop_region = (max(0, x), max(0, y), x + width, y + height)
                    logger.info(f"UUID: {watch.get('uuid')} - Bounding box enabled: cropping to region {crop_region} (x={x}, y={y}, w={width}, h={height})")
                else:
                    logger.warning(f"UUID: {watch.get('uuid')} - Invalid bounding box format: {bounding_box} (expected 4 values)")
            except Exception as e:
                logger.warning(f"UUID: {watch.get('uuid')} - Failed to parse bounding box '{bounding_box}': {e}")

        # If no bounding box, check if visual selector (include_filters) is set for region-based comparison
        if not crop_region:
            include_filters = watch.get('include_filters', [])
            if include_filters:
                # Get the first filter to use for cropping
                first_filter = include_filters[0].strip()

                if first_filter and self.xpath_data:
                    try:
                        import json
                        # xpath_data is JSON string from browser
                        xpath_data_obj = json.loads(self.xpath_data) if isinstance(self.xpath_data, str) else self.xpath_data

                        # Find the bounding box for the first filter
                        for element in xpath_data_obj.get('size_pos', []):
                            # Match the filter with the element's xpath
                            if element.get('xpath') == first_filter and element.get('highlight_as_custom_filter'):
                                # Found the element - extract crop coordinates
                                left = element.get('left', 0)
                                top = element.get('top', 0)
                                width = element.get('width', 0)
                                height = element.get('height', 0)

                                # Crop uses (left, top, right, bottom)
                                crop_region = (max(0, left), max(0, top), left + width, top + height)

                                logger.info(f"UUID: {watch.get('uuid')} - Visual selector enabled: cropping to region {crop_region} for filter: {first_filter}")
                                break

                    except Exception as e:
                        logger.warning(f"UUID: {watch.get('uuid')} - Failed to parse xpath_data for visual selector: {e}")

        # Check if this is the first check (no previous history)
        history_keys = list(watch.history.keys())
        if not history_keys:
            # First check - save baseline, no comparison
            logger.info(f"UUID: {watch.get('uuid')} - First check for watch {watch.get('uuid')} - saving baseline screenshot")

            update_obj = {
                # self.screenshot has not changed since current_md5 was computed
                'previous_md5': current_md5,
                'last_error': False
            }
            logger.trace(f"Processed in {time.time() - now:.3f}s")
            return False, update_obj, self.screenshot

        # Get previous screenshot bytes from history
        previous_timestamp = history_keys[-1]
        previous_screenshot_bytes = watch.get_history_snapshot(timestamp=previous_timestamp)

        # Screenshots are stored as PNG, so this should be bytes
        if isinstance(previous_screenshot_bytes, str):
            # If it's a string (shouldn't be for screenshots, but handle it)
            previous_screenshot_bytes = previous_screenshot_bytes.encode('utf-8')

        # Perform comparison in isolated subprocess to prevent memory leaks
        try:
            from .image_handler import isolated_opencv as process_screenshot_handler

            # stuff in watch doesnt need to be there
            logger.debug(f"UUID: {watch.get('uuid')} - Starting isolated subprocess comparison (crop_region={crop_region})")

            # Compare using isolated subprocess with OpenCV (async-safe to avoid blocking event loop)
            # Pass raw bytes and crop region - subprocess handles all image operations
            import asyncio
            import threading

            # Async-safe wrapper: runs coroutine in new thread with its own event loop
            # This prevents blocking the async update worker's event loop
            def run_async_in_thread():
                return asyncio.run(
                    process_screenshot_handler.compare_images_isolated(
                        img_bytes_from=previous_screenshot_bytes,
                        img_bytes_to=self.screenshot,
                        pixel_difference_threshold=pixel_difference_threshold_sensitivity,
                        blur_sigma=OPENCV_BLUR_SIGMA,
                        crop_region=crop_region  # Pass crop region for isolated cropping
                    )
                )

            # Run in thread to avoid blocking event loop when called from async update worker
            result_container = [None]
            exception_container = [None]

            def thread_target():
                try:
                    result_container[0] = run_async_in_thread()
                except Exception as e:
                    exception_container[0] = e

            thread = threading.Thread(target=thread_target, daemon=True, name="ImageDiff-Processor")
            thread.start()
            thread.join(timeout=60)

            # join() returns silently on timeout; report that explicitly instead of
            # letting it surface as a misleading "returned no result" error
            if thread.is_alive():
                raise TimeoutError("Image comparison did not finish within 60s")

            if exception_container[0]:
                raise exception_container[0]

            # Subprocess returns only the change score - we decide if it's a "change"
            change_score = result_container[0]
            if change_score is None:
                raise RuntimeError("Image comparison subprocess returned no result")

            changed_detected = change_score > min_change_percentage

            logger.info(f"UUID: {watch.get('uuid')} - {process_screenshot_handler.IMPLEMENTATION_NAME}: {change_score:.2f}% pixels changed, pixel_diff_threshold_sensitivity: {pixel_difference_threshold_sensitivity:.0f} score={change_score:.2f}%, min_change_threshold={min_change_percentage}%")

        except Exception as e:
            logger.error(f"UUID: {watch.get('uuid')} - Failed to compare screenshots: {e}")
            logger.trace(f"UUID: {watch.get('uuid')} - Processed in {time.time() - now:.3f}s")

            raise ProcessorException(
                message=f"UUID: {watch.get('uuid')} - Screenshot comparison failed: {e}",
                url=watch.get('url')
            )

        # Return results
        update_obj = {
            'previous_md5': current_md5,
            'last_error': False
        }

        if changed_detected:
            logger.info(f"UUID: {watch.get('uuid')} - Change detected using OpenCV! Score: {change_score:.2f}")
        else:
            logger.debug(f"UUID: {watch.get('uuid')} - No significant change using OpenCV. Score: {change_score:.2f}")

        logger.trace(f"UUID: {watch.get('uuid')} - Processed in {time.time() - now:.3f}s")

        return changed_detected, update_obj, self.screenshot
{% if versions|length >= 1 %} {% endif %}
Change Detection: {{ "%.2f"|format(change_percentage) }}% of pixels changed {% if change_percentage > 0.1 %} ⚠ Change Detected {% else %} ✓ No Significant Change {% endif %}
{%- if versions|length >= 2 -%} {%- endif -%}

Interactive Comparison

Drag slider to compare Previous ({{ from_version|format_timestamp_timeago }}) vs Current ({{ to_version|format_timestamp_timeago }})
Previous screenshot
Current screenshot
Previous Current

Difference Visualization

Red = Changed Pixels
Difference visualization with red highlights
{% if comparison_data and comparison_data.get('history') and comparison_data.history|length > 1 %}

Comparison History

Recent comparison results (last {{ comparison_data.history|length }} checks)

{% for entry in comparison_data.history|reverse %} {% endfor %}
Timestamp Change % Method Changed?
{{ entry.timestamp|format_timestamp_timeago }} {{ "%.2f"|format(entry.change_percentage) }}% {{ entry.method }} {% if entry.changed %} Yes {% else %} No {% endif %}
{% endif %}
{% endblock %} ================================================ FILE: changedetectionio/processors/image_ssim_diff/templates/image_ssim_diff/preview.html ================================================ {% extends 'base.html' %} {% block content %} {% if versions|length >= 2 %}

Keyboard: ← Previous   → Next
{% endif %}

Screenshot from {{ timestamp|format_timestamp_timeago }}

Screenshot preview
{% endblock %} ================================================ FILE: changedetectionio/processors/image_ssim_diff/util.py ================================================ """ DEPRECATED: All multiprocessing functions have been removed. The image_ssim_diff processor now uses LibVIPS via ImageDiffHandler abstraction, which provides superior performance and memory efficiency through streaming architecture and automatic threading. All image operations are now handled by: - imagehandler.py: Abstract base class defining the interface - libvips_handler.py: LibVIPS implementation with streaming and threading Historical note: This file previously contained multiprocessing workers for: - Template matching (find_region_with_template_matching_isolated) - Template regeneration (regenerate_template_isolated) - Image cropping (crop_image_isolated, crop_pil_image_isolated) These have been replaced by handler methods which are: - Faster (no subprocess overhead) - More memory efficient (LibVIPS streaming) - Cleaner (no multiprocessing deadlocks) - Better tested (no logger/forking issues) """ ================================================ FILE: changedetectionio/processors/magic.py ================================================ """ Content Type Detection and Stream Classification This module provides intelligent content-type detection for changedetection.io. It addresses the common problem where HTTP Content-Type headers are missing, incorrect, or too generic, which would otherwise cause the wrong processor to be used. The guess_stream_type class combines: 1. HTTP Content-Type headers (when available and reliable) 2. Python-magic library for MIME detection (analyzing actual file content) 3. Content-based pattern matching for text formats (HTML tags, XML declarations, etc.) This multi-layered approach ensures accurate detection of RSS feeds, JSON, HTML, PDF, plain text, CSV, YAML, and XML formats - even when servers provide misleading headers. 
Used by: processors/text_json_diff/processor.py and other content processors """ # When to apply the 'cdata to real HTML' hack RSS_XML_CONTENT_TYPES = [ "application/rss+xml", "application/rdf+xml", "application/atom+xml", "text/rss+xml", # rare, non-standard "application/x-rss+xml", # legacy (older feed software) "application/x-atom+xml", # legacy (older Atom) ] # JSON Content-types JSON_CONTENT_TYPES = [ "application/activity+json", "application/feed+json", "application/json", "application/ld+json", "application/vnd.api+json", ] # Generic XML Content-types (non-RSS/Atom) XML_CONTENT_TYPES = [ "text/xml", "application/xml", ] HTML_PATTERNS = [' Union[float, None]: # Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer. standardized_value = raw_value if ',' in standardized_value and '.' in standardized_value: # Identify the correct decimal separator if standardized_value.rfind('.') > standardized_value.rfind(','): standardized_value = standardized_value.replace(',', '') else: standardized_value = standardized_value.replace('.', '').replace(',', '.') else: standardized_value = standardized_value.replace(',', '.') # Remove any non-numeric characters except for the decimal point standardized_value = re.sub(r'[^\d.-]', '', standardized_value) if standardized_value: # Convert to float # @todo locale needs to be the locale of the webpage return float(parse_decimal(standardized_value, locale='en')) return None def __init__(self, *args, **kwargs): # Define default values default_values = { 'in_stock': None, 'price': None, 'currency': None, 'original_price': None } # Initialize the dictionary with default values super().__init__(default_values) # Update with any provided positional arguments (dictionaries) if args: if len(args) == 1 and isinstance(args[0], dict): self.update(args[0]) else: raise ValueError("Only one positional argument of type 'dict' is allowed") def __setitem__(self, key, value): # Custom 
class Watch(BaseWatch):
    """Watch variant that always carries a ``restock`` entry holding a Restock object."""

    def __init__(self, *arg, **kw):
        """Initialise the base watch, then seed 'restock' from kw['default']['restock'] if given."""
        super().__init__(*arg, **kw)
        defaults = kw.get('default')
        if defaults and defaults.get('restock'):
            self['restock'] = Restock(defaults['restock'])
        else:
            self['restock'] = Restock()

    def clear_watch(self):
        """Clear the watch via the base class and replace 'restock' with a fresh Restock."""
        super().clear_watch()
        fresh_restock = Restock()
        self.update({'restock': fresh_restock})

    def extra_notification_token_values(self):
        """Return the base notification token values plus the current 'restock' entry."""
        token_values = super().extra_notification_token_values()
        token_values['restock'] = self.get('restock', {})
        return token_values

    def extra_notification_token_placeholder_info(self):
        """Return the base placeholder descriptions plus the restock price tokens."""
        placeholders = super().extra_notification_token_placeholder_info()
        restock_placeholders = (
            ('restock.price', "Price detected"),
            ('restock.original_price', "Original price at first check"),
        )
        for placeholder in restock_placeholders:
            placeholders.append(placeholder)
        return placeholders
price to trigger a notification paths: /watch: post: x-code-samples: - lang: 'curl' label: 'Restock & price tracking' source: | curl -X POST "http://localhost:5000/api/v1/watch" \ -H "x-api-key: YOUR_API_KEY" \ -H "Content-Type: application/json" \ -d '{ "url": "https://example.com/product", "processor": "restock_diff", "processor_config_restock_diff": { "in_stock_processing": "in_stock_only", "follow_price_changes": true, "price_change_threshold_percent": 5 } }' - lang: 'Python' label: 'Restock & price tracking' source: | import requests headers = { 'x-api-key': 'YOUR_API_KEY', 'Content-Type': 'application/json' } data = { 'url': 'https://example.com/product', 'processor': 'restock_diff', 'processor_config_restock_diff': { 'in_stock_processing': 'in_stock_only', 'follow_price_changes': True, 'price_change_threshold_percent': 5, } } response = requests.post('http://localhost:5000/api/v1/watch', headers=headers, json=data) print(response.json()) /watch/{uuid}: put: x-code-samples: - lang: 'curl' label: 'Update restock config' source: | curl -X PUT "http://localhost:5000/api/v1/watch/YOUR-UUID" \ -H "x-api-key: YOUR_API_KEY" \ -H "Content-Type: application/json" \ -d '{ "processor_config_restock_diff": { "in_stock_processing": "all_changes", "follow_price_changes": true, "price_change_min": 10.00, "price_change_max": 500.00 } }' - lang: 'Python' label: 'Update restock config' source: | import requests headers = { 'x-api-key': 'YOUR_API_KEY', 'Content-Type': 'application/json' } uuid = 'YOUR-UUID' data = { 'processor_config_restock_diff': { 'in_stock_processing': 'all_changes', 'follow_price_changes': True, 'price_change_min': 10.00, 'price_change_max': 500.00, } } response = requests.put(f'http://localhost:5000/api/v1/watch/{uuid}', headers=headers, json=data) print(response.text) /tag/{uuid}: put: x-code-samples: - lang: 'curl' label: 'Set restock config on group/tag' source: | curl -X PUT "http://localhost:5000/api/v1/tag/YOUR-TAG-UUID" \ -H "x-api-key: 
class RestockSettingsForm(Form):
    """Sub-form holding the restock / price-tracking settings of a watch or tag."""

    # Which availability transitions should be followed
    in_stock_processing = RadioField(
        label=_l('Re-stock detection'),
        default="in_stock_only",
        choices=[
            ('in_stock_only', _l("In Stock only (Out Of Stock -> In Stock only)")),
            ('all_changes', _l("Any availability changes")),
            ('off', _l("Off, don't follow availability/restock")),
        ],
    )

    # Optional lower price limit
    price_change_min = FloatField(
        label=_l('Below price to trigger notification'),
        validators=[validators.Optional()],
        render_kw={"placeholder": _l("No limit"), "size": "10"},
    )

    # Optional upper price limit
    price_change_max = FloatField(
        label=_l('Above price to trigger notification'),
        validators=[validators.Optional()],
        render_kw={"placeholder": _l("No limit"), "size": "10"},
    )

    # Optional percentage threshold, validated to lie within 0..100
    price_change_threshold_percent = FloatField(
        label=_l('Threshold in %% for price changes since the original price'),
        validators=[
            validators.Optional(),
            validators.NumberRange(min=0, max=100, message=_l("Should be between 0 and 100")),
        ],
        render_kw={"placeholder": "0%", "size": "5"},
    )

    # Whether price changes are followed at all (on by default)
    follow_price_changes = BooleanField(label=_l('Follow price changes'), default=True)

Note! A Group tag overrides the restock and price detection here.

""" output += """ {% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
{{ render_field(form.processor_config_restock_diff.in_stock_processing) }}
{{ render_checkbox_field(form.processor_config_restock_diff.follow_price_changes) }} Changes in price should trigger a notification
{{ render_field(form.processor_config_restock_diff.price_change_min, placeholder=watch.get('restock', {}).get('price')) }} Minimum amount, Trigger a change/notification when the price drops below this value.
{{ render_field(form.processor_config_restock_diff.price_change_max, placeholder=watch.get('restock', {}).get('price')) }} Maximum amount, Trigger a change/notification when the price rises above this value.
{{ render_field(form.processor_config_restock_diff.price_change_threshold_percent) }} Price must change more than this % to trigger a change since the first check.
For example, If the product is $1,000 USD originally, 2% would mean it has to change more than $20 since the first check.
def _deduplicate_prices(data):
    """
    Collapse price matches into a list of unique numeric values.

    Some price data has multiple entries, OR it has a single entry with
    ['$159', '159', 159, "$ 159"] or just "159". Every candidate is stripped
    of everything except digits and '.', converted to float and collected in
    a set so that differently formatted copies of the same price count once.

    Candidates that contain no usable number (empty string, None, "N/A",
    "1.2.3", ...) are skipped instead of raising ValueError, so one junk
    entry in the page metadata cannot abort the whole extraction.

    Args:
        data: iterable of objects exposing a ``.value`` attribute that is
              either a single price candidate or a list of candidates.

    Returns:
        list of unique float prices (order is not significant).
    """
    import re

    def _to_price(raw):
        # Keep only digits and the decimal point, e.g. "$ 1,299.00" -> "1299.00"
        cleaned = re.sub(r'[^\d.]', '', str(raw))
        if not cleaned:
            return None
        try:
            return float(cleaned)
        except ValueError:
            # e.g. "1.2.3" or a lone "." - not a price we can use
            return None

    unique_data = set()
    for datum in data:
        candidates = datum.value if isinstance(datum.value, list) else [datum.value]
        for candidate in candidates:
            price = _to_price(candidate)
            if price is not None:
                unique_data.add(price)

    return list(unique_data)
# # Performance Impact: # ------------------- # - Memray analysis showed 1.2M document_fromstring allocations per page # - Without subprocess: memory grows by ~50-500MB per parse and lingers # - With subprocess: ~35MB overhead but forces full cleanup after each run # - Trade-off: 35MB resource_tracker vs 500MB+ accumulated leak = much better at scale # # References: # ----------- # - lxml memory issues: https://medium.com/devopss-hole/python-lxml-memory-leak-b8d0b1000dc7 # - libxml2 caching behavior: https://www.mail-archive.com/lxml@python.org/msg00026.html # - GC limitations with C extensions: https://benbernardblog.com/tracking-down-a-freaky-python-memory-leak-part-2/ # # Additional Context: # ------------------- # - jsonpath_ng (used to query the parsed data) is pure Python and doesn't leak # - The leak is specifically from lxml's document parsing, not the JSONPath queries # - Linux-only because multiprocessing spawn is well-tested there; other platforms # use direct call as fallback # # Alternative Solution (Future Optimization): # ------------------------------------------- # This entire problem could be avoided by using regex to extract just the machine # data blocks (JSON-LD, microdata, OpenGraph tags) BEFORE parsing with lxml: # # 1. Use regex to extract JSON-LD script (type="application/ld+json") blocks # 2. Use regex to extract OpenGraph meta (property="og:...") tags # 3. Use regex to find itemprop/itemtype attributes and their containing elements # 4. 
Parse ONLY those extracted snippets instead of the entire HTML document # # Benefits: # - Avoids parsing 5MB of HTML when we only need a few KB of metadata # - Eliminates the lxml memory leak entirely # - Faster extraction (regex is much faster than DOM parsing) # - No subprocess overhead needed # # Trade-offs: # - Regex for HTML is brittle (comments, CDATA, edge cases) # - Microdata extraction would be complex (need to track element boundaries) # - Would need extensive testing to ensure we don't miss valid data # - extruct is battle-tested; regex solution would need similar maturity # # For now, the subprocess approach is safer and leverages existing extruct code. # ============================================================================= def _extract_itemprop_availability_worker(pipe_conn): """ Subprocess worker for itemprop extraction (Linux memory management). Uses spawn multiprocessing to isolate extruct/lxml memory allocations. When the subprocess exits, the OS reclaims ALL memory including lxml's C-level allocations that Python's GC cannot release. 
def extract_itemprop_availability_safe(html_content) -> Restock:
    """
    Extract price/availability metadata, trying the cheapest extractor first.

    Strategy:
      1. Run the pure Python extractor (``.pure_python_extractor``). If it yields
         both a price and an availability, that result is returned directly.
      2. Otherwise fall back to get_itemprop_availability() (extruct/lxml).
         On Linux this runs in a 'spawn' subprocess so that all memory used by
         the parse is returned to the OS when the worker exits; on other
         platforms it is called directly.

    If the subprocess machinery itself fails (cannot start, dies without
    replying, sends a malformed reply) the extraction is retried in-process.
    An error that the worker *reported* is not retried: it is re-raised here,
    so the document is not parsed a second time inside this process.

    Args:
        html_content: HTML string to parse

    Returns:
        Restock: Extracted availability data

    Raises:
        MoreThanOnePriceFound: When multiple prices were detected
        Exception: Any other failure reported by the worker, as "<type>: <message>"
    """
    import platform

    # Step 1: Try pure Python extraction first (fast, no lxml)
    try:
        from .pure_python_extractor import extract_metadata_pure_python, query_price_availability

        logger.trace("Attempting pure Python metadata extraction (no lxml)")
        extracted_data = extract_metadata_pure_python(html_content)
        price_data = query_price_availability(extracted_data)

        # If we got price AND availability, we're done!
        if price_data.get('price') and price_data.get('availability'):
            result = Restock(price_data)
            logger.debug(f"Pure Python extraction successful: {dict(result)}")
            return result

        # If we got some data but not everything, still try extruct for completeness
        if price_data.get('price') or price_data.get('availability'):
            logger.debug(f"Pure Python extraction partial: {price_data}, will try extruct for completeness")

    except Exception as e:
        # Best effort only - any failure here just means we use extruct instead
        logger.debug(f"Pure Python extraction failed: {e}, falling back to extruct")

    # Step 2: Fall back to extruct (uses lxml, needs subprocess on Linux)
    logger.trace("Falling back to extruct (lxml-based) with subprocess isolation")

    # Non-Linux: direct call (no subprocess overhead needed)
    if platform.system() != 'Linux':
        return get_itemprop_availability(html_content)

    import gc
    import json
    import multiprocessing

    process = None
    parent_conn = None
    child_conn = None
    reply = None

    try:
        ctx = multiprocessing.get_context('spawn')
        parent_conn, child_conn = ctx.Pipe()
        process = ctx.Process(target=_extract_itemprop_availability_worker, args=(child_conn,))
        process.start()

        # Drop our copy of the child's end: while the parent holds it open,
        # recv_bytes() can never see EOF and would block forever if the worker
        # dies before replying.
        child_conn.close()
        child_conn = None

        # Send HTML as raw bytes (no pickle), receive the result as JSON
        parent_conn.send_bytes(html_content.encode('utf-8'))
        reply = json.loads(parent_conn.recv_bytes().decode('utf-8'))
        if not isinstance(reply, dict) or 'success' not in reply:
            raise ValueError("malformed reply from extraction subprocess")

        # Wait for subprocess to complete
        process.join()

    except Exception as e:
        # Only failures of the subprocess transport end up here
        logger.warning(f"Subprocess extraction failed: {e}, falling back to direct call")
        reply = None

    finally:
        for conn in (parent_conn, child_conn):
            if conn is not None:
                conn.close()
        # Never leave a stuck or half-finished worker behind
        if process is not None and process.is_alive():
            process.terminate()
            process.join()
        gc.collect()

    if reply is None:
        return get_itemprop_availability(html_content)

    if reply['success']:
        # Reconstruct Restock object from dict
        return Restock(reply['data'])

    # Re-raise the exception that occurred in the subprocess. This happens
    # outside the try block above on purpose, so it is not mistaken for a
    # transport failure and does not trigger the in-process fallback.
    exception_type = reply.get('exception_type')
    if exception_type == 'MoreThanOnePriceFound':
        raise MoreThanOnePriceFound()
    raise Exception(f"{exception_type}: {reply.get('exception_message', '')}")
syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph'] try: data = extruct.extract(html_content, syntaxes=syntaxes) except Exception as e: logger.warning(f"Unable to extract data, document parsing with extruct failed with {type(e).__name__} - {str(e)}") return Restock() logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s") # First phase, dead simple scanning of anything that looks useful value = Restock() if data: logger.debug("Using jsonpath to find price/availability/etc") price_parse = parse('$..(price|Price)') pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )') availability_parse = parse('$..(availability|Availability)') price_result = _deduplicate_prices(price_parse.find(data)) if price_result: # Right now, we just support single product items, maybe we will store the whole actual metadata seperately in teh future and # parse that for the UI? if len(price_result) > 1 and len(price_result) > 1: # See of all prices are different, in the case that one product has many embedded data types with the same price # One might have $121.95 and another 121.95 etc logger.warning(f"More than one price found {price_result}, throwing exception, cant use this plugin.") raise MoreThanOnePriceFound() value['price'] = price_result[0] pricecurrency_result = pricecurrency_parse.find(data) if pricecurrency_result: value['currency'] = pricecurrency_result[0].value availability_result = availability_parse.find(data) if availability_result: value['availability'] = availability_result[0].value if value.get('availability'): value['availability'] = re.sub(r'(?i)^(https|http)://schema.org/', '', value.get('availability').strip(' "\'').lower()) if value.get('availability') else None # Second, go dig OpenGraph which is something that jsonpath_ng cant do because of the tuples and double-dots (:) if not value.get('price') or value.get('availability'): logger.debug("Alternatively digging through OpenGraph properties 
def is_between(number, lower=None, upper=None):
    """
    Report whether ``number`` lies within an inclusive, optionally open-ended range.

    Parameters:
    number (float): The value being tested.
    lower (float or None): Inclusive minimum; None means there is no minimum.
    upper (float or None): Inclusive maximum; None means there is no maximum.

    Returns:
    bool: False as soon as one of the given bounds is violated, True otherwise.
    """
    if lower is not None and not (lower <= number):
        return False
    if upper is not None and not (number <= upper):
        return False
    return True
if (not force_reprocess and not watch.was_edited and self.last_raw_content_checksum and self.last_raw_content_checksum == current_raw_document_checksum): raise checksumFromPreviousCheckWasTheSame() # Unset any existing notification error update_obj = {'last_notification_error': False, 'last_error': False, 'restock': Restock()} self.screenshot = self.fetcher.screenshot self.xpath_data = self.fetcher.xpath_data # Track the content type (readonly field, doesn't trigger was_edited) update_obj['content-type'] = self.fetcher.headers.get('Content-Type', '') # Use hyphen (matches OpenAPI spec) update_obj["last_check_status"] = self.fetcher.get_last_status_code() # Save the raw content checksum to file (processor implementation detail, not watch config) self.update_last_raw_content_checksum(current_raw_document_checksum) # Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly. # Otherwise it will assume "in stock" because nothing suggesting the opposite was found #useless # from ...html_tools import html_to_text # text = html_to_text(self.fetcher.content) # logger.debug(f"Length of text after conversion: {len(text)}") # if not len(text): # from ...content_fetchers.exceptions import ReplyWithContentButNoText # raise ReplyWithContentButNoText(url=watch.link, # status_code=self.fetcher.get_last_status_code(), # screenshot=self.fetcher.screenshot, # html_content=self.fetcher.content, # xpath_data=self.fetcher.xpath_data # ) # Which restock settings to compare against? # Settings are stored in restock_diff.json (migrated from watch.json by update_30). _extra_config = self.get_extra_watch_config('restock_diff.json') restock_settings = _extra_config.get('restock_diff') or { 'follow_price_changes': True, 'in_stock_processing': 'in_stock_only', } # See if any tags have 'activate for individual watches in this tag/group?' 
enabled and use the first we find for tag_uuid in watch.get('tags'): tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {}) if tag.get('overrides_watch'): restock_settings = tag.get('processor_config_restock_diff') or {} logger.info(f"Watch {watch.get('uuid')} - Tag '{tag.get('title')}' selected for restock settings override") break itemprop_availability = {} multiple_prices_found = False # Try built-in extraction first, this will scan metadata in the HTML # On Linux, this runs in a subprocess to prevent lxml/extruct memory leaks try: itemprop_availability = extract_itemprop_availability_safe(self.fetcher.content) except MoreThanOnePriceFound as e: # Don't raise immediately - let plugins try to handle this case # Plugins might be able to determine which price is correct logger.warning(f"Built-in detection found multiple prices on {watch.get('url')}, will try plugin override") multiple_prices_found = True itemprop_availability = {} # If built-in extraction didn't get both price AND availability, try plugin override # Only check plugin if this watch is using a fetcher that might provide better data has_price = itemprop_availability.get('price') is not None has_availability = itemprop_availability.get('availability') is not None # @TODO !!! 
some setting like "Use as fallback" or "always use", "t if not (has_price and has_availability) or True: from changedetectionio.pluggy_interface import get_itemprop_availability_from_plugin fetcher_name = watch.get('fetch_backend', 'html_requests') # Resolve 'system' to the actual fetcher being used # This allows plugins to work even when watch uses "system settings default" if fetcher_name == 'system': # Get the actual fetcher that was used (from self.fetcher) # Fetcher class name gives us the actual backend (e.g., 'html_requests', 'html_webdriver') actual_fetcher = type(self.fetcher).__name__ if 'html_requests' in actual_fetcher.lower(): fetcher_name = 'html_requests' elif 'webdriver' in actual_fetcher.lower() or 'playwright' in actual_fetcher.lower(): fetcher_name = 'html_webdriver' logger.debug(f"Resolved 'system' fetcher to actual fetcher: {fetcher_name}") # Try plugin override - plugins can decide if they support this fetcher if fetcher_name: logger.debug(f"Calling extra plugins for getting item price/availability (fetcher: {fetcher_name})") plugin_availability = get_itemprop_availability_from_plugin(self.fetcher.content, fetcher_name, self.fetcher, watch.link) if plugin_availability: # Plugin provided better data, use it plugin_has_price = plugin_availability.get('price') is not None plugin_has_availability = plugin_availability.get('availability') is not None # Only use plugin data if it's actually better than what we have if plugin_has_price or plugin_has_availability: itemprop_availability = plugin_availability logger.info(f"Using plugin-provided availability data for fetcher '{fetcher_name}' (built-in had price={has_price}, availability={has_availability}; plugin has price={plugin_has_price}, availability={plugin_has_availability})") if not plugin_availability: logger.debug("No item price/availability from plugins") # If we had multiple prices and plugins also failed, NOW raise the exception if multiple_prices_found and not 
itemprop_availability.get('price'): raise ProcessorException( message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.", url=watch.get('url'), status_code=self.fetcher.get_last_status_code(), screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data ) # Something valid in get_itemprop_availability() by scraping metadata ? if itemprop_availability.get('price') or itemprop_availability.get('availability'): # Store for other usage update_obj['restock'] = itemprop_availability if itemprop_availability.get('availability'): # @todo: Configurable? if any(substring.lower() in itemprop_availability['availability'].lower() for substring in [ 'instock', 'instoreonly', 'limitedavailability', 'onlineonly', 'presale'] ): update_obj['restock']['in_stock'] = True else: update_obj['restock']['in_stock'] = False # Main detection method fetched_md5 = None # store original price if not set if itemprop_availability and itemprop_availability.get('price') and not itemprop_availability.get('original_price'): itemprop_availability['original_price'] = itemprop_availability.get('price') update_obj['restock']["original_price"] = itemprop_availability.get('price') if not self.fetcher.instock_data and not itemprop_availability.get('availability') and not itemprop_availability.get('price'): raise ProcessorException( message=f"Unable to extract restock data for this page unfortunately. 
(Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.", url=watch.get('url'), status_code=self.fetcher.get_last_status_code(), screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data ) logger.debug(f"self.fetcher.instock_data is - '{self.fetcher.instock_data}' and itemprop_availability.get('availability') is {itemprop_availability.get('availability')}") # Nothing automatic in microdata found, revert to scraping the page if self.fetcher.instock_data and itemprop_availability.get('availability') is None: # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold. # Careful! this does not really come from chrome/js when the watch is set to plaintext update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned instock_data - '{self.fetcher.instock_data}' from JS scraper.") # Very often websites will lie about the 'availability' in the metadata, so if the scraped version says its NOT in stock, use that. if self.fetcher.instock_data and self.fetcher.instock_data != 'Possibly in stock': if update_obj['restock'].get('in_stock'): logger.warning( f"Lie detected in the availability machine data!! when scraping said its not in stock!! 
itemprop was '{itemprop_availability}' and scraped from browser was '{self.fetcher.instock_data}' update obj was {update_obj['restock']} ") logger.warning(f"Setting instock to FALSE, scraper found '{self.fetcher.instock_data}' in the body but metadata reported not-in-stock") update_obj['restock']["in_stock"] = False # What we store in the snapshot price = update_obj.get('restock').get('price') if update_obj.get('restock').get('price') else "" snapshot_content = f"In Stock: {update_obj.get('restock').get('in_stock')} - Price: {price}" # Main detection method fetched_md5 = hashlib.md5(snapshot_content.encode('utf-8')).hexdigest() # The main thing that all this at the moment comes down to :) changed_detected = False logger.debug(f"Watch UUID {watch.get('uuid')} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") # out of stock -> back in stock only? if watch.get('restock') and watch['restock'].get('in_stock') != update_obj['restock'].get('in_stock'): # Yes if we only care about it going to instock, AND we are in stock if restock_settings.get('in_stock_processing') == 'in_stock_only' and update_obj['restock']['in_stock']: changed_detected = True if restock_settings.get('in_stock_processing') == 'all_changes': # All cases changed_detected = True if restock_settings.get('follow_price_changes') and watch.get('restock') and update_obj.get('restock') and update_obj['restock'].get('price'): price = float(update_obj['restock'].get('price')) # Default to current price if no previous price found if watch['restock'].get('original_price'): previous_price = float(watch['restock'].get('original_price')) # It was different, but negate it further down if price != previous_price: changed_detected = True # Minimum/maximum price limit if update_obj.get('restock') and update_obj['restock'].get('price'): logger.debug( f"{watch.get('uuid')} - Change was detected, 'price_change_max' is '{restock_settings.get('price_change_max', '')}' 'price_change_min' is 
'{restock_settings.get('price_change_min', '')}', price from website is '{update_obj['restock'].get('price', '')}'.") if update_obj['restock'].get('price'): min_limit = float(restock_settings.get('price_change_min')) if restock_settings.get('price_change_min') else None max_limit = float(restock_settings.get('price_change_max')) if restock_settings.get('price_change_max') else None price = float(update_obj['restock'].get('price')) logger.debug(f"{watch.get('uuid')} after float conversion - Min limit: '{min_limit}' Max limit: '{max_limit}' Price: '{price}'") if min_limit or max_limit: if is_between(number=price, lower=min_limit, upper=max_limit): # Price was between min/max limit, so there was nothing todo in any case logger.trace(f"{watch.get('uuid')} {price} is between {min_limit} and {max_limit}, nothing to check, forcing changed_detected = False (was {changed_detected})") changed_detected = False else: logger.trace(f"{watch.get('uuid')} {price} is between {min_limit} and {max_limit}, continuing normal comparison") # Price comparison by % if watch['restock'].get('original_price') and changed_detected and restock_settings.get('price_change_threshold_percent'): previous_price = float(watch['restock'].get('original_price')) pc = float(restock_settings.get('price_change_threshold_percent')) change = abs((price - previous_price) / previous_price * 100) if change and change <= pc: logger.debug(f"{watch.get('uuid')} Override change-detected to FALSE because % threshold ({pc}%) was {change:.3f}%") changed_detected = False else: logger.debug(f"{watch.get('uuid')} Price change was {change:.3f}% , (threshold {pc}%)") # Always record the new checksum update_obj["previous_md5"] = fetched_md5 return changed_detected, update_obj, snapshot_content.strip() ================================================ FILE: changedetectionio/processors/restock_diff/pure_python_extractor.py ================================================ """ Pure Python metadata extractor - no lxml, no memory 
leaks. This module provides a fast, memory-efficient alternative to extruct for common e-commerce metadata extraction. It handles: - JSON-LD (covers 80%+ of modern sites) - OpenGraph meta tags - Basic microdata attributes Uses Python's built-in html.parser instead of lxml/libxml2, avoiding C-level memory allocation issues. For edge cases, the main processor can fall back to extruct (with subprocess isolation on Linux). """ from html.parser import HTMLParser import json import re from loguru import logger class JSONLDExtractor(HTMLParser): """ Extract JSON-LD structured data from HTML. Finds all `); } else { // Fallback: trigger download if popup is blocked const a = document.createElement("a"); a.href = jpegDataUrl; a.download = "changedetection-diff-" + Date.now() + ".jpg"; a.click(); } } /** * Update button UI state */ function setButtonState(button, isLoading, originalHtml = '') { if (!button) return; if (isLoading) { button.innerHTML = 'Generating...'; button.style.opacity = "0.5"; button.style.pointerEvents = "none"; } else { button.innerHTML = originalHtml; button.style.opacity = "1"; button.style.pointerEvents = "auto"; } } /** * Main function: Convert selected diff text to a shareable JPEG image * * Features: * - Expands partial selections to full lines * - Preserves all diff highlighting and formatting * - Adds metadata footer with URL and version info * - Embeds EXIF metadata in the JPEG * - Opens in new window or downloads if popup blocked */ async function diffToJpeg() { // Validate dependencies if (typeof html2canvas === 'undefined') { alert("html2canvas library is not loaded yet. 
Please wait a moment and try again."); return; } // Validate selection const selection = window.getSelection(); if (!selection || selection.rangeCount === 0 || selection.isCollapsed) { alert("Please select the text/lines you want to capture first by highlighting with your mouse."); return; } const originalRange = selection.getRangeAt(0); const differenceElement = document.getElementById("difference"); if (!differenceElement || !differenceElement.contains(originalRange.commonAncestorContainer)) { alert("Please select text within the diff content."); return; } // Setup UI state const btn = document.getElementById("share-as-image-btn"); const originalBtnHtml = btn ? btn.innerHTML : ''; setButtonState(btn, true); let tempElement = null; try { // Expand selection to full lines and clone content const expandedRange = expandRangeToFullLines(originalRange, differenceElement); const selectedFragment = expandedRange.cloneContents(); // Count lines for footer const selectedLines = countLines(selectedFragment); const totalLines = countLines(differenceElement); // Create temporary element with proper styling tempElement = createCaptureElement(selectedFragment, differenceElement); // Append footer to innerWrapper (inside the border), not outerWrapper tempElement._innerWrapper.appendChild(createFooter(selectedLines, totalLines)); // Add to DOM for rendering document.body.appendChild(tempElement); // Wait for rendering await new Promise(resolve => setTimeout(resolve, RENDER_DELAY_MS)); // Capture to canvas const canvas = await html2canvas(tempElement, { scale: CANVAS_SCALE, useCORS: true, allowTaint: true, logging: false, backgroundColor: '#ffffff', scrollX: 0, scrollY: 0 }); // Validate canvas if (canvas.width === 0 || canvas.height === 0) { throw new Error("Canvas is empty - no content captured"); } // Convert to JPEG let jpeg = canvas.toDataURL("image/jpeg", JPEG_QUALITY); if (jpeg === "data:," || jpeg.length < 100) { throw new Error("Failed to generate image data"); } // Add 
EXIF metadata jpeg = addExifMetadata(jpeg); // Display the image displayImage(jpeg); // Clear selection selection.removeAllRanges(); } catch (error) { console.error("Error generating image:", error); alert("Failed to generate image: " + error.message); } finally { // Cleanup if (tempElement && tempElement.parentNode) { tempElement.parentNode.removeChild(tempElement); } setButtonState(btn, false, originalBtnHtml); } } ================================================ FILE: changedetectionio/static/js/stepper.js ================================================ $(document).ready(function(){ checkUserVal(); $('#fetch_backend input').on('change', checkUserVal); }); var checkUserVal = function(){ if($('#fetch_backend input:checked').val()=='html_requests') { $('#request-override').show(); $('#webdriver-stepper').hide(); } else { $('#request-override').hide(); $('#webdriver-stepper').show(); } }; $('a.row-options').on('click', function(){ var row=$(this.closest('tr')); switch($(this).data("action")) { case 'remove': $(row).remove(); break; case 'add': var new_row=$(row).clone(true).insertAfter($(row)); $('input', new_new).val(""); break; case 'add': var new_row=$(row).clone(true).insertAfter($(row)); $('input', new_new).val(""); break; case 'resend-step': break; } }); ================================================ FILE: changedetectionio/static/js/tabs.js ================================================ // Rewrite this is a plugin.. is all this JS really 'worth it?' 
window.addEventListener('hashchange', function () { // Only remove active from tab elements, not menu items var tabs = document.querySelectorAll('.tabs li.active'); tabs.forEach(function(tab) { tab.classList.remove('active'); }); document.body.classList.remove('full-width'); set_active_tab(); }, false); var has_errors = document.querySelectorAll(".messages .error"); if (!has_errors.length) { if (document.location.hash == "") { location.replace(document.querySelector(".tabs ul li:first-child a").hash); } else { set_active_tab(); } } else { focus_error_tab(); } function set_active_tab() { document.body.classList.remove('full-width'); var tab = document.querySelectorAll(".tabs a[href='" + location.hash + "']"); if (tab.length) { tab[0].parentElement.classList.add("active"); } } function focus_error_tab() { // time to use jquery or vuejs really, // activate the tab with the error var tabs = document.querySelectorAll('.tabs li a'), i; for (i = 0; i < tabs.length; ++i) { var tab_name = tabs[i].hash.replace('#', ''); var pane_errors = document.querySelectorAll('#' + tab_name + ' .error') if (pane_errors.length) { document.location.hash = '#' + tab_name; return true; } } return false; } ================================================ FILE: changedetectionio/static/js/toast.js ================================================ /** * Toast - Modern toast notification system * Inspired by Toastify, Notyf, and React Hot Toast * * Usage: * Toast.success('Operation completed!'); * Toast.error('Something went wrong'); * Toast.info('Here is some information'); * Toast.warning('Warning message'); * Toast.show('Custom message', { type: 'success', duration: 3000 }); * * License: MIT */ (function(window) { 'use strict'; // Toast configuration const defaultConfig = { duration: 5000, // Auto-dismiss after 5 seconds (0 = no auto-dismiss) position: 'top-center', // top-right, top-center, top-left, bottom-right, bottom-center, bottom-left closeButton: true, // Show close button progressBar: 
true, // Show progress bar pauseOnHover: true, // Pause auto-dismiss on hover maxToasts: 5, // Maximum toasts to show at once offset: '20px', // Offset from edge zIndex: 10000, // Z-index for toast container }; let config = { ...defaultConfig }; let toastCount = 0; let container = null; /** * Initialize toast system with custom config */ function init(userConfig = {}) { config = { ...defaultConfig, ...userConfig }; createContainer(); } /** * Create toast container if it doesn't exist */ function createContainer() { if (container) return; container = document.createElement('div'); container.className = `toast-container toast-${config.position}`; container.style.zIndex = config.zIndex; document.body.appendChild(container); } /** * Show a toast notification */ function show(message, options = {}) { createContainer(); const toast = createToastElement(message, options); // Limit number of toasts const existingToasts = container.querySelectorAll('.toast'); if (existingToasts.length >= config.maxToasts) { removeToast(existingToasts[0]); } // Add to container container.appendChild(toast); // Trigger animation requestAnimationFrame(() => { toast.classList.add('toast-show'); }); // Auto-dismiss if (options.duration !== 0 && (options.duration || config.duration) > 0) { setupAutoDismiss(toast, options.duration || config.duration); } return { dismiss: () => removeToast(toast) }; } /** * Create toast DOM element */ function createToastElement(message, options) { const toast = document.createElement('div'); toast.className = `toast toast-${options.type || 'default'}`; toast.setAttribute('role', 'alert'); toast.setAttribute('aria-live', 'polite'); // Icon const icon = createIcon(options.type || 'default'); if (icon) { toast.appendChild(icon); } // Message const messageEl = document.createElement('div'); messageEl.className = 'toast-message'; messageEl.textContent = message; toast.appendChild(messageEl); // Close button if (options.closeButton !== false && config.closeButton) { 
const closeBtn = document.createElement('button'); closeBtn.className = 'toast-close'; closeBtn.innerHTML = '×'; closeBtn.setAttribute('aria-label', 'Close'); closeBtn.onclick = () => removeToast(toast); toast.appendChild(closeBtn); } // Progress bar if (options.progressBar !== false && config.progressBar && (options.duration || config.duration) > 0) { const progressBar = document.createElement('div'); progressBar.className = 'toast-progress'; toast.appendChild(progressBar); toast._progressBar = progressBar; } return toast; } /** * Create icon based on toast type */ function createIcon(type) { const iconEl = document.createElement('div'); iconEl.className = 'toast-icon'; const svg = document.createElementNS('http://www.w3.org/2000/svg', 'svg'); svg.setAttribute('viewBox', '0 0 24 24'); svg.setAttribute('fill', 'none'); svg.setAttribute('stroke', 'currentColor'); svg.setAttribute('stroke-width', '2'); let path = ''; switch (type) { case 'success': path = 'M20 6L9 17l-5-5'; break; case 'error': path = 'M18 6L6 18M6 6l12 12'; break; case 'warning': path = 'M12 9v4m0 4h.01M12 2a10 10 0 100 20 10 10 0 000-20z'; svg.setAttribute('stroke-width', '1.5'); break; case 'info': path = 'M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z'; svg.setAttribute('stroke-width', '1.5'); break; default: return null; } const pathEl = document.createElementNS('http://www.w3.org/2000/svg', 'path'); pathEl.setAttribute('d', path); pathEl.setAttribute('stroke-linecap', 'round'); pathEl.setAttribute('stroke-linejoin', 'round'); svg.appendChild(pathEl); iconEl.appendChild(svg); return iconEl; } /** * Setup auto-dismiss with progress bar */ function setupAutoDismiss(toast, duration) { let startTime = Date.now(); let remainingTime = duration; let isPaused = false; let animationFrame; function updateProgress() { if (isPaused) return; const elapsed = Date.now() - startTime; const progress = Math.min(elapsed / duration, 1); if (toast._progressBar) { toast._progressBar.style.transform = 
`scaleX(${1 - progress})`; } if (progress >= 1) { removeToast(toast); } else { animationFrame = requestAnimationFrame(updateProgress); } } // Pause on hover if (config.pauseOnHover) { toast.addEventListener('mouseenter', () => { isPaused = true; remainingTime = duration - (Date.now() - startTime); cancelAnimationFrame(animationFrame); }); toast.addEventListener('mouseleave', () => { isPaused = false; startTime = Date.now(); duration = remainingTime; animationFrame = requestAnimationFrame(updateProgress); }); } animationFrame = requestAnimationFrame(updateProgress); } /** * Remove toast with animation */ function removeToast(toast) { if (!toast || !toast.parentElement) return; toast.classList.add('toast-hide'); // Remove after animation setTimeout(() => { if (toast.parentElement) { toast.parentElement.removeChild(toast); } }, 300); } // Convenience methods function success(message, options = {}) { return show(message, { ...options, type: 'success' }); } function error(message, options = {}) { return show(message, { ...options, type: 'error' }); } function warning(message, options = {}) { return show(message, { ...options, type: 'warning' }); } function info(message, options = {}) { return show(message, { ...options, type: 'info' }); } /** * Clear all toasts */ function clear() { if (!container) return; const toasts = container.querySelectorAll('.toast'); toasts.forEach(removeToast); } // Public API window.Toast = { init, show, success, error, warning, info, clear, version: '1.0.0' }; // Auto-initialize document.addEventListener('DOMContentLoaded', () => { init(); }); })(window); ================================================ FILE: changedetectionio/static/js/toggle-theme.js ================================================ /** * @file * Toggles theme between light and dark mode. 
*/ $(document).ready(function () { $(".toggle-light-mode").on("click", function () { const isDark = $("html").attr("data-darkmode") === "true"; $("html").attr("data-darkmode", !isDark); setCookieValue(!isDark); }); const setCookieValue = (value) => { document.cookie = `css_dark_mode=${value};max-age=31536000;path=/` } // Search input box behaviour const toggle_search = document.getElementById("toggle-search"); const search_q = document.getElementById("search-q"); if(search_q) { window.addEventListener('keydown', function (e) { if (e.altKey == true && e.keyCode == 83) { search_q.classList.toggle('expanded'); search_q.focus(); } }); search_q.onkeydown = (e) => { var key = e.keyCode || e.which; if (key === 13) { document.searchForm.submit(); } }; toggle_search.onclick = () => { // Could be that they want to search something once text is in there if (search_q.value.length) { document.searchForm.submit(); } else { // If not.. search_q.classList.toggle('expanded'); search_q.focus(); } }; } $('#heart-us').click(function () { $("#overlay").toggleClass('visible'); heartpath.style.fill = document.getElementById("overlay").classList.contains("visible") ? 
'#ff0000' : 'var(--color-background)'; }); setInterval(function () { $('body').toggleClass('spinner-active', $.active > 0); }, 2000); }); ================================================ FILE: changedetectionio/static/js/vis.js ================================================ $(document).ready(function () { // Lazy Hide/Show elements mechanism $('[data-visible-for]').hide(); function show_related_elem(e) { var n = $(e).attr('name') + "=" + $(e).val(); if (n === 'fetch_backend=system') { n = "fetch_backend=" + default_system_fetch_backend; } $(`[data-visible-for~="${n}"]`).show(); } $(':radio').on('keyup keypress blur change click', function (e) { $(`[data-visible-for]`).hide(); $('.advanced-options').hide(); show_related_elem(this); }); $(':radio:checked').each(function (e) { show_related_elem(this); }) // Show advanced $('.show-advanced').click(function (e) { $(this).closest('.tab-pane-inner').find('.advanced-options').each(function (e) { $(this).toggle(); }) }); }); ================================================ FILE: changedetectionio/static/js/visual-selector.js ================================================ // Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com) // All rights reserved. // yes - this is really a hack, if you are a front-ender and want to help, please get in touch! 
let runInClearMode = false; $(document).ready(() => { let currentSelections = []; let currentSelection = null; let appendToList = false; let c, xctx, ctx; let xScale = 1, yScale = 1; let selectorImage, selectorImageRect, selectorData; let elementHandlers = {}; // Store references to element selection handlers (needed for draw mode toggling) // Box drawing mode variables (for image_ssim_diff processor) let drawMode = false; let isDrawing = false; let isDragging = false; let drawStartX, drawStartY; let dragOffsetX, dragOffsetY; let drawnBox = null; let resizeHandle = null; const HANDLE_SIZE = 8; const isImageProcessor = $('input[value="image_ssim_diff"]').is(':checked'); // Global jQuery selectors with "Elem" appended const $selectorCanvasElem = $('#selector-canvas'); const $includeFiltersElem = $("#include_filters"); const $selectorBackgroundElem = $("img#selector-background"); const $selectorCurrentXpathElem = $("#selector-current-xpath span"); const $fetchingUpdateNoticeElem = $('.fetching-update-notice'); const $selectorWrapperElem = $("#selector-wrapper"); // Color constants const FILL_STYLE_HIGHLIGHT = 'rgba(205,0,0,0.35)'; const FILL_STYLE_GREYED_OUT = 'rgba(205,205,205,0.95)'; const STROKE_STYLE_HIGHLIGHT = 'rgba(255,0,0, 0.9)'; const FILL_STYLE_REDLINE = 'rgba(255,0,0, 0.1)'; const STROKE_STYLE_REDLINE = 'rgba(225,0,0,0.9)'; $('#visualselector-tab').click(() => { $selectorBackgroundElem.off('load'); currentSelections = []; bootstrapVisualSelector(); }); function clearReset() { ctx.clearRect(0, 0, c.width, c.height); if ($includeFiltersElem.val().length) { alert("Existing filters under the 'Filters & Triggers' tab were cleared."); } $includeFiltersElem.val(''); currentSelections = []; // Means we ignore the xpaths from the scraper marked as sel.highlight_as_custom_filter (it matched a previous selector) runInClearMode = true; highlightCurrentSelected(); } function splitToList(v) { return v.split('\n').map(line => line.trim()).filter(line => line.length > 0); 
} function sortScrapedElementsBySize() { // Sort the currentSelections array by area (width * height) in descending order selectorData['size_pos'].sort((a, b) => { const areaA = a.width * a.height; const areaB = b.width * b.height; return areaB - areaA; }); } $(document).on('keydown keyup', (event) => { if (event.code === 'ShiftLeft' || event.code === 'ShiftRight') { appendToList = event.type === 'keydown'; } if (event.type === 'keydown') { if ($selectorBackgroundElem.is(":visible") && event.key === "Escape") { clearReset(); } } }); $('#clear-selector').on('click', () => { clearReset(); }); // So if they start switching between visualSelector and manual filters, stop it from rendering old filters $('li.tab a').on('click', () => { runInClearMode = true; }); if (!window.location.hash || window.location.hash !== '#visualselector') { $selectorBackgroundElem.attr('src', ''); return; } bootstrapVisualSelector(); function bootstrapVisualSelector() { $selectorBackgroundElem .on("error", () => { $fetchingUpdateNoticeElem.html("Ooops! 
The VisualSelector tool needs at least one fetched page, please unpause the watch and/or wait for the watch to complete fetching and then reload this page.") .css('color', '#bb0000'); $('#selector-current-xpath, #clear-selector').hide(); }) .on('load', () => { console.log("Loaded background..."); c = document.getElementById("selector-canvas"); xctx = c.getContext("2d"); ctx = c.getContext("2d"); fetchData(); $selectorCanvasElem.off("mousemove mousedown"); }) .attr("src", screenshot_url); let s = `${$selectorBackgroundElem.attr('src')}?${new Date().getTime()}`; $selectorBackgroundElem.attr('src', s); } function alertIfFilterNotFound() { let existingFilters = splitToList($includeFiltersElem.val()); let sizePosXpaths = selectorData['size_pos'].map(sel => sel.xpath); for (let filter of existingFilters) { if (!sizePosXpaths.includes(filter)) { alert(`One or more of your existing filters was not found and will be removed when a new filter is selected.`); break; } } } function fetchData() { $fetchingUpdateNoticeElem.html("Fetching element data.."); $.ajax({ url: watch_visual_selector_data_url, context: document.body }).done((data) => { $fetchingUpdateNoticeElem.html("Rendering.."); selectorData = data; sortScrapedElementsBySize(); console.log(`Reported browser width from backend: ${data['browser_width']}`); // Little sanity check for the user, alert them if something missing alertIfFilterNotFound(); setScale(); reflowSelector(); // Initialize draw mode after everything is set up initializeDrawMode(); $fetchingUpdateNoticeElem.fadeOut(); }); } function updateFiltersText() { // Assuming currentSelections is already defined and contains the selections let uniqueSelections = new Set(currentSelections.map(sel => (sel[0] === '/' ? 
`xpath:${sel.xpath}` : sel.xpath))); if (currentSelections.length > 0) { // Convert the Set back to an array and join with newline characters let textboxFilterText = Array.from(uniqueSelections).join("\n"); $includeFiltersElem.val(textboxFilterText); } } function setScale() { $selectorWrapperElem.show(); selectorImage = $selectorBackgroundElem[0]; selectorImageRect = selectorImage.getBoundingClientRect(); $selectorCanvasElem.attr({ 'height': selectorImageRect.height, 'width': selectorImageRect.width }); $selectorWrapperElem.attr('width', selectorImageRect.width); $('#visual-selector-heading').css('max-width', selectorImageRect.width + "px") xScale = selectorImageRect.width / selectorImage.naturalWidth; yScale = selectorImageRect.height / selectorImage.naturalHeight; ctx.strokeStyle = STROKE_STYLE_HIGHLIGHT; ctx.fillStyle = FILL_STYLE_REDLINE; ctx.lineWidth = 3; console.log("Scaling set x: " + xScale + " by y:" + yScale); $("#selector-current-xpath").css('max-width', selectorImageRect.width); } function reflowSelector() { $(window).resize(() => { setScale(); highlightCurrentSelected(); }); setScale(); console.log(selectorData['size_pos'].length + " selectors found"); let existingFilters = splitToList($includeFiltersElem.val()); selectorData['size_pos'].forEach(sel => { if ((!runInClearMode && sel.highlight_as_custom_filter) || existingFilters.includes(sel.xpath)) { console.log("highlighting " + c); currentSelections.push(sel); } }); highlightCurrentSelected(); updateFiltersText(); // Store handler references for later use elementHandlers.handleMouseMove = handleMouseMove.debounce(5); elementHandlers.handleMouseDown = handleMouseDown.debounce(5); elementHandlers.handleMouseLeave = highlightCurrentSelected.debounce(5); $selectorCanvasElem.bind('mousemove', elementHandlers.handleMouseMove); $selectorCanvasElem.bind('mousedown', elementHandlers.handleMouseDown); $selectorCanvasElem.bind('mouseleave', elementHandlers.handleMouseLeave); function handleMouseMove(e) { if 
(!e.offsetX && !e.offsetY) { const targetOffset = $(e.target).offset(); e.offsetX = e.pageX - targetOffset.left; e.offsetY = e.pageY - targetOffset.top; } ctx.fillStyle = FILL_STYLE_HIGHLIGHT; selectorData['size_pos'].forEach(sel => { if (e.offsetY > sel.top * yScale && e.offsetY < sel.top * yScale + sel.height * yScale && e.offsetX > sel.left * yScale && e.offsetX < sel.left * yScale + sel.width * yScale) { setCurrentSelectedText(sel.xpath); drawHighlight(sel); currentSelections.push(sel); currentSelection = sel; highlightCurrentSelected(); currentSelections.pop(); } }) } function setCurrentSelectedText(s) { $selectorCurrentXpathElem[0].innerHTML = s; } function drawHighlight(sel) { ctx.strokeRect(sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale); ctx.fillRect(sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale); } function handleMouseDown() { // If we are in 'appendToList' mode, grow the list, if not, just 1 currentSelections = appendToList ? 
[...currentSelections, currentSelection] : [currentSelection]; highlightCurrentSelected(); updateFiltersText(); } } function highlightCurrentSelected() { xctx.fillStyle = FILL_STYLE_GREYED_OUT; xctx.strokeStyle = STROKE_STYLE_REDLINE; xctx.lineWidth = 3; xctx.clearRect(0, 0, c.width, c.height); currentSelections.forEach(sel => { //xctx.clearRect(sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale); xctx.strokeRect(sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale); }); } // ============= BOX DRAWING MODE (for image_ssim_diff processor) ============= function initializeDrawMode() { if (!isImageProcessor || !c) return; const $selectorModeRadios = $('input[name="selector-mode"]'); const $boundingBoxField = $('#bounding_box'); const $selectionModeField = $('#selection_mode'); // Load existing selection mode if present const savedMode = $selectionModeField.val(); if (savedMode && (savedMode === 'element' || savedMode === 'draw')) { $selectorModeRadios.filter(`[value="${savedMode}"]`).prop('checked', true); console.log('Loaded saved mode:', savedMode); } // Load existing bounding box if present const existingBox = $boundingBoxField.val(); if (existingBox) { try { const parts = existingBox.split(',').map(p => parseFloat(p)); if (parts.length === 4) { drawnBox = { x: parts[0] * xScale, y: parts[1] * yScale, width: parts[2] * xScale, height: parts[3] * yScale }; console.log('Loaded saved bounding box:', existingBox); } } catch (e) { console.error('Failed to parse existing bounding box:', e); } } // Update mode when radio changes $selectorModeRadios.off('change').on('change', function() { const newMode = $(this).val(); drawMode = newMode === 'draw'; console.log('Mode changed to:', newMode); // Save the mode to the hidden field $selectionModeField.val(newMode); if (drawMode) { enableDrawMode(); } else { disableDrawMode(); } }); // Set initial mode based on which radio is checked drawMode = 
$selectorModeRadios.filter(':checked').val() === 'draw'; console.log('Initial mode:', drawMode ? 'draw' : 'element'); // Save initial mode $selectionModeField.val(drawMode ? 'draw' : 'element'); if (drawMode) { enableDrawMode(); } } function enableDrawMode() { console.log('Enabling draw mode...'); // Unbind element selection handlers $selectorCanvasElem.unbind('mousemove mousedown mouseleave'); // Set cursor to crosshair $selectorCanvasElem.css('cursor', 'crosshair'); // Bind draw mode handlers $selectorCanvasElem.on('mousedown', handleDrawMouseDown); $selectorCanvasElem.on('mousemove', handleDrawMouseMove); $selectorCanvasElem.on('mouseup', handleDrawMouseUp); $selectorCanvasElem.on('mouseleave', handleDrawMouseUp); // Clear element selections and xpath display currentSelections = []; $includeFiltersElem.val(''); $selectorCurrentXpathElem.html('Draw mode - click and drag to select an area'); // Clear the canvas if (ctx && xctx) { ctx.clearRect(0, 0, c.width, c.height); xctx.clearRect(0, 0, c.width, c.height); } // Redraw if we have an existing box if (drawnBox) { drawBox(); } } function disableDrawMode() { console.log('Disabling draw mode, switching to element mode...'); // Unbind draw handlers $selectorCanvasElem.unbind('mousedown mousemove mouseup mouseleave'); // Reset cursor $selectorCanvasElem.css('cursor', 'default'); // Clear drawn box drawnBox = null; $('#bounding_box').val(''); // Clear the canvases if (ctx && xctx) { ctx.clearRect(0, 0, c.width, c.height); xctx.clearRect(0, 0, c.width, c.height); } // Restore element selections from include_filters currentSelections = []; if (selectorData && selectorData['size_pos']) { let existingFilters = splitToList($includeFiltersElem.val()); selectorData['size_pos'].forEach(sel => { if ((!runInClearMode && sel.highlight_as_custom_filter) || existingFilters.includes(sel.xpath)) { console.log("Restoring selection: " + sel.xpath); currentSelections.push(sel); } }); } // Re-enable element selection handlers using stored 
references if (elementHandlers.handleMouseMove) { $selectorCanvasElem.bind('mousemove', elementHandlers.handleMouseMove); $selectorCanvasElem.bind('mousedown', elementHandlers.handleMouseDown); $selectorCanvasElem.bind('mouseleave', elementHandlers.handleMouseLeave); } // Restore the element selection display $selectorCurrentXpathElem.html('Hover over elements to select'); // Highlight the restored selections highlightCurrentSelected(); } function handleDrawMouseDown(e) { const rect = c.getBoundingClientRect(); const x = e.clientX - rect.left; const y = e.clientY - rect.top; // Check if clicking on a resize handle if (drawnBox) { resizeHandle = getResizeHandle(x, y); if (resizeHandle) { isDrawing = true; drawStartX = x; drawStartY = y; return; } // Check if clicking inside the box (for dragging) if (isInsideBox(x, y)) { isDragging = true; dragOffsetX = x - drawnBox.x; dragOffsetY = y - drawnBox.y; $selectorCanvasElem.css('cursor', 'move'); return; } } // Start new box isDrawing = true; drawStartX = x; drawStartY = y; drawnBox = { x: x, y: y, width: 0, height: 0 }; } function handleDrawMouseMove(e) { const rect = c.getBoundingClientRect(); const x = e.clientX - rect.left; const y = e.clientY - rect.top; // Update cursor based on position if (!isDrawing && !isDragging && drawnBox) { const handle = getResizeHandle(x, y); if (handle) { $selectorCanvasElem.css('cursor', getHandleCursor(handle)); } else if (isInsideBox(x, y)) { $selectorCanvasElem.css('cursor', 'move'); } else { $selectorCanvasElem.css('cursor', 'crosshair'); } } // Handle dragging the box if (isDragging) { drawnBox.x = x - dragOffsetX; drawnBox.y = y - dragOffsetY; drawBox(); return; } if (!isDrawing) return; if (resizeHandle) { // Resize existing box resizeBox(x, y); } else { // Draw new box drawnBox.width = x - drawStartX; drawnBox.height = y - drawStartY; } drawBox(); } function handleDrawMouseUp(e) { if (!isDrawing && !isDragging) return; isDrawing = false; isDragging = false; resizeHandle = null; 
if (drawnBox) { // Normalize box (handle negative dimensions) if (drawnBox.width < 0) { drawnBox.x += drawnBox.width; drawnBox.width = Math.abs(drawnBox.width); } if (drawnBox.height < 0) { drawnBox.y += drawnBox.height; drawnBox.height = Math.abs(drawnBox.height); } // Constrain to canvas bounds drawnBox.x = Math.max(0, Math.min(drawnBox.x, c.width - drawnBox.width)); drawnBox.y = Math.max(0, Math.min(drawnBox.y, c.height - drawnBox.height)); // Save to form field (convert from scaled to natural coordinates) const naturalX = Math.round(drawnBox.x / xScale); const naturalY = Math.round(drawnBox.y / yScale); const naturalWidth = Math.round(drawnBox.width / xScale); const naturalHeight = Math.round(drawnBox.height / yScale); $('#bounding_box').val(`${naturalX},${naturalY},${naturalWidth},${naturalHeight}`); drawBox(); } } function drawBox() { if (!drawnBox) return; // Clear and redraw ctx.clearRect(0, 0, c.width, c.height); xctx.clearRect(0, 0, c.width, c.height); // Draw box ctx.strokeStyle = STROKE_STYLE_REDLINE; ctx.fillStyle = FILL_STYLE_REDLINE; ctx.lineWidth = 3; const drawX = drawnBox.width >= 0 ? drawnBox.x : drawnBox.x + drawnBox.width; const drawY = drawnBox.height >= 0 ? 
drawnBox.y : drawnBox.y + drawnBox.height; const drawW = Math.abs(drawnBox.width); const drawH = Math.abs(drawnBox.height); ctx.strokeRect(drawX, drawY, drawW, drawH); ctx.fillRect(drawX, drawY, drawW, drawH); // Draw resize handles if (!isDrawing) { drawResizeHandles(drawX, drawY, drawW, drawH); } } function drawResizeHandles(x, y, w, h) { ctx.fillStyle = '#fff'; ctx.strokeStyle = '#000'; ctx.lineWidth = 1; const handles = [ { x: x, y: y }, // top-left { x: x + w, y: y }, // top-right { x: x, y: y + h }, // bottom-left { x: x + w, y: y + h } // bottom-right ]; handles.forEach(handle => { ctx.fillRect(handle.x - HANDLE_SIZE/2, handle.y - HANDLE_SIZE/2, HANDLE_SIZE, HANDLE_SIZE); ctx.strokeRect(handle.x - HANDLE_SIZE/2, handle.y - HANDLE_SIZE/2, HANDLE_SIZE, HANDLE_SIZE); }); } function isInsideBox(x, y) { if (!drawnBox) return false; const drawX = drawnBox.width >= 0 ? drawnBox.x : drawnBox.x + drawnBox.width; const drawY = drawnBox.height >= 0 ? drawnBox.y : drawnBox.y + drawnBox.height; const drawW = Math.abs(drawnBox.width); const drawH = Math.abs(drawnBox.height); return x >= drawX && x <= drawX + drawW && y >= drawY && y <= drawY + drawH; } function getResizeHandle(x, y) { if (!drawnBox) return null; const drawX = drawnBox.width >= 0 ? drawnBox.x : drawnBox.x + drawnBox.width; const drawY = drawnBox.height >= 0 ? 
drawnBox.y : drawnBox.y + drawnBox.height; const drawW = Math.abs(drawnBox.width); const drawH = Math.abs(drawnBox.height); const handles = { 'tl': { x: drawX, y: drawY }, 'tr': { x: drawX + drawW, y: drawY }, 'bl': { x: drawX, y: drawY + drawH }, 'br': { x: drawX + drawW, y: drawY + drawH } }; for (const [key, handle] of Object.entries(handles)) { if (Math.abs(x - handle.x) <= HANDLE_SIZE && Math.abs(y - handle.y) <= HANDLE_SIZE) { return key; } } return null; } function getHandleCursor(handle) { const cursors = { 'tl': 'nw-resize', 'tr': 'ne-resize', 'bl': 'sw-resize', 'br': 'se-resize' }; return cursors[handle] || 'crosshair'; } function resizeBox(x, y) { const dx = x - drawStartX; const dy = y - drawStartY; const originalBox = { ...drawnBox }; switch (resizeHandle) { case 'tl': drawnBox.x = x; drawnBox.y = y; drawnBox.width = originalBox.x + originalBox.width - x; drawnBox.height = originalBox.y + originalBox.height - y; break; case 'tr': drawnBox.y = y; drawnBox.width = x - originalBox.x; drawnBox.height = originalBox.y + originalBox.height - y; break; case 'bl': drawnBox.x = x; drawnBox.width = originalBox.x + originalBox.width - x; drawnBox.height = y - originalBox.y; break; case 'br': drawnBox.width = x - originalBox.x; drawnBox.height = y - originalBox.y; break; } drawStartX = x; drawStartY = y; } }); ================================================ FILE: changedetectionio/static/js/watch-overview.js ================================================ $(function () { function normalizeUrl(el) { const val = el.value.trim(); if (val && !/^[a-zA-Z][a-zA-Z\d+\-.]*:/.test(val)) { el.value = 'https://' + val; } } $('#url').on('blur keydown', function (e) { if (e.type === 'blur' || e.key === 'Enter') { normalizeUrl(this); } }); $('form').on('submit', function () { normalizeUrl($('#url')[0]); }); // Remove unviewed status when normally clicked $('.diff-link').click(function () { $(this).closest('.unviewed').removeClass('unviewed'); }); 
$('td[data-timestamp]').each(function () { $(this).prop('title', new Intl.DateTimeFormat(undefined, { dateStyle: 'full', timeStyle: 'long' }).format($(this).data('timestamp') * 1000)); }) $("#checkbox-assign-tag").click(function (e) { $('#op_extradata').val(prompt("Enter a tag name")); }); $('.history-link').click(function (e) { // Incase they click 'back' in the browser, it should be removed. $(this).closest('tr').removeClass('unviewed'); }); $('.with-share-link > *').click(function () { $("#copied-clipboard").remove(); var range = document.createRange(); var n = $("#share-link")[0]; range.selectNode(n); window.getSelection().removeAllRanges(); window.getSelection().addRange(range); document.execCommand("copy"); window.getSelection().removeAllRanges(); $('.with-share-link').append('Copied to clipboard'); $("#copied-clipboard").fadeOut(2500, function () { $(this).remove(); }); }); $(".watch-table tr").click(function (event) { var tagName = event.target.tagName.toLowerCase(); if (tagName === 'tr' || tagName === 'td') { var x = $('input[type=checkbox]', this); if (x) { $(x).click(); } } }); // checkboxes - check all $("#check-all").click(function (e) { $('input[type=checkbox]').not(this).prop('checked', this.checked); }); const time_check_step_size_seconds=1; // checkboxes - show/hide buttons $("input[type=checkbox]").click(function (e) { if ($('input[type=checkbox]:checked').length) { $('#checkbox-operations').slideDown(); } else { $('#checkbox-operations').slideUp(); } }); setInterval(function () { // Background ETA completion for 'checking now' $(".watch-table .checking-now .last-checked").each(function () { const eta_complete = parseFloat($(this).data('eta_complete')); const fetch_duration = parseInt($(this).data('fetchduration')); if (eta_complete + 2 > nowtimeserver && fetch_duration > 3) { const remaining_seconds = Math.abs(eta_complete) - nowtimeserver - 1; let r = Math.round((1.0 - (remaining_seconds / fetch_duration)) * 100); if (r < 10) { r = 10; } if (r 
>= 90) { r = 100; } $(this).css('background-size', `${r}% 100%`); } else { // Snap to full complete $(this).css('background-size', `100% 100%`); } }); nowtimeserver = nowtimeserver + time_check_step_size_seconds; }, time_check_step_size_seconds * 1000); }); ================================================ FILE: changedetectionio/static/js/watch-settings.js ================================================ function request_textpreview_update() { if (!$('body').hasClass('preview-text-enabled')) { console.error("Preview text was requested but body tag was not setup") return } const data = {}; $('textarea:visible, input:visible').each(function () { const $element = $(this); // Cache the jQuery object for the current element const name = $element.attr('name'); // Get the name attribute of the element data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : false) : $element.val(); }); $('body').toggleClass('spinner-active', 1); $.abortiveSingularAjax({ type: "POST", url: preview_text_edit_filters_url, data: data, namespace: 'watchEdit' }).done(function (data) { console.debug(data['duration']) $('#filters-and-triggers #text-preview-before-inner').text(data['before_filter']); $('#filters-and-triggers #text-preview-inner') .text(data['after_filter']) .highlightLines([ { 'color': 'var(--highlight-trigger-text-bg-color)', 'lines': data['trigger_line_numbers'], 'title': "Triggers a change if this text appears, AND something changed in the document." }, { 'color': 'var(--highlight-ignored-text-bg-color)', 'lines': data['ignore_line_numbers'], 'title': "Ignored for calculating changes, but still shown." }, { 'color': 'var(--highlight-blocked-text-bg-color)', 'lines': data['blocked_line_numbers'], 'title': "No change-detection will occur because this text exists." 
} ]) }).fail(function (error) { if (error.statusText === 'abort') { console.log('Request was aborted due to a new request being fired.'); } else { $('#filters-and-triggers #text-preview-inner').text('There was an error communicating with the server.'); } }) } $(document).ready(function () { $('#notification-setting-reset-to-default').click(function (e) { $('#notification_title').val(''); $('#notification_body').val(''); $('#notification_format').val('System default'); $('#notification_urls').val(''); $('#notification_muted_none').prop('checked', true); // in the case of a ternary field e.preventDefault(); }); $("#notification-token-toggle").click(function (e) { e.preventDefault(); $('#notification-tokens-info').toggle(); }); toggleOpacity('#time_between_check_use_default', '#time-check-widget-wrapper, #time-between-check-schedule', false); const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0); $("#text-preview-inner").css('max-height', (vh - 300) + "px"); $("#text-preview-before-inner").css('max-height', (vh - 300) + "px"); $("#activate-text-preview").click(function (e) { $('body').toggleClass('preview-text-enabled') request_textpreview_update(); const method = $('body').hasClass('preview-text-enabled') ? 
'on' : 'off'; $('#filters-and-triggers textarea')[method]('blur', request_textpreview_update.throttle(1000)); $('#filters-and-triggers input')[method]('change', request_textpreview_update.throttle(1000)); $("#filters-and-triggers-tab")[method]('click', request_textpreview_update.throttle(1000)); }); $('.minitabs-wrapper').miniTabs({ "Content after filters": "#text-preview-inner", "Content raw/before filters": "#text-preview-before-inner" }); }); ================================================ FILE: changedetectionio/static/styles/.dockerignore ================================================ node_modules package-lock.json ================================================ FILE: changedetectionio/static/styles/.gitignore ================================================ node_modules package-lock.json ================================================ FILE: changedetectionio/static/styles/diff-image.css ================================================ .comparison-score{padding:1em;background:var(--color-table-stripe);border-radius:4px;margin:1em 0;border:1px solid var(--color-border-table-cell);color:var(--color-text)}.change-detected{color:#d32f2f;font-weight:bold}.no-change{color:#388e3c;font-weight:bold}.comparison-grid{display:grid;grid-template-columns:1fr 1fr;gap:1em;margin:1em 1em}@media(max-width: 1200px){.comparison-grid{grid-template-columns:1fr}}.image-comparison{position:relative;width:100%;overflow:hidden;border:1px solid var(--color-border-table-cell);box-shadow:0 2px 4px rgba(0,0,0,.1);user-select:none}.image-comparison img{display:block;width:100%;height:auto;max-width:100%;border:none;box-shadow:none}.comparison-image-wrapper{position:relative;width:100%;display:flex;align-items:flex-start;justify-content:center;background-color:var(--color-background);background-image:linear-gradient(45deg, var(--color-table-stripe) 25%, transparent 25%),linear-gradient(-45deg, var(--color-table-stripe) 25%, transparent 25%),linear-gradient(45deg, transparent 75%, 
var(--color-table-stripe) 75%),linear-gradient(-45deg, transparent 75%, var(--color-table-stripe) 75%);background-size:20px 20px;background-position:0 0,0 10px,10px -10px,-10px 0px}.comparison-after{position:absolute;top:0;left:0;width:100%;height:100%;clip-path:inset(0 0 0 50%)}.comparison-slider{position:absolute;top:0;left:50%;width:4px;height:100%;background:#0078e7;cursor:ew-resize;transform:translateX(-2px);z-index:10}.comparison-handle{position:absolute;top:50%;left:50%;width:48px;height:48px;background:#0078e7;border:3px solid #fff;border-radius:50%;transform:translate(-50%, -50%);box-shadow:0 2px 8px rgba(0,0,0,.3);display:flex;align-items:center;justify-content:center;cursor:ew-resize;transition:top .1s ease-out}.comparison-handle::after{content:"⇄";color:#fff;font-size:24px;font-weight:bold;pointer-events:none}.comparison-labels{position:absolute;top:10px;width:100%;display:flex;justify-content:space-between;padding:0 0px;z-index:5;pointer-events:none}.comparison-label{background:rgba(0,0,0,.7);color:#fff;padding:.5em 1em;border-radius:4px;font-size:.9em;font-weight:bold}.screenshot-panel{text-align:center;background:var(--color-background);border:1px solid var(--color-border-table-cell);border-radius:4px;padding:1em;box-shadow:0 2px 4px rgba(0,0,0,.05)}.screenshot-panel h3{margin:0 0 1em 0;font-size:1.1em;color:var(--color-text);border-bottom:2px solid var(--color-background-button-primary);padding-bottom:.5em}.screenshot-panel.diff h3{border-bottom-color:#d32f2f}.screenshot-panel img{max-width:100%;height:auto;border:1px solid var(--color-border-table-cell);box-shadow:0 2px 4px rgba(0,0,0,.1)}.version-selector{display:inline-block;margin:0 .5em}.version-selector label{font-weight:bold;margin-right:.5em;color:var(--color-text)}#settings{background:var(--color-background);padding:1.5em;border-radius:4px;box-shadow:0 2px 4px rgba(0,0,0,.05);margin-bottom:2em;border:1px solid var(--color-border-table-cell)}#settings 
h2{margin-top:0;color:var(--color-text)}.diff-fieldset{border:none;padding:0;margin:0}.edit-link{float:right;margin-top:-0.5em}.comparison-description{color:var(--color-text-input-description);font-size:.9em;margin-bottom:1em}.download-link{color:var(--color-link);text-decoration:none;display:inline-flex;align-items:center;gap:.3em;font-size:.85em}.download-link:hover{text-decoration:underline}.diff-section-header{color:#d32f2f;font-size:.9em;margin-bottom:1em;font-weight:bold;display:flex;align-items:center;justify-content:center;gap:1em}.comparison-history-section{margin-top:3em;padding:1em;background:var(--color-background);border:1px solid var(--color-border-table-cell);border-radius:4px;box-shadow:0 2px 4px rgba(0,0,0,.05)}.comparison-history-section h3{color:var(--color-text)}.comparison-history-section p{color:var(--color-text-input-description);font-size:.9em}.history-changed-yes{color:#d32f2f;font-weight:bold}.history-changed-no{color:#388e3c} ================================================ FILE: changedetectionio/static/styles/diff.css ================================================ #diff-form{background:rgba(0,0,0,.05);padding:1em;border-radius:10px;margin-bottom:1em;color:#fff;font-size:.9rem;text-align:center}#diff-form label.from-to-label{width:4rem;text-decoration:none;padding:.5rem}#diff-form label.from-to-label#change-from{color:#b30000;background:#fadad7}#diff-form label.from-to-label#change-to{background:#eaf2c2;color:#406619}#diff-form #diff-style>span{display:inline-block;padding:.3em}#diff-form #diff-style>span label{font-weight:normal}#diff-form *{vertical-align:middle}body.difference-page section.content{padding-top:40px}#diff-ui{background:var(--color-background);padding:1rem;border-radius:5px}@media(min-width: 767px){#diff-ui{min-width:50%}}#diff-ui #text{font-size:11px}#diff-ui pre{white-space:break-spaces;overflow-wrap:anywhere}#diff-ui h1{display:inline;font-size:100%}#diff-ui 
#result{white-space:pre-wrap;word-break:break-word;overflow-wrap:break-word}#diff-ui .source{position:absolute;right:1%;top:.2em}@-moz-document url-prefix(){#diff-ui body{height:99%}}#diff-ui td#diff-col div{text-align:justify;white-space:pre-wrap}#diff-ui .ignored{background-color:#ccc;opacity:.7}#diff-ui .triggered{background-color:#1b98f8}#diff-ui .ignored.triggered{background-color:red}#diff-ui .tab-pane-inner#screenshot{text-align:center}#diff-ui .tab-pane-inner#screenshot img{max-width:99%}#diff-ui .pure-form button.reset-margin{margin:0px}#diff-ui .diff-fieldset{display:flex;align-items:center;gap:4px;flex-wrap:wrap}#diff-ui ul#highlightSnippetActions{list-style-type:none;display:flex;align-items:center;justify-content:center;gap:1.5rem;flex-wrap:wrap;padding:0;margin:0}#diff-ui ul#highlightSnippetActions li{display:flex;flex-direction:column;align-items:center;text-align:center;padding:.5rem;gap:.3rem}#diff-ui ul#highlightSnippetActions li button,#diff-ui ul#highlightSnippetActions li a{white-space:nowrap}#diff-ui ul#highlightSnippetActions span{font-size:.8rem;color:var(--color-text-input-description)}#diff-ui #cell-diff-jump-visualiser{display:flex;flex-direction:row;gap:1px;background:var(--color-background);border-radius:3px;overflow-x:hidden;position:sticky;top:0;z-index:10;padding-top:1rem;padding-bottom:1rem;justify-content:center}#diff-ui #cell-diff-jump-visualiser>div{flex:1;min-width:1px;max-width:10px;height:10px;background:var(--color-background-button-cancel);opacity:.3;border-radius:1px;transition:opacity .2s;position:relative}#diff-ui #cell-diff-jump-visualiser>div.deletion{background:#b30000;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.insertion{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.note{background:#406619;opacity:1}#diff-ui #cell-diff-jump-visualiser>div.mixed{background:linear-gradient(to right, #b30000 50%, #406619 50%);opacity:1}#diff-ui 
#cell-diff-jump-visualiser>div.current-position::after{content:"";position:absolute;bottom:-6px;left:50%;transform:translateX(-50%);width:0;height:0;border-left:4px solid rgba(0,0,0,0);border-right:4px solid rgba(0,0,0,0);border-bottom:4px solid var(--color-text)}#diff-ui #cell-diff-jump-visualiser>div:hover{opacity:.8;cursor:pointer}#text-diff-heading-area .snapshot-age{padding:4px;margin:.5rem 0;background-color:var(--color-background-snapshot-age);border-radius:3px;font-weight:bold;margin-bottom:4px}#text-diff-heading-area .snapshot-age.error{background-color:var(--color-error-background-snapshot-age);color:var(--color-error-text-snapshot-age)}#text-diff-heading-area .snapshot-age>*{padding-right:1rem} ================================================ FILE: changedetectionio/static/styles/package.json ================================================ { "name": "changedetection.io-theme", "version": "0.0.3", "description": "", "main": "index.js", "engines": { "node": ">=18.0.0" }, "scripts": { "watch": "sass --watch scss:. --style=compressed --no-source-map", "build": "sass scss:. --style=compressed --no-source-map" }, "author": "Leigh Morresi / Web Technologies s.r.o.", "license": "Apache", "dependencies": { "sass": "^1.77.8" } } ================================================ FILE: changedetectionio/static/styles/pure-min.css ================================================ /*! Pure v2.0.5 Copyright 2013 Yahoo! Licensed under the BSD License. https://github.com/pure-css/pure/blob/master/LICENSE */ /*! normalize.css v | MIT License | git.io/normalize Copyright (c) Nicolas Gallagher and Jonathan Neal */ /*! 
normalize.css v8.0.1 | MIT License | github.com/necolas/normalize.css */html{line-height:1.15;-webkit-text-size-adjust:100%}body{margin:0}main{display:block}h1{font-size:2em;margin:.67em 0}hr{-webkit-box-sizing:content-box;box-sizing:content-box;height:0;overflow:visible}pre{font-family:monospace,monospace;font-size:1em}a{background-color:transparent}abbr[title]{border-bottom:none;text-decoration:underline;-webkit-text-decoration:underline dotted;text-decoration:underline dotted}b,strong{font-weight:bolder}code,kbd,samp{font-family:monospace,monospace;font-size:1em}small{font-size:80%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}img{border-style:none}button,input,optgroup,select,textarea{font-family:inherit;font-size:100%;line-height:1.15;margin:0}button,input{overflow:visible}button,select{text-transform:none}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button}[type=button]::-moz-focus-inner,[type=reset]::-moz-focus-inner,[type=submit]::-moz-focus-inner,button::-moz-focus-inner{border-style:none;padding:0}[type=button]:-moz-focusring,[type=reset]:-moz-focusring,[type=submit]:-moz-focusring,button:-moz-focusring{outline:1px dotted ButtonText}fieldset{padding:.35em .75em 
.625em}legend{-webkit-box-sizing:border-box;box-sizing:border-box;color:inherit;display:table;max-width:100%;padding:0;white-space:normal}progress{vertical-align:baseline}textarea{overflow:auto}[type=checkbox],[type=radio]{-webkit-box-sizing:border-box;box-sizing:border-box;padding:0}[type=number]::-webkit-inner-spin-button,[type=number]::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}[type=search]::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}details{display:block}summary{display:list-item}template{display:none}[hidden]{display:none}html{font-family:sans-serif}.hidden,[hidden]{display:none!important}.pure-img{max-width:100%;height:auto;display:block}.pure-g{letter-spacing:-.31em;text-rendering:optimizespeed;font-family:FreeSans,Arimo,"Droid Sans",Helvetica,Arial,sans-serif;display:-webkit-box;display:-ms-flexbox;display:flex;-webkit-box-orient:horizontal;-webkit-box-direction:normal;-ms-flex-flow:row wrap;flex-flow:row wrap;-ms-flex-line-pack:start;align-content:flex-start}@media all and (-ms-high-contrast:none),(-ms-high-contrast:active){table .pure-g{display:block}}.opera-only :-o-prefocus,.pure-g{word-spacing:-.43em}.pure-u{display:inline-block;letter-spacing:normal;word-spacing:normal;vertical-align:top;text-rendering:auto}.pure-g 
[class*=pure-u]{font-family:sans-serif}.pure-u-1,.pure-u-1-1,.pure-u-1-12,.pure-u-1-2,.pure-u-1-24,.pure-u-1-3,.pure-u-1-4,.pure-u-1-5,.pure-u-1-6,.pure-u-1-8,.pure-u-10-24,.pure-u-11-12,.pure-u-11-24,.pure-u-12-24,.pure-u-13-24,.pure-u-14-24,.pure-u-15-24,.pure-u-16-24,.pure-u-17-24,.pure-u-18-24,.pure-u-19-24,.pure-u-2-24,.pure-u-2-3,.pure-u-2-5,.pure-u-20-24,.pure-u-21-24,.pure-u-22-24,.pure-u-23-24,.pure-u-24-24,.pure-u-3-24,.pure-u-3-4,.pure-u-3-5,.pure-u-3-8,.pure-u-4-24,.pure-u-4-5,.pure-u-5-12,.pure-u-5-24,.pure-u-5-5,.pure-u-5-6,.pure-u-5-8,.pure-u-6-24,.pure-u-7-12,.pure-u-7-24,.pure-u-7-8,.pure-u-8-24,.pure-u-9-24{display:inline-block;letter-spacing:normal;word-spacing:normal;vertical-align:top;text-rendering:auto}.pure-u-1-24{width:4.1667%}.pure-u-1-12,.pure-u-2-24{width:8.3333%}.pure-u-1-8,.pure-u-3-24{width:12.5%}.pure-u-1-6,.pure-u-4-24{width:16.6667%}.pure-u-1-5{width:20%}.pure-u-5-24{width:20.8333%}.pure-u-1-4,.pure-u-6-24{width:25%}.pure-u-7-24{width:29.1667%}.pure-u-1-3,.pure-u-8-24{width:33.3333%}.pure-u-3-8,.pure-u-9-24{width:37.5%}.pure-u-2-5{width:40%}.pure-u-10-24,.pure-u-5-12{width:41.6667%}.pure-u-11-24{width:45.8333%}.pure-u-1-2,.pure-u-12-24{width:50%}.pure-u-13-24{width:54.1667%}.pure-u-14-24,.pure-u-7-12{width:58.3333%}.pure-u-3-5{width:60%}.pure-u-15-24,.pure-u-5-8{width:62.5%}.pure-u-16-24,.pure-u-2-3{width:66.6667%}.pure-u-17-24{width:70.8333%}.pure-u-18-24,.pure-u-3-4{width:75%}.pure-u-19-24{width:79.1667%}.pure-u-4-5{width:80%}.pure-u-20-24,.pure-u-5-6{width:83.3333%}.pure-u-21-24,.pure-u-7-8{width:87.5%}.pure-u-11-12,.pure-u-22-24{width:91.6667%}.pure-u-23-24{width:95.8333%}.pure-u-1,.pure-u-1-1,.pure-u-24-24,.pure-u-5-5{width:100%}.pure-button{display:inline-block;line-height:normal;white-space:nowrap;vertical-align:middle;text-align:center;cursor:pointer;-webkit-user-drag:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;-webkit-box-sizing:border-box;box-sizing:border-box}.pure-button::-mo
z-focus-inner{padding:0;border:0}.pure-button-group{letter-spacing:-.31em;text-rendering:optimizespeed}.opera-only :-o-prefocus,.pure-button-group{word-spacing:-.43em}.pure-button-group .pure-button{letter-spacing:normal;word-spacing:normal;vertical-align:top;text-rendering:auto}.pure-button{font-family:inherit;font-size:100%;padding:.5em 1em;color:rgba(0,0,0,.8);border:none transparent;background-color:#e6e6e6;text-decoration:none;border-radius:2px}.pure-button-hover,.pure-button:focus,.pure-button:hover{background-image:-webkit-gradient(linear,left top,left bottom,from(transparent),color-stop(40%,rgba(0,0,0,.05)),to(rgba(0,0,0,.1)));background-image:linear-gradient(transparent,rgba(0,0,0,.05) 40%,rgba(0,0,0,.1))}.pure-button:focus{outline:0}.pure-button-active,.pure-button:active{-webkit-box-shadow:0 0 0 1px rgba(0,0,0,.15) inset,0 0 6px rgba(0,0,0,.2) inset;box-shadow:0 0 0 1px rgba(0,0,0,.15) inset,0 0 6px rgba(0,0,0,.2) inset;border-color:#000}.pure-button-disabled,.pure-button-disabled:active,.pure-button-disabled:focus,.pure-button-disabled:hover,.pure-button[disabled]{border:none;background-image:none;opacity:.4;cursor:not-allowed;-webkit-box-shadow:none;box-shadow:none;pointer-events:none}.pure-button-hidden{display:none}.pure-button-primary,.pure-button-selected,a.pure-button-primary,a.pure-button-selected{background-color:#0078e7;color:#fff}.pure-button-group .pure-button{margin:0;border-radius:0;border-right:1px solid rgba(0,0,0,.2)}.pure-button-group .pure-button:first-child{border-top-left-radius:2px;border-bottom-left-radius:2px}.pure-button-group .pure-button:last-child{border-top-right-radius:2px;border-bottom-right-radius:2px;border-right:none}.pure-form input[type=color],.pure-form input[type=date],.pure-form input[type=datetime-local],.pure-form input[type=datetime],.pure-form input[type=email],.pure-form input[type=month],.pure-form input[type=number],.pure-form input[type=password],.pure-form input[type=search],.pure-form 
input[type=tel],.pure-form input[type=text],.pure-form input[type=time],.pure-form input[type=url],.pure-form input[type=week],.pure-form select,.pure-form textarea{padding:.5em .6em;display:inline-block;border:1px solid #ccc;-webkit-box-shadow:inset 0 1px 3px #ddd;box-shadow:inset 0 1px 3px #ddd;border-radius:4px;vertical-align:middle;-webkit-box-sizing:border-box;box-sizing:border-box}.pure-form input:not([type]){padding:.5em .6em;display:inline-block;border:1px solid #ccc;-webkit-box-shadow:inset 0 1px 3px #ddd;box-shadow:inset 0 1px 3px #ddd;border-radius:4px;-webkit-box-sizing:border-box;box-sizing:border-box}.pure-form input[type=color]{padding:.2em .5em}.pure-form input[type=color]:focus,.pure-form input[type=date]:focus,.pure-form input[type=datetime-local]:focus,.pure-form input[type=datetime]:focus,.pure-form input[type=email]:focus,.pure-form input[type=month]:focus,.pure-form input[type=number]:focus,.pure-form input[type=password]:focus,.pure-form input[type=search]:focus,.pure-form input[type=tel]:focus,.pure-form input[type=text]:focus,.pure-form input[type=time]:focus,.pure-form input[type=url]:focus,.pure-form input[type=week]:focus,.pure-form select:focus,.pure-form textarea:focus{outline:0;border-color:#129fea}.pure-form input:not([type]):focus{outline:0;border-color:#129fea}.pure-form input[type=checkbox]:focus,.pure-form input[type=file]:focus,.pure-form input[type=radio]:focus{outline:thin solid #129fea;outline:1px auto #129fea}.pure-form .pure-checkbox,.pure-form .pure-radio{margin:.5em 0;display:block}.pure-form input[type=color][disabled],.pure-form input[type=date][disabled],.pure-form input[type=datetime-local][disabled],.pure-form input[type=datetime][disabled],.pure-form input[type=email][disabled],.pure-form input[type=month][disabled],.pure-form input[type=number][disabled],.pure-form input[type=password][disabled],.pure-form input[type=search][disabled],.pure-form input[type=tel][disabled],.pure-form 
input[type=text][disabled],.pure-form input[type=time][disabled],.pure-form input[type=url][disabled],.pure-form input[type=week][disabled],.pure-form select[disabled],.pure-form textarea[disabled]{cursor:not-allowed;background-color:#eaeded;color:#cad2d3}.pure-form input:not([type])[disabled]{cursor:not-allowed;background-color:#eaeded;color:#cad2d3}.pure-form input[readonly],.pure-form select[readonly],.pure-form textarea[readonly]{background-color:#eee;color:#777;border-color:#ccc}.pure-form input:focus:invalid,.pure-form select:focus:invalid,.pure-form textarea:focus:invalid{color:#b94a48;border-color:#e9322d}.pure-form input[type=checkbox]:focus:invalid:focus,.pure-form input[type=file]:focus:invalid:focus,.pure-form input[type=radio]:focus:invalid:focus{outline-color:#e9322d}.pure-form select{height:2.25em;border:1px solid #ccc;background-color:#fff}.pure-form select[multiple]{height:auto}.pure-form label{margin:.5em 0 .2em}.pure-form fieldset{margin:0;padding:.35em 0 .75em;border:0}.pure-form legend{display:block;width:100%;padding:.3em 0;margin-bottom:.3em;color:#333;border-bottom:1px solid #e5e5e5}.pure-form-stacked input[type=color],.pure-form-stacked input[type=date],.pure-form-stacked input[type=datetime-local],.pure-form-stacked input[type=datetime],.pure-form-stacked input[type=email],.pure-form-stacked input[type=file],.pure-form-stacked input[type=month],.pure-form-stacked input[type=number],.pure-form-stacked input[type=password],.pure-form-stacked input[type=search],.pure-form-stacked input[type=tel],.pure-form-stacked input[type=text],.pure-form-stacked input[type=time],.pure-form-stacked input[type=url],.pure-form-stacked input[type=week],.pure-form-stacked label,.pure-form-stacked select,.pure-form-stacked textarea{display:block;margin:.25em 0}.pure-form-stacked input:not([type]){display:block;margin:.25em 0}.pure-form-aligned input,.pure-form-aligned select,.pure-form-aligned 
textarea,.pure-form-message-inline{display:inline-block;vertical-align:middle}.pure-form-aligned textarea{vertical-align:top}.pure-form-aligned .pure-control-group{margin-bottom:.5em}.pure-form-aligned .pure-control-group label{text-align:right;display:inline-block;vertical-align:middle;width:10em;margin:0 1em 0 0}.pure-form-aligned .pure-controls{margin:1.5em 0 0 11em}.pure-form .pure-input-rounded,.pure-form input.pure-input-rounded{border-radius:2em;padding:.5em 1em}.pure-form .pure-group fieldset{margin-bottom:10px}.pure-form .pure-group input,.pure-form .pure-group textarea{display:block;padding:10px;margin:0 0 -1px;border-radius:0;position:relative;top:-1px}.pure-form .pure-group input:focus,.pure-form .pure-group textarea:focus{z-index:3}.pure-form .pure-group input:first-child,.pure-form .pure-group textarea:first-child{top:1px;border-radius:4px 4px 0 0;margin:0}.pure-form .pure-group input:first-child:last-child,.pure-form .pure-group textarea:first-child:last-child{top:1px;border-radius:4px;margin:0}.pure-form .pure-group input:last-child,.pure-form .pure-group textarea:last-child{top:-2px;border-radius:0 0 4px 4px;margin:0}.pure-form .pure-group button{margin:.35em 0}.pure-form .pure-input-1{width:100%}.pure-form .pure-input-3-4{width:75%}.pure-form .pure-input-2-3{width:66%}.pure-form .pure-input-1-2{width:50%}.pure-form .pure-input-1-3{width:33%}.pure-form .pure-input-1-4{width:25%}.pure-form-message-inline{display:inline-block;padding-left:.3em;color:#666;vertical-align:middle;font-size:.875em}.pure-form-message{display:block;color:#666;font-size:.875em}@media only screen and (max-width :480px){.pure-form button[type=submit]{margin:.7em 0 0}.pure-form input:not([type]),.pure-form input[type=color],.pure-form input[type=date],.pure-form input[type=datetime-local],.pure-form input[type=datetime],.pure-form input[type=email],.pure-form input[type=month],.pure-form input[type=number],.pure-form input[type=password],.pure-form input[type=search],.pure-form 
input[type=tel],.pure-form input[type=text],.pure-form input[type=time],.pure-form input[type=url],.pure-form input[type=week],.pure-form label{margin-bottom:.3em;display:block}.pure-group input:not([type]),.pure-group input[type=color],.pure-group input[type=date],.pure-group input[type=datetime-local],.pure-group input[type=datetime],.pure-group input[type=email],.pure-group input[type=month],.pure-group input[type=number],.pure-group input[type=password],.pure-group input[type=search],.pure-group input[type=tel],.pure-group input[type=text],.pure-group input[type=time],.pure-group input[type=url],.pure-group input[type=week]{margin-bottom:0}.pure-form-aligned .pure-control-group label{margin-bottom:.3em;text-align:left;display:block;width:100%}.pure-form-aligned .pure-controls{margin:1.5em 0 0 0}.pure-form-message,.pure-form-message-inline{display:block;font-size:.75em;padding:.2em 0 .8em}}.pure-menu{-webkit-box-sizing:border-box;box-sizing:border-box}.pure-menu-fixed{position:fixed;left:0;top:0;z-index:3}.pure-menu-item,.pure-menu-list{position:relative}.pure-menu-list{list-style:none;margin:0;padding:0}.pure-menu-item{padding:0;margin:0;height:100%}.pure-menu-heading,.pure-menu-link{display:block;text-decoration:none;white-space:nowrap}.pure-menu-horizontal{width:100%;white-space:nowrap}.pure-menu-horizontal .pure-menu-list{display:inline-block}.pure-menu-horizontal .pure-menu-heading,.pure-menu-horizontal .pure-menu-item,.pure-menu-horizontal .pure-menu-separator{display:inline-block;vertical-align:middle}.pure-menu-item .pure-menu-item{display:block}.pure-menu-children{display:none;position:absolute;left:100%;top:0;margin:0;padding:0;z-index:3}.pure-menu-horizontal .pure-menu-children{left:0;top:auto;width:inherit}.pure-menu-active>.pure-menu-children,.pure-menu-allow-hover:hover>.pure-menu-children{display:block;position:absolute}.pure-menu-has-children>.pure-menu-link:after{padding-left:.5em;content:"\25B8";font-size:small}.pure-menu-horizontal 
.pure-menu-has-children>.pure-menu-link:after{content:"\25BE"}.pure-menu-scrollable{overflow-y:scroll;overflow-x:hidden}.pure-menu-scrollable .pure-menu-list{display:block}.pure-menu-horizontal.pure-menu-scrollable .pure-menu-list{display:inline-block}.pure-menu-horizontal.pure-menu-scrollable{white-space:nowrap;overflow-y:hidden;overflow-x:auto;padding:.5em 0}.pure-menu-horizontal .pure-menu-children .pure-menu-separator,.pure-menu-separator{background-color:#ccc;height:1px;margin:.3em 0}.pure-menu-horizontal .pure-menu-separator{width:1px;height:1.3em;margin:0 .3em}.pure-menu-horizontal .pure-menu-children .pure-menu-separator{display:block;width:auto}.pure-menu-heading{text-transform:uppercase;color:#565d64}.pure-menu-link{color:#777}.pure-menu-children{background-color:#fff}.pure-menu-heading,.pure-menu-link{padding:.5em 1em}.pure-menu-disabled{opacity:.5}.pure-menu-disabled .pure-menu-link:hover{background-color:transparent;cursor:default}.pure-menu-active>.pure-menu-link,.pure-menu-link:focus,.pure-menu-link:hover{background-color:#eee}.pure-menu-selected>.pure-menu-link,.pure-menu-selected>.pure-menu-link:visited{color:#000}.pure-table{border-collapse:collapse;border-spacing:0;empty-cells:show;border:1px solid #cbcbcb}.pure-table caption{color:#000;font:italic 85%/1 arial,sans-serif;padding:1em 0;text-align:center}.pure-table td,.pure-table th{border-left:1px solid #cbcbcb;border-width:0 0 0 1px;font-size:inherit;margin:0;overflow:visible;padding:.5em 1em}.pure-table thead{background-color:#e0e0e0;color:#000;text-align:left;vertical-align:bottom}.pure-table td{background-color:transparent}.pure-table-odd td{background-color:#f2f2f2}.pure-table-striped tr:nth-child(2n-1) td{background-color:#f2f2f2}.pure-table-bordered td{border-bottom:1px solid #cbcbcb}.pure-table-bordered tbody>tr:last-child>td{border-bottom-width:0}.pure-table-horizontal td,.pure-table-horizontal th{border-width:0 0 1px 0;border-bottom:1px solid #cbcbcb}.pure-table-horizontal 
tbody>tr:last-child>td{border-bottom-width:0} ================================================ FILE: changedetectionio/static/styles/scss/_settings.scss ================================================ /** * SCSS variables (compile-time) * These can be used in media queries and other places where CSS custom properties don't work */ // Breakpoints $desktop-wide-breakpoint: 980px; ================================================ FILE: changedetectionio/static/styles/scss/diff-image.scss ================================================ /** * Image Comparison Diff Styles * Styles for the interactive image comparison slider and screenshot diff visualization */ .comparison-score { padding: 1em; background: var(--color-table-stripe); border-radius: 4px; margin: 1em 0; border: 1px solid var(--color-border-table-cell); color: var(--color-text); } .change-detected { color: #d32f2f; font-weight: bold; } .no-change { color: #388e3c; font-weight: bold; } .comparison-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 1em; margin: 1em 1em; @media (max-width: 1200px) { grid-template-columns: 1fr; } } /* Interactive Image Comparison Slider */ .image-comparison { position: relative; width: 100%; overflow: hidden; border: 1px solid var(--color-border-table-cell); box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); user-select: none; img { display: block; width: 100%; height: auto; max-width: 100%; border: none; box-shadow: none; } } /* Image wrappers with checkered background */ .comparison-image-wrapper { position: relative; width: 100%; display: flex; align-items: flex-start; justify-content: center; /* Very light checkered background pattern */ background-color: var(--color-background); background-image: linear-gradient(45deg, var(--color-table-stripe) 25%, transparent 25%), linear-gradient(-45deg, var(--color-table-stripe) 25%, transparent 25%), linear-gradient(45deg, transparent 75%, var(--color-table-stripe) 75%), linear-gradient(-45deg, transparent 75%, var(--color-table-stripe) 
75%); background-size: 20px 20px; background-position: 0 0, 0 10px, 10px -10px, -10px 0px; } .comparison-after { position: absolute; top: 0; left: 0; width: 100%; height: 100%; clip-path: inset(0 0 0 50%); } .comparison-slider { position: absolute; top: 0; left: 50%; width: 4px; height: 100%; background: #0078e7; cursor: ew-resize; transform: translateX(-2px); z-index: 10; } .comparison-handle { position: absolute; top: 50%; left: 50%; width: 48px; height: 48px; background: #0078e7; border: 3px solid white; border-radius: 50%; transform: translate(-50%, -50%); box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3); display: flex; align-items: center; justify-content: center; cursor: ew-resize; transition: top 0.1s ease-out; &::after { content: '⇄'; color: white; font-size: 24px; font-weight: bold; pointer-events: none; } } .comparison-labels { position: absolute; top: 10px; width: 100%; display: flex; justify-content: space-between; padding: 0 0px; z-index: 5; pointer-events: none; } .comparison-label { background: rgba(0, 0, 0, 0.7); color: white; padding: 0.5em 1em; border-radius: 4px; font-size: 0.9em; font-weight: bold; } .screenshot-panel { text-align: center; background: var(--color-background); border: 1px solid var(--color-border-table-cell); border-radius: 4px; padding: 1em; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05); h3 { margin: 0 0 1em 0; font-size: 1.1em; color: var(--color-text); border-bottom: 2px solid var(--color-background-button-primary); padding-bottom: 0.5em; } &.diff h3 { border-bottom-color: #d32f2f; } img { max-width: 100%; height: auto; border: 1px solid var(--color-border-table-cell); box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); } } .version-selector { display: inline-block; margin: 0 0.5em; label { font-weight: bold; margin-right: 0.5em; color: var(--color-text); } } #settings { background: var(--color-background); padding: 1.5em; border-radius: 4px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05); margin-bottom: 2em; border: 1px solid 
var(--color-border-table-cell); h2 { margin-top: 0; color: var(--color-text); } } .diff-fieldset { border: none; padding: 0; margin: 0; } .edit-link { float: right; margin-top: -0.5em; } .comparison-description { color: var(--color-text-input-description); font-size: 0.9em; margin-bottom: 1em; } .download-link { color: var(--color-link); text-decoration: none; display: inline-flex; align-items: center; gap: 0.3em; font-size: 0.85em; &:hover { text-decoration: underline; } } .diff-section-header { color: #d32f2f; font-size: 0.9em; margin-bottom: 1em; font-weight: bold; display: flex; align-items: center; justify-content: center; gap: 1em; } .comparison-history-section { margin-top: 3em; padding: 1em; background: var(--color-background); border: 1px solid var(--color-border-table-cell); border-radius: 4px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05); h3 { color: var(--color-text); } p { color: var(--color-text-input-description); font-size: 0.9em; } } .history-changed-yes { color: #d32f2f; font-weight: bold; } .history-changed-no { color: #388e3c; } ================================================ FILE: changedetectionio/static/styles/scss/diff.scss ================================================ #diff-form { background: rgba(0, 0, 0, .05); padding: 1em; border-radius: 10px; margin-bottom: 1em; color: #fff; font-size: 0.9rem; text-align: center; label.from-to-label { width: 4rem; text-decoration: none; padding: 0.5rem; &#change-from { color: #b30000; background: #fadad7 } &#change-to { background: #eaf2c2; color: #406619; } } #diff-style { >span { display: inline-block; padding: 0.3em; label { font-weight: normal; } } } * { vertical-align: middle; } } body.difference-page { section.content { padding-top: 40px; } } #diff-ui { background: var(--color-background); padding: 1rem; border-radius: 5px; @media (min-width: 767px) { min-width: 50%; } // The first tab 'text' diff #text { font-size: 11px; } pre { white-space: break-spaces; overflow-wrap: anywhere; } h1 { display: 
inline; font-size: 100%; } #result { white-space: pre-wrap; word-break: break-word; overflow-wrap: break-word; } .source { position: absolute; right: 1%; top: .2em; } @-moz-document url-prefix() { body { height: 99%; /* Hide scroll bar in Firefox */ } } td#diff-col div { text-align: justify; white-space: pre-wrap; } .ignored { background-color: #ccc; /* border: #0d91fa 1px solid; */ opacity: 0.7; } .triggered { background-color: #1b98f8; } /* ignored and triggered? make it obvious error */ .ignored.triggered { background-color: #ff0000; } .tab-pane-inner#screenshot { text-align: center; img { max-width: 99%; } } // resets button margin to 0px .pure-form button.reset-margin { margin: 0px; } .diff-fieldset { display: flex; align-items: center; gap: 4px; flex-wrap: wrap; } ul#highlightSnippetActions { list-style-type: none; display: flex; align-items: center; justify-content: center; gap: 1.5rem; flex-wrap: wrap; padding: 0; margin: 0; li { display: flex; flex-direction: column; align-items: center; text-align: center; padding: 0.5rem; gap: 0.3rem; button, a { white-space: nowrap; } } span { font-size: 0.8rem; color: var(--color-text-input-description); } } #cell-diff-jump-visualiser { display: flex; flex-direction: row; gap: 1px; background: var(--color-background); border-radius: 3px; overflow-x: hidden; position: sticky; top: 0; z-index: 10; padding-top: 1rem; padding-bottom: 1rem; justify-content: center; > div { flex: 1; min-width: 1px; max-width: 10px; height: 10px; background: var(--color-background-button-cancel); opacity: 0.3; border-radius: 1px; transition: opacity 0.2s; position: relative; &.deletion { background: #b30000; // Red for deletions opacity: 1; } &.insertion { background: #406619; // Green for insertions opacity: 1; } &.note { background: #406619; // NOTE(review): changed/notes use the same green as .insertion (this comment previously said orange) - confirm the intended colour opacity: 1; } &.mixed { background: linear-gradient(to right, #b30000 50%, #406619 50%); // Half red, half green opacity: 1; } &.current-position::after { content: ''; 
position: absolute; bottom: -6px; left: 50%; transform: translateX(-50%); width: 0; height: 0; border-left: 4px solid transparent; border-right: 4px solid transparent; border-bottom: 4px solid var(--color-text); } &:hover { opacity: 0.8; cursor: pointer; } } } } #text-diff-heading-area { .snapshot-age { padding: 4px; margin: 0.5rem 0; background-color: var(--color-background-snapshot-age); border-radius: 3px; font-weight: bold; margin-bottom: 4px; &.error { background-color: var(--color-error-background-snapshot-age); color: var(--color-error-text-snapshot-age); } > * { padding-right: 1rem; } } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_action_sidebar.scss ================================================ // Action Sidebar - Minimal navigation icons with light grey aesthetic .content-wrapper { display: flex; gap: 0; width: 100%; max-width: 100%; position: relative; @media only screen and (max-width: 900px) { flex-direction: column; } } .action-sidebar { position: sticky; top: 100px; flex-shrink: 0; width: 80px; height: fit-content; background: transparent; padding: 1.5rem 0; display: flex; flex-direction: column; gap: 0.5rem; align-items: center; z-index: 0; @media only screen and (max-width: 900px) { position: relative; top: 0; width: 100%; flex-direction: row; justify-content: space-around; padding: 0; overflow-x: auto; } } .action-sidebar-item { position: relative; display: flex; flex-direction: column; align-items: center; justify-content: center; gap: 0.35rem; padding: 0.75rem 0.5rem; min-width: 64px; text-decoration: none; opacity: 0.8; transition: opacity 0.2s ease; &:hover { opacity: 1; } &.active { opacity: 1; .action-icon { stroke: #fff; stroke-width: 2.5; } .action-label { color: #fff; font-weight: 700; } } } .action-icon { width: 28px; height: 28px; stroke: #fff; stroke-width: 2; fill: none; stroke-linecap: round; stroke-linejoin: round; transition: stroke 0.2s ease; } .action-label { font-size: 
0.65rem; font-weight: 500; text-align: center; line-height: 1.1; letter-spacing: 0.02em; text-transform: uppercase; color: #fff; transition: color 0.2s ease; max-width: 60px; word-wrap: break-word; } .content-main { flex: 0 1 auto; width: 100%; min-width: 0; padding: 0; display: flex; flex-direction: column; align-items: center; } // Dark mode adjustments html[data-darkmode=true] { .action-icon { /* stroke: #666;*/ } .action-label { /* color: #666;*/ } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_arrows.scss ================================================ .arrow { border: solid #1b98f8; border-width: 0 2px 2px 0; display: inline-block; padding: 3px; &.right { transform: rotate(-45deg); -webkit-transform: rotate(-45deg); } &.left { transform: rotate(135deg); -webkit-transform: rotate(135deg); } &.up, &.asc { transform: rotate(-135deg); -webkit-transform: rotate(-135deg); } &.down, &.desc { transform: rotate(45deg); -webkit-transform: rotate(45deg); } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_browser-steps.scss ================================================
// Styles for the rows inside #browser_steps.
#browser_steps {
  /* convert rows to horizontal cells */
  th {
    display: none;
  }

  li {
    // Plain declarations are listed before every nested rule. Declarations
    // placed after a nested rule trigger Sass's "mixed-decls" deprecation and
    // are emitted at a different position depending on the compiler version;
    // keeping them first makes the generated CSS identical everywhere.
    list-style: decimal;
    padding: 5px;

    // Row flagged with an error: light red background.
    &.browser-step-with-error {
      background-color: #ffd6d6;
      border-radius: 4px;
    }

    // Any row except the first is fully opaque while hovered; this selector
    // is more specific than `&.empty`, so it overrides the dimming below.
    &:not(:first-child) {
      &:hover {
        opacity: 1.0;
      }
    }

    .control {
      padding-left: 5px;
      padding-right: 5px;

      a {
        font-size: 70%;
      }
    }

    // Empty row: dimmed, no padding, and its controls are hidden.
    &.empty {
      padding: 0px;
      opacity: 0.35;

      .control {
        display: none;
      }
    }

    &:hover {
      background: #eee;
    }

    // Labels that are direct children of a row are not shown.
    > label {
      display: none;
    }
  }
}
@media only screen and (min-width: 760px) { #browser-steps .flex-wrapper { display: flex; flex-flow: row; height: 70vh; font-size: 80%; #browser-steps-ui { flex-grow: 1; /* Allow it to grow and fill the available space */ flex-shrink: 1; /* Allow it to shrink if needed */ flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
background-color: #eee; border-radius: 5px; } } #browser-steps-fieldlist { flex-grow: 0; /* Don't allow it to grow */ flex-shrink: 0; /* Don't allow it to shrink */ flex-basis: auto; /* Base width is determined by the content */ max-width: 400px; /* Set a max width to prevent overflow */ padding-left: 1rem; overflow-y: scroll; } /* this is duplicate :( */ #browsersteps-selector-wrapper { height: 100% !important; } } /* this is duplicate :( */
// Scrollable, relatively positioned container; its direct <img> and <canvas>
// children, the loader and the spinner are styled below.
#browsersteps-selector-wrapper {
  width: 100%;
  overflow-y: scroll;
  position: relative;
  // Base height; the `min-width: 760px` media query earlier in this file
  // overrides it with `height: 100% !important`.
  height: 80vh;

  // The image is taken out of flow (absolute) while the canvas stays in flow.
  > img {
    position: absolute;
    max-width: 100%;
  }

  > canvas {
    position: relative;
    max-width: 100%;

    &:hover {
      cursor: pointer;
    }
  }

  // Centred both ways via the 50% offsets plus the negative translate.
  .loader {
    position: absolute;
    left: 50%;
    top: 50%;
    transform: translate(-50%, -50%);
    z-index: 100;
    max-width: 350px;
    text-align: center;
  }

  /* nice tall skinny one */
  .spinner,
  .spinner:after {
    width: 80px;
    height: 80px;
    font-size: 3px;
  }

  #browsersteps-click-start {
    // Declaration first, nested rule second: a declaration that follows a
    // nested rule triggers Sass's "mixed-decls" deprecation and its output
    // position depends on the compiler version.
    color: var(--color-grey-400);

    &:hover {
      cursor: pointer;
    }
  }
}
================================================ FILE: changedetectionio/static/styles/scss/parts/_conditions_table.scss ================================================ /* Styles for the flexbox-based table replacement for conditions */ .fieldlist_formfields { width: 100%; background-color: var(--color-background, #fff); border-radius: 4px; border: 1px solid var(--color-border-table-cell, #cbcbcb); /* Header row */ .fieldlist-header { display: flex; background-color: var(--color-background-table-thead, #e0e0e0); font-weight: bold; border-bottom: 1px solid var(--color-border-table-cell, #cbcbcb); } .fieldlist-header-cell { flex: 1; padding: 0.5em 1em; text-align: left; &:last-child { flex: 0 0 120px; /* Fixed width for actions column */ } } /* Body rows */ .fieldlist-body { display: flex; flex-direction: column; } .fieldlist-row { display: flex; border-bottom: 1px solid var(--color-border-table-cell, #cbcbcb); &:last-child { border-bottom: none; } &:nth-child(2n-1) {
background-color: var(--color-table-stripe, #f2f2f2); } &.error-row { background-color: var(--color-error-input, #ffdddd); } } .fieldlist-cell { flex: 1; padding: 0.5em 1em; display: flex; flex-direction: column; justify-content: center; /* Make inputs take up full width of their cell */ input, select { width: 100%; } &.fieldlist-actions { flex: 0 0 120px; /* Fixed width for actions column */ display: flex; flex-direction: row; align-items: center; gap: 4px; } } /* Error styling */ ul.errors { margin-top: 0.5em; margin-bottom: 0; padding: 0.5em; background-color: var(--color-error-background-snapshot-age, #ffdddd); border-radius: 4px; list-style-position: inside; } /* Responsive styles */ @media only screen and (max-width: 760px) { .fieldlist-header, .fieldlist-row { flex-direction: column; } .fieldlist-header-cell { display: none; } .fieldlist-row { padding: 0.5em 0; border-bottom: 2px solid var(--color-border-table-cell, #cbcbcb); } .fieldlist-cell { padding: 0.25em 0.5em; &.fieldlist-actions { flex: 1; justify-content: flex-start; padding-top: 0.5em; } } /* Add some spacing between fields on mobile */ .fieldlist-cell:not(:last-child) { margin-bottom: 0.5em; } /* Label each cell on mobile view */ .fieldlist-cell::before { content: attr(data-label); font-weight: bold; margin-bottom: 0.25em; } } } /* Button styling */ .fieldlist_formfields { .addRuleRow, .removeRuleRow, .verifyRuleRow { cursor: pointer; border: none; padding: 4px 8px; border-radius: 3px; font-weight: bold; background-color: #aaa; color: var(--color-foreground-text, #fff); &:hover { background-color: #999; } } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_darkmode.scss ================================================ .toggle-light-mode { /* default */ .icon-dark { display: none; } } html[data-darkmode="true"] { .toggle-light-mode { .icon-light { display: none; } .icon-dark { display: block; } } } ================================================ 
FILE: changedetectionio/static/styles/scss/parts/_diff_image.scss ================================================ body.processor-image_ssim_diff { #edit-text-filter { .text-filtering { display: none; } } #conditions-tab { display: none; } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_edit.scss ================================================ ul#conditions_match_logic { list-style: none; input, label, li { display: inline-block; } li { padding-right: 1em; } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_extra_browsers.scss ================================================ ul#requests-extra_browsers { list-style: none; /* tidy up the table to look more "inline" */ li { > label { display: none; } } /* each proxy entry is a `table` */ table { tr { display: table-row; // default display for small screens input[type=text] { width: 100%; } } } // apply inline display for larger screens @media only screen and (min-width: 1280px) { table { tr { display: inline; input[type=text] { width: 100%; } } } } } #extra-browsers-setting { border: 1px solid var(--color-grey-800); border-radius: 4px; margin: 1em; padding: 1em; } ================================================ FILE: changedetectionio/static/styles/scss/parts/_extra_proxies.scss ================================================ ul#requests-extra_proxies { list-style: none; /* tidy up the table to look more "inline" */ li { > label { display: none; } } /* each proxy entry is a `table` */ table { tr { display: table-row; // default display for small screens input[type=text] { width: 100%; } } } // apply inline display for large screens @media only screen and (min-width: 1024px) { table { tr { display: inline; } } } } #request { /* Auto proxy scan/checker */ label[for=proxy] { display: inline-block; } } body.proxy-check-active { #request { // Padding set by flex layout /* .proxy-status { width: 2em; } */ 
.proxy-check-details { font-size: 80%; color: #555; display: block; padding-left: 2em; max-width: 500px; } .proxy-timing { font-size: 80%; padding-left: 1rem; color: var(--color-link); } } } #recommended-proxy { display: grid; gap: 2rem; padding-bottom: 1em; @media (min-width: 991px) { grid-template-columns: repeat(2, 1fr); } > div { border: 1px #aaa solid; border-radius: 4px; padding: 1em; } } #extra-proxies-setting { border: 1px solid var(--color-grey-800); border-radius: 4px; margin: 1em; padding: 1em; } ================================================ FILE: changedetectionio/static/styles/scss/parts/_hamburger_menu.scss ================================================ // Hamburger Menu for Mobile Navigation @use "../settings" as *; .hamburger-menu { display: none; background: transparent; border: none; cursor: pointer; padding: 0.5rem; z-index: 10001; position: relative; @media only screen and (max-width: $desktop-wide-breakpoint) { display: flex; flex-direction: column; justify-content: center; align-items: center; } } .hamburger-icon { width: 24px; height: 20px; position: relative; display: flex; flex-direction: column; justify-content: space-between; span { display: block; height: 3px; width: 100%; background: var(--color-text); border-radius: 2px; transition: all 0.3s cubic-bezier(0.68, -0.55, 0.265, 1.55); transform-origin: center; } } .hamburger-menu.active { .hamburger-icon span:nth-child(1) { transform: translateY(8.5px) rotate(45deg); } .hamburger-icon span:nth-child(2) { opacity: 0; transform: translateX(-10px); } .hamburger-icon span:nth-child(3) { transform: translateY(-8.5px) rotate(-45deg); } } // Mobile menu overlay .mobile-menu-overlay { display: none; position: fixed; top: 0; left: 0; right: 0; bottom: 0; background: rgba(0, 0, 0, 0.5); z-index: 9999; opacity: 0; transition: opacity 0.3s ease; &.active { display: block; opacity: 1; } } // Mobile menu drawer .mobile-menu-drawer { position: fixed; top: 0; right: -280px; width: 280px; height: 
100%; background: var(--color-background); opacity: 1; box-shadow: -2px 0 8px rgba(0, 0, 0, 0.15); z-index: 10000; transition: right 0.3s cubic-bezier(0.68, -0.55, 0.265, 1.55); overflow-y: auto; padding-top: 60px; &.active { right: 0; } .mobile-menu-items { list-style: none; padding: 1rem 0; margin: 0; li { border-bottom: 1px solid var(--color-border-table-cell); >* { display: block; padding: 1rem 1.5rem; color: var(--color-text); text-decoration: none; font-weight: 500; transition: background 0.2s ease; &:hover { background: var(--color-background-menu-link-hover); } } &#menu-pause, &#menu-mute { display: none; } } } } // Logo styling .logo-cdio { font-weight: bold; font-size: 1.1rem; .logo-cd { color: var(--color-grey-500); } .logo-io { color: var(--color-text); } } // Always visible items container .menu-always-visible { display: flex; align-items: center; gap: 0.5rem; margin-left: auto; } // Hide regular menu items on mobile (but not in mobile drawer) @media only screen and (max-width: $desktop-wide-breakpoint) { #top-right-menu .menu-collapsible { display: none !important; } .pure-menu-horizontal { overflow-x: visible !important; } #nav-menu { overflow-x: visible !important; } } // Desktop - hide mobile menu elements @media only screen and (min-width: 1025px) { .hamburger-menu, .mobile-menu-drawer, .mobile-menu-overlay { display: none !important; } } html[data-darkmode=true] { .mobile-menu-drawer { box-shadow: -2px 0 8px rgba(0, 0, 0, 0.4); } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_language.scss ================================================ #language-selector-flag { display: inline-block; width: 1.2em; height: 1.2em; vertical-align: middle; border-radius: 50%; overflow: hidden; opacity: 0.6; &:hover { opacity: 1.0; } } // Language Selector Modal Styles .language-list { display: flex; flex-direction: column; gap: 0.5rem; padding: 0.5rem 0; } .language-option { display: flex; align-items: center; 
gap: 1rem; padding: 0.25rem; border-radius: 4px; transition: background-color 0.2s ease; text-decoration: none; color: var(--color-text); border: 1px solid transparent; &:hover { background-color: var(--color-background-menu-link-hover); border-color: var(--color-border-table-cell); } &.active { background-color: var(--color-link); color: var(--color-text-button); font-weight: 600; } .flag { font-size: 1.5rem; flex-shrink: 0; } .language-name { flex-grow: 1; font-size: 1rem; } } #language-modal { .language-list { .lang-option { display: inline-block; width: 1.5em; height: 1.5em; vertical-align: middle; margin-right: 0.5em; border-radius: 50%; overflow: hidden; } } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_lister_extra.scss ================================================ .watch-table { &.favicon-not-enabled { tr { .favicon { display: none; } } } tr { /* make the icons and the text inline-ish */ td.inline.title-col { .flex-wrapper { display: flex; align-items: center; gap: 4px; } } } td, th { vertical-align: middle; } tr.has-favicon { &.unviewed { img.favicon { opacity: 1.0 !important; } } } .status-icons { white-space: nowrap; display: flex; align-items: center; /* Vertical centering */ gap: 4px; /* Space between image and text */ > * { vertical-align: middle; } } } .title-col { /* Optional, for spacing */ padding: 10px; } .title-wrapper { display: flex; align-items: center; /* Vertical centering */ gap: 10px; /* Space between image and text */ } /* Make sure .title-col-inner doesn't collapse or misalign */ .title-col-inner { display: inline-block; vertical-align: middle; } /* favicon styling */ .watch-table { img.favicon { vertical-align: middle; max-width: 25px; max-height: 25px; height: 25px; padding-right: 4px; } // Reserved for future use /* &.thumbnail-type-screenshot { tr.has-favicon { td.inline.title-col { img.thumbnail { background-color: #fff; !* fallback bg for SVGs without bg *! 
border-radius: 4px; !* subtle rounded corners *! border: 1px solid #ddd; !* light border for contrast *! box-shadow: 0 2px 6px rgba(0, 0, 0, 0.15); !* soft shadow *! filter: contrast(1.05) saturate(1.1) drop-shadow(0 0 0.5px rgba(0, 0, 0, 0.2)); object-fit: cover; !* crop/fill if needed *! opacity: 0.8; max-width: 30px; max-height: 30px; height: 30px; } } } }*/ } ================================================ FILE: changedetectionio/static/styles/scss/parts/_login_form.scss ================================================ // Modern Login Form - Friendly and Welcoming Design .login-form { min-height: 52vh; display: flex; align-items: center; justify-content: center; padding: 2rem 1rem; .inner { background: var(--color-background); border-radius: 16px; box-shadow: 0 10px 40px rgba(0, 0, 0, 0.08), 0 2px 8px rgba(0, 0, 0, 0.04); padding: 3rem 2.5rem; width: 100%; max-width: 420px; position: relative; overflow: hidden; transition: transform 0.3s ease, box-shadow 0.3s ease; &:hover { box-shadow: 0 15px 50px rgba(0, 0, 0, 0.12), 0 5px 15px rgba(0, 0, 0, 0.06); } } form { margin: 0; } fieldset { border: none; padding: 0; margin: 0; } .pure-control-group { margin-bottom: 1.75rem; &:last-of-type { margin-bottom: 0; margin-top: 2rem; } } label { display: block; margin-bottom: 0.5rem; font-weight: 600; font-size: 0.9rem; color: var(--color-text); letter-spacing: 0.01em; } input[type="password"] { width: 100%; padding: 0.875rem 1rem; border: 2px solid var(--color-grey-800); border-radius: 8px; font-size: 1rem; background: var(--color-background-input); color: var(--color-text-input); transition: all 0.2s ease; box-sizing: border-box; &:focus { outline: none; border-color: var(--color-link); box-shadow: 0 0 0 3px rgba(27, 152, 248, 0.1); transform: translateY(-1px); } &::placeholder { color: var(--color-text-input-placeholder); } } button[type="submit"] { width: 100%; padding: 0.875rem 1.5rem; font-size: 1rem; font-weight: 600; border-radius: 8px; border: none; background: 
var(--color-background-button-primary); color: var(--color-text-button); cursor: pointer; transition: all 0.2s ease; box-shadow: 0 2px 8px rgba(27, 152, 248, 0.2); &:hover { box-shadow: 0 4px 12px rgba(27, 152, 248, 0.3); background: #0066cc; } &:active { transform: translateY(0); box-shadow: 0 2px 4px rgba(27, 152, 248, 0.2); } } } // Messages styling for login page .content-main > ul.messages { position: fixed; top: 120px; left: 50%; transform: translateX(-50%); list-style: none; padding: 0; margin: 0; z-index: 1000; min-width: 300px; max-width: 500px; li { padding: 1rem 1.25rem; border-radius: 8px; font-size: 0.95rem; line-height: 1.5; font-weight: 500; box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15); animation: slideDown 0.3s ease-out; border: 2px solid transparent; &.error { background: #fee; border: 2px solid #ef4444; color: #991b1b; font-weight: 600; } &.success { background: #f0fdf4; border: 2px solid #10b981; color: #166534; } &.info, &.message { background: #eff6ff; border: 2px solid #3b82f6; color: #1e40af; } } } @keyframes slideDown { from { opacity: 0; transform: translateY(-20px); } to { opacity: 1; transform: translateY(0); } } // Dark mode adjustments html[data-darkmode="true"] { .login-form { .inner { box-shadow: 0 10px 40px rgba(0, 0, 0, 0.4), 0 2px 8px rgba(0, 0, 0, 0.2); &:hover { box-shadow: 0 15px 50px rgba(0, 0, 0, 0.5), 0 5px 15px rgba(0, 0, 0, 0.3); } } input[type="password"] { border-color: var(--color-grey-400); &:focus { border-color: var(--color-link); } } } .content-main > ul.messages { li { box-shadow: 0 4px 12px rgba(0, 0, 0, 0.4); &.error { background: #4a1d1d; border-color: #ef4444; color: #fca5a5; } &.success { background: #1a3a2a; border-color: #10b981; color: #86efac; } &.info, &.message { background: #1e3a5f; border-color: #3b82f6; color: #93c5fd; } } } } // Mobile adjustments @media only screen and (max-width: 768px) { .login-form { min-height: auto; padding: 1rem 0.5rem; padding-top: 5rem; // Space for error message .inner { 
padding: 2rem 1.5rem; border-radius: 12px; } } .content-main > ul.messages { top: 70px; // Higher up on mobile to avoid overlap left: 10px; right: 10px; transform: none; min-width: auto; } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_love.scss ================================================ #overlay { opacity: 0.95; position: fixed; width: 350px; max-width: 100%; height: 100%; top: 0; right: -350px; background-color: var(--color-table-stripe); z-index: 2; transform: translateX(0); transition: transform .5s ease; &.visible { transform: translateX(-100%); } .content { font-size: 0.875rem; padding: 1rem; margin-top: 5rem; max-width: 400px; color: var(--color-watch-table-row-text); } } #heartpath { &:hover { fill: #ff0000 !important; transition: all ease 0.3s !important; } transition: all ease 0.3s !important; } ================================================ FILE: changedetectionio/static/styles/scss/parts/_menu.scss ================================================ .pure-menu-link { padding: 0.5rem 1em; line-height: 1.2rem; } #menu-mute, #menu-pause { padding-left: 0.3rem; padding-right: 0.3rem; img { height: 1.2rem; } } .pure-menu-item { svg { height: 1.2rem; } * { vertical-align: middle; } .github-link { height: 1.8rem; display: block; svg { height: 100%; } } .bi-heart { &:hover { cursor: pointer; } } // Active menu item styling &.active { .pure-menu-link { background-color: var(--color-background-menu-link-hover); color: var(--color-text-menu-link-hover); } } } #cdio-logo { padding-left: 0.5em; } #inline-menu-extras-group { >* { display: inline-block; } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_minitabs.scss ================================================ .minitabs-wrapper { width: 100%; > div[id] { padding: 20px; border: 1px solid #ccc; border-top: none; } .minitabs-content { width: 100%; display: flex; > div { flex: 1 1 auto; min-width: 0; 
overflow: scroll; } } .minitabs { display: flex; border-bottom: 1px solid #ccc; } .minitab { flex: 1; text-align: center; padding: 12px 0; text-decoration: none; color: #333; background-color: #f1f1f1; border: 1px solid #ccc; border-bottom: none; cursor: pointer; transition: background-color 0.3s; } .minitab:hover { background-color: #ddd; } .minitab.active { background-color: #fff; font-weight: bold; } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_modal.scss ================================================ /** * Modal dialog styles using HTML5 element * Provides modern, accessible confirmation dialogs */ .modal-dialog { border: none; border-radius: 10px; padding: 0; background: var(--color-background); color: var(--color-text); box-shadow: 0 5px 20px rgba(0, 0, 0, 0.3); max-width: 500px; width: 90%; &::backdrop { background: rgba(0, 0, 0, 0.6); backdrop-filter: blur(3px); animation: fadeIn 0.2s ease-out; } &[open] { animation: slideIn 0.25s ease-out; } .modal-header { padding: 1.5rem; border-bottom: 1px solid var(--color-border-table-cell); display: flex; align-items: center; gap: 1rem; .modal-icon { font-size: 2rem; line-height: 1; flex-shrink: 0; &.warning { color: var(--color-warning); } &.danger { color: var(--color-background-button-error); } &.info { color: var(--color-background-button-primary); } } .modal-title { font-size: 1.3rem; font-weight: bold; margin: 0; color: var(--color-text); } } .modal-body { padding: 1.5rem; line-height: 1.6; p { margin: 0 0 1rem 0; &:last-child { margin-bottom: 0; } } strong { color: var(--color-text); font-weight: 600; } } .modal-footer { padding: 1rem 1.5rem; border-top: 1px solid var(--color-border-table-cell); display: flex; gap: 0.75rem; justify-content: flex-end; background: var(--color-grey-900); button { padding: 0.6rem 1.5rem; border: none; border-radius: 4px; cursor: pointer; font-weight: 500; transition: all 0.2s ease; font-size: 0.95rem; &:hover { transform: 
translateY(-1px); box-shadow: 0 2px 8px rgba(0, 0, 0, 0.15); } &:active { transform: translateY(0); } &.modal-btn-cancel { background: var(--color-background-button-cancel); color: var(--color-grey-200); &:hover { background: var(--color-grey-700); } } &.modal-btn-confirm { background: var(--color-background-button-primary); color: var(--color-white); &:hover { opacity: 0.9; } } &.modal-btn-danger { background: var(--color-background-button-error); color: var(--color-white); &:hover { background: var(--color-dark-red); } } &.modal-btn-warning { background: var(--color-background-button-warning); color: var(--color-white); &:hover { opacity: 0.9; } } } } } // Dark mode adjustments html[data-darkmode="true"] { .modal-dialog { box-shadow: 0 5px 30px rgba(0, 0, 0, 0.7); .modal-footer { background: var(--color-grey-200); } } } // Animations @keyframes fadeIn { from { opacity: 0; } to { opacity: 1; } } @keyframes slideIn { from { opacity: 0; transform: translateY(-20px) scale(0.95); } to { opacity: 1; transform: translateY(0) scale(1); } } // Mobile responsive @media only screen and (max-width: 760px) { .modal-dialog { width: 95%; max-width: none; .modal-header { padding: 1rem; .modal-title { font-size: 1.1rem; } } .modal-body { padding: 1rem; font-size: 0.95rem; } .modal-footer { padding: 0.75rem 1rem; flex-wrap: wrap; button { flex: 1; min-width: 120px; } } } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_notification_bubble.scss ================================================ // Reusable notification bubble for action sidebar icons .action-sidebar-item { position: relative; .notification-bubble { position: absolute; top: 8px; left: 8px; min-width: 18px; height: 18px; background: #ff4444; color: #fff; font-size: 10px; font-weight: 700; line-height: 18px; text-align: center; border-radius: 9px; padding: 0 2px; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3); pointer-events: none; transition: all 0.2s ease; display: none; 
// Red bubble for errors/urgent &.red-bubble { background: #ff4444; } // Blue bubble for informational &.blue-bubble { background: #4a9eff; color: #fff; } &.visible { display: block; } // Pulse animation when value changes &.pulse { animation: bubblePulse 0.4s ease-out; } // Large numbers get smaller font &.large-number { font-size: 8px; min-width: 20px; height: 20px; line-height: 20px; border-radius: 10px; } } } @keyframes bubblePulse { 0% { transform: scale(1); } 50% { transform: scale(1.3); } 100% { transform: scale(1); } } // Dark mode adjustments html[data-darkmode=true] { .notification-bubble { box-shadow: 0 2px 6px rgba(0, 0, 0, 0.6); } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_pagination.scss ================================================ .pagination-page-info { text-transform: capitalize; } .pagination.menu { > * { display: inline-block; } li { display: inline-block; } a { padding: 0.65rem; margin: 3px; border: none; background: #444; border-radius: 2px; color: var(--color-text-button); &.disabled { display: none; } &.active { font-weight: bold; background: #888; } &:hover { background: #999; } } } ================================================ FILE: changedetectionio/static/styles/scss/parts/_preview_text_filter.scss ================================================ @use "minitabs"; body.preview-text-enabled { @media (min-width: 800px) { #filters-and-triggers > div { display: flex; /* Establishes Flexbox layout */ gap: 20px; /* Adds space between the columns */ position: relative; /* Ensures the sticky positioning is relative to this parent */ } } /* layout of the page */ #edit-text-filter, #text-preview { flex: 1; /* Each column takes an equal amount of available space */ align-self: flex-start; /* Aligns the right column to the start, allowing it to maintain its content height */ } #edit-text-filter { #pro-tips { display: none; } } #text-preview { position: sticky; top: 20px; padding-top: 
1rem; padding-bottom: 1rem; display: block !important; } #activate-text-preview { background-color: var(--color-grey-500); } /* actual preview area */ .monospace-preview { background: var(--color-background-input); border: 1px solid var(--color-grey-600); padding: 1rem; color: var(--color-text-input); font-family: "Courier New", Courier, monospace; /* Sets the font to a monospace type */ font-size: 70%; word-break: break-word; white-space: pre-wrap; /* Preserves whitespace and line breaks like
 */
  }
}

#activate-text-preview {
  // Taken out of normal flow and pinned to the right edge of its nearest
  // positioned ancestor.
  position: absolute;
  right: 0;

  // Stacking order and drop shadow.
  z-index: 3;
  box-shadow: 1px 1px 4px var(--color-shadow-jump);
}


================================================
FILE: changedetectionio/static/styles/scss/parts/_search_modal.scss
================================================
// Search Modal Styles

#search-modal .modal-body {
  padding: 2rem 1.5rem;

  .pure-control-group {
    padding-bottom: 0;

    // The caption is a block element, so it sits on its own line above the input.
    label {
      display: block;
      margin-bottom: 0.5rem;
      color: var(--color-text);
      font-weight: 600;
      font-size: 0.9rem;
    }

    #search-modal-input {
      // Sizing: fill the row; padding and border count towards the width.
      box-sizing: border-box;
      width: 100%;
      max-width: 100%;
      padding: 0.6rem 0.8rem;

      // Appearance
      font-size: 1rem;
      color: var(--color-text-input);
      background-color: var(--color-background-input);
      border: 1px solid var(--color-border-input);
      border-radius: 4px;
      box-shadow: inset 0 1px 3px var(--color-shadow-input);
      transition: border-color 0.2s ease, box-shadow 0.2s ease;

      // Replace the browser outline with a link-coloured border and a soft ring.
      &:focus {
        border-color: var(--color-link);
        box-shadow: 0 0 0 3px rgba(27, 152, 248, 0.1);
        outline: none;
      }

      &::placeholder {
        opacity: 0.7;
        color: var(--color-text-input-placeholder);
      }
    }
  }
}

// Dark mode adjustments
html[data-darkmode=true] {
  // The light-mode focus rule compiles to
  //   #search-modal .modal-body .pure-control-group #search-modal-input:focus
  // with specificity (2,3,0). A shorter dark-mode selector such as
  //   html[data-darkmode=true] #search-modal #search-modal-input:focus
  // only reaches (2,2,1) and therefore always loses, so the dark focus ring
  // would never be applied. Mirroring the full nesting here raises the
  // dark-mode selector to (2,4,1) so it wins the cascade.
  #search-modal {
    .modal-body {
      .pure-control-group {
        #search-modal-input {
          &:focus {
            // Slightly lighter, slightly more opaque ring for dark backgrounds.
            box-shadow: 0 0 0 3px rgba(89, 189, 251, 0.15);
          }
        }
      }
    }
  }
}


================================================
FILE: changedetectionio/static/styles/scss/parts/_socket.scss
================================================
// Styles for Socket.IO real-time updates
// Reveal the fixed tab while <body> carries the `checking-now` class.
body.checking-now #checking-now-fixed-tab {
  display: block !important;
}

#checking-now-fixed-tab {
  // Not rendered by default.
  display: none;

  // Anchored to the bottom-left corner of the viewport.
  position: fixed;
  bottom: 0;
  left: 0;

  // Appearance
  padding: 5px;
  border-radius: 5px;
  background: #ccc;
  color: var(--color-text);
  font-size: 0.8rem;
}






================================================
FILE: changedetectionio/static/styles/scss/parts/_spinners.scss
================================================

/* spinner */
// Shared geometry for the spinner and its :after pseudo-element:
// a 10px box with fully rounded corners, i.e. a circle.
.spinner {
  &,
  &:after {
    width: 10px;
    height: 10px;
    border-radius: 50%;
  }
}
.spinner {
  // Inline placement; the text content is pushed far off-screen.
  display: inline-block;
  vertical-align: middle;
  margin: 0px auto;
  text-indent: -9999em;

  // Border widths are in em, so this font-size scales the ring thickness.
  font-size: 3px;

  // One solid side plus three translucent sides of the same hue.
  border-left: 1.1em solid #2668ed;
  border-top: 1.1em solid rgba(38,104,237, 0.2);
  border-right: 1.1em solid rgba(38,104,237, 0.2);
  border-bottom: 1.1em solid rgba(38,104,237, 0.2);

  // Vendor-prefixed forms stay ahead of the standard property so the
  // standard one wins wherever both are understood.
  -webkit-transform: translateZ(0);
  -ms-transform: translateZ(0);
  transform: translateZ(0);

  // Continuous rotation driven by the `load8` keyframes.
  -webkit-animation: load8 1.1s infinite linear;
  animation: load8 1.1s infinite linear;
}
// Prefixed copy of the `load8` animation: one full clockwise turn.
@-webkit-keyframes load8 {
  from {
    -webkit-transform: rotate(0deg);
    transform: rotate(0deg);
  }

  to {
    -webkit-transform: rotate(360deg);
    transform: rotate(360deg);
  }
}
// `load8`: rotate from 0deg to 360deg, i.e. one full clockwise turn.
@keyframes load8 {
  from {
    -webkit-transform: rotate(0deg);
    transform: rotate(0deg);
  }

  to {
    -webkit-transform: rotate(360deg);
    transform: rotate(360deg);
  }
}

================================================
FILE: changedetectionio/static/styles/scss/parts/_tabs.scss
================================================
// Variant enabled by the `wrapped-tabs` body class: the tab strip flows into
// rows of equal-width columns instead of a single row of content-sized tabs.
body.wrapped-tabs .tabs ul {
  grid-auto-flow: row;
  grid-auto-columns: unset;
  // As many columns as fit, each at least --tab-width (180px when unset).
  grid-template-columns: repeat(auto-fill, minmax(var(--tab-width, 180px), 1fr));

  // Order matters: `gap` zeroes both gutters, then `column-gap` restores the
  // horizontal one.
  gap: 0;
  column-gap: 5px;

  li {
    border-radius: 0;
  }
}

// Tab strip: a grid of <li> tabs, each wrapping a link.
.tabs {
  ul {
    margin: 0px;
    padding: 0px;
    // Single row of tabs; each column is as wide as its content.
    display: grid;
    grid-auto-flow: column;
    grid-auto-columns: max-content;
    gap: 5px;
    list-style: none;

    li {
      white-space: nowrap;
      color: var(--color-text-tab);
      // Only the top corners are rounded.
      border-top-left-radius: 5px;
      border-top-right-radius: 5px;
      background-color: var(--color-background-tab);

      // Hover highlight applies to inactive tabs only.
      &:not(.active) {
        &:hover {
          background-color: var(--color-background-tab-hover);
        }
      }

      // NOTE(review): the bare `:target` compiles to `.tabs ul li :target`,
      // i.e. a *descendant* of the <li> that is the URL fragment target, not
      // the <li> itself (that would be `&:target`). Confirm which was intended
      // before changing it.
      &.active,
      :target {
        background-color: var(--color-background);

        a {
          color: var(--color-text-tab-active);
          font-weight: bold;
        }
      }

      // Default link styling. The active-state link colour above has higher
      // specificity, so it still wins although this rule comes later.
      a {
        display: block;
        padding: 0.7em;
        color: var(--color-text-tab);
      }
    }
  }
}


================================================
FILE: changedetectionio/static/styles/scss/parts/_toast.scss
================================================
// Toast Notification System
// Modern, animated toast notifications

.toast-container {
  // Fixed to the viewport, above most page content.
  position: fixed;
  z-index: 10000;

  // Children are stacked vertically with a gap between them.
  display: flex;
  flex-direction: column;
  gap: 0.75rem;

  // The container itself never intercepts pointer events.
  pointer-events: none;

  // Placement modifiers — one per corner/edge.
  &.toast-top-right {
    right: 20px;
    top: 20px;
  }

  &.toast-top-center {
    // Centred horizontally: left edge at 50%, then shifted back by half the
    // container's own width.
    left: 50%;
    transform: translateX(-50%);
    top: 100px;
  }

  &.toast-top-left {
    left: 20px;
    top: 20px;
  }

  &.toast-bottom-right {
    right: 20px;
    bottom: 20px;
  }

  &.toast-bottom-center {
    left: 50%;
    transform: translateX(-50%);
    bottom: 20px;
  }

  &.toast-bottom-left {
    left: 20px;
    bottom: 20px;
  }
}

.toast {
  // Layout: children laid out in a vertically centred row.
  position: relative;
  display: flex;
  align-items: center;
  gap: 0.75rem;
  overflow: hidden;

  // Box
  min-width: 300px;
  max-width: 500px;
  padding: 1rem 1.25rem;
  border-radius: 8px;
  background: var(--color-background);
  box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15), 0 0 0 1px rgba(0, 0, 0, 0.05);
  font-family: inherit;

  // The toast itself accepts pointer events.
  pointer-events: auto;

  // Resting state: fully transparent and shifted 50px upwards.
  opacity: 0;
  transform: translateY(-50px);
  transition: all 0.3s cubic-bezier(0.68, -0.55, 0.265, 1.55);

  // Shown: opaque, at its natural position.
  &.toast-show {
    opacity: 1;
    transform: translateY(0);
  }

  // Hidden again: transparent, shifted up and slightly shrunk.
  &.toast-hide {
    opacity: 0;
    transform: translateY(-50px) scale(0.95);
  }

  // Toast types: each gets a 4px left accent bar and a matching icon colour.
  $toast-accents: (
    "success": #10b981,
    "error": #ef4444,
    "warning": #f59e0b,
    "info": #3b82f6,
  );

  @each $type, $accent in $toast-accents {
    &.toast-#{$type} {
      border-left: 4px solid $accent;

      .toast-icon {
        color: $accent;
      }
    }
  }

  // The default type has an accent bar only, no icon colour.
  &.toast-default {
    border-left: 4px solid var(--color-grey-500);
  }
}

// Fixed 24x24 slot that never shrinks; a nested SVG fills it.
.toast-icon {
  width: 24px;
  height: 24px;
  flex-shrink: 0;

  svg {
    height: 100%;
    width: 100%;
  }
}

// Message text: takes the remaining row width and wraps long words.
.toast-message {
  flex: 1;
  word-break: break-word;
  color: var(--color-text);
  font-family: inherit;
  font-size: 0.875rem;
  line-height: 1.5;
}

.toast-close {
  // Fixed 24x24 hit area that never shrinks, with its content centred.
  flex-shrink: 0;
  width: 24px;
  height: 24px;
  display: flex;
  align-items: center;
  justify-content: center;
  margin-left: 0.25rem;
  padding: 0;

  // Borderless and transparent until hovered.
  background: transparent;
  border: none;
  border-radius: 4px;
  color: var(--color-grey-500);
  font-size: 1.5rem;
  line-height: 1;
  cursor: pointer;
  transition: all 0.2s ease;

  &:hover {
    color: var(--color-text);
    background: var(--color-grey-800);
  }

  // Slight shrink while pressed.
  &:active {
    transform: scale(0.95);
  }
}

// Thin translucent bar stretched along the bottom edge of its positioned
// ancestor. It takes its colour from the inherited text colour; transform
// changes are animated linearly and originate from the left edge.
.toast-progress {
  position: absolute;
  left: 0;
  right: 0;
  bottom: 0;
  height: 3px;
  opacity: 0.3;
  background: currentColor;
  transform-origin: left;
  transition: transform linear;
}

// Dark mode adjustments
html[data-darkmode=true] {
  // Darker surface with a heavier shadow and a faint light hairline.
  .toast {
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.4), 0 0 0 1px rgba(255, 255, 255, 0.05);
    background: var(--color-grey-300);
  }

  .toast-close {
    &:hover {
      background: var(--color-grey-400);
    }
  }
}

// Mobile adjustments
// Viewports up to 768px wide: every toast container is centred horizontally,
// whichever placement modifier it carries.
@media only screen and (max-width: 768px) {
  .toast-container {
    // `!important` is needed because the placement modifiers
    // (`.toast-container.toast-top-right`, ...) are more specific than this
    // bare class selector and would otherwise keep their own offsets.
    left: 50% !important;
    right: auto !important;
    top: 80px !important;
    transform: translateX(-50%) !important;
    align-items: center;

    // Bottom-placed containers stay at the bottom: release `top` and pin
    // `bottom` instead.
    &.toast-bottom-right,
    &.toast-bottom-center,
    &.toast-bottom-left {
      top: auto !important;
      bottom: 80px !important;
    }
  }

  .toast {
    // Drop the desktop min/max widths and size the toast to the viewport.
    min-width: auto;
    max-width: none;
    width: 80vw;
    // Larger hidden offset than the desktop value.
    transform: translateY(-100px);

    // Restated inside the media query so the shown/hidden transforms use the
    // mobile offsets.
    &.toast-show {
      transform: translateY(0);
    }

    &.toast-hide {
      transform: translateY(-100px) scale(0.95);
    }
  }
}

// Accessibility
// Users who request reduced motion get a plain opacity fade instead of the
// slide/scale animation.
@media (prefers-reduced-motion: reduce) {
  .toast {
    // Only opacity is transitioned.
    transition: opacity 0.2s ease;
    // Cancels every translate/scale, including those set by the
    // `.toast-show` / `.toast-hide` state classes.
    transform: none !important;

    &.toast-show {
      opacity: 1;
    }

    &.toast-hide {
      opacity: 0;
    }
  }
}


================================================
FILE: changedetectionio/static/styles/scss/parts/_variables.scss
================================================
/**
 * CSS custom properties (aka variables).
 */

// Default (light theme) values for every custom property used by the stylesheets.
// The dark theme overrides a subset of these under html[data-darkmode="true"].
:root {
  // Base palette. The grey ramp runs from near-black (50) to near-white (900).
  --color-white: #fff;
  --color-grey-50: #111;
  --color-grey-100: #262626;
  --color-grey-200: #333;
  --color-grey-300: #444;
  --color-grey-325: #555;
  --color-grey-350: #565d64;
  --color-grey-400: #666;
  --color-grey-500: #777;
  --color-grey-600: #999;
  --color-grey-700: #cbcbcb;
  --color-grey-750: #ddd;
  --color-grey-800: #e0e0e0;
  --color-grey-850: #eee;
  --color-grey-900: #f2f2f2;
  --color-black: #000;
  --color-dark-red: #a00;
  --color-light-red: #dd0000;

  // Page-level colours: backgrounds, text, links, errors, tables and tabs
  --color-background-page: var(--color-grey-100);
  --color-background-gradient-first: #5ad8f7;
  --color-background-gradient-second: #2f50af;
  --color-background-gradient-third: #9150bf;
  --color-background: var(--color-white);
  --color-text: var(--color-grey-200);
  --color-link: #1b98f8;
  --color-menu-accent: #ed5900;
  --color-background-code: var(--color-grey-850);
  --color-error: var(--color-dark-red);
  --color-error-input: #ffebeb;
  --color-error-list: var(--color-light-red);
  --color-table-background: var(--color-background);
  --color-table-stripe: var(--color-grey-900);
  --color-text-tab: var(--color-white);
  --color-background-tab: rgba(255, 255, 255, 0.2);
  --color-background-tab-hover: rgba(255, 255, 255, 0.5);
  --color-text-tab-active: #222;
  --color-api-key: #0078e7;

  // Buttons, tag buttons and the snapshot-age badge
  --color-background-button-primary: #0078e7;
  --color-background-button-green: #42dd53;
  --color-background-button-red: #dd4242;
  --color-background-button-success: rgb(28, 184, 65);
  --color-background-button-error: rgb(202, 60, 60);
  --color-text-button-error: var(--color-white);
  // NOTE(review): warning uses the same colour value as error - confirm this is intended
  --color-background-button-warning: rgb(202, 60, 60);
  --color-text-button-warning: var(--color-white);
  --color-background-button-secondary: rgb(66, 184, 221);
  --color-background-button-cancel: rgb(200, 200, 200);
  --color-text-button: var(--color-white);
  --color-background-button-tag: rgb(99, 99, 99);
  --color-background-snapshot-age: #dfdfdf;
  --color-error-text-snapshot-age: var(--color-white);
  --color-error-background-snapshot-age: #ff0000;
  --color-background-button-tag-active: #9c9c9c;

  // Flash messages and notification boxes
  --color-text-messages: var(--color-white);
  --color-background-messages-message: rgba(255, 255, 255, .2);
  --color-background-messages-error: rgba(255, 1, 1, .5);
  --color-background-messages-notice: rgba(255, 255, 255, .5);
  --color-border-notification: #ccc;

  // Warnings, legends and the checkbox-operations bar
  --color-background-checkbox-operations: rgba(0, 0, 0, 0.05);
  --color-warning: #ff3300;
  --color-border-warning: var(--color-warning);
  --color-text-legend: var(--color-white);

  // Miscellaneous text and border colours
  --color-link-new-version: #e07171;
  --color-last-checked: #bbb;
  --color-text-footer: #444;
  --color-border-watch-table-cell: #eee;

  // Tag list and the "new watch" form
  --color-text-watch-tag-list: rgba(231, 0, 105, 0.4);
  --color-background-new-watch-form: rgba(0, 0, 0, 0.05);
  --color-background-new-watch-input: var(--color-white);
  --color-background-new-watch-input-transparent: rgba(255, 255, 255, 0.1);
  --color-text-new-watch-input: var(--color-text);

  // Generic form inputs
  --color-border-input: var(--color-grey-500);
  --color-shadow-input: var(--color-grey-400);
  --color-background-input: var(--color-white);
  --color-text-input: var(--color-text);
  --color-text-input-description: var(--color-grey-500);
  --color-text-input-placeholder: var(--color-grey-600);

  // Tables
  --color-background-table-thead: var(--color-grey-800);
  --color-border-table-cell: var(--color-grey-700);

  // Top menu
  --color-text-menu-heading: var(--color-grey-350);
  --color-text-menu-link: var(--color-grey-500);
  --color-background-menu-link-hover: var(--color-grey-850);
  --color-text-menu-link-hover: var(--color-grey-300);

  // Shadows and icons
  --color-shadow-jump: var(--color-grey-500);
  --color-icon-github: var(--color-black);
  --color-icon-github-hover: var(--color-grey-300);

  // Watch overview table rows
  --color-watch-table-error: var(--color-dark-red);
  --color-watch-table-row-text: var(--color-grey-100);

  // Background colours for highlighted trigger / ignored / blocked text
  --highlight-trigger-text-bg-color: #1b98f8;
  --highlight-ignored-text-bg-color: var(--color-grey-700);
  --highlight-blocked-text-bg-color: rgb(202, 60, 60);
}

// Dark theme: active when the <html> element carries data-darkmode="true".
// Only properties whose value differs from the :root defaults are redefined.
html[data-darkmode="true"] {
  --color-link: #59bdfb;
  --color-text: var(--color-white);

  --color-background-gradient-first: #3f90a5;
  --color-background-gradient-second: #1e316c;
  --color-background-gradient-third: #4d2c64;

  --color-background-new-watch-input: var(--color-grey-100);
  --color-background-new-watch-input-transparent: var(--color-grey-100);
  --color-text-new-watch-input: var(--color-text);
  --color-background-table-thead: var(--color-grey-200);
  --color-table-background: var(--color-grey-300);
  --color-table-stripe: var(--color-grey-325);
  --color-background: var(--color-grey-300);
  --color-text-menu-heading: var(--color-grey-850);
  --color-text-menu-link: var(--color-grey-800);
  --color-border-table-cell: var(--color-grey-400);
  --color-text-tab-active: var(--color-text);

  --color-border-input: var(--color-grey-400);
  --color-shadow-input: var(--color-grey-50);
  --color-background-input: var(--color-grey-350);
  --color-text-input-description: var(--color-grey-600);
  --color-text-input-placeholder: var(--color-grey-600);
  --color-text-watch-tag-list: rgba(250, 62, 146, 0.4);
  --color-background-code: var(--color-grey-200);

  --color-background-tab: rgba(0, 0, 0, 0.2);
  --color-background-tab-hover: rgba(0, 0, 0, 0.5);

  --color-background-snapshot-age: var(--color-grey-200);
  --color-shadow-jump: var(--color-grey-200);
  --color-icon-github: var(--color-white);
  --color-icon-github-hover: var(--color-grey-700);
  --color-watch-table-error: var(--color-light-red);
  --color-watch-table-row-text: var(--color-grey-800);

  // Icons are adjusted with filters rather than swapped for other assets
  .icon-spread {
    filter: hue-rotate(-10deg) brightness(1.5);
  }

  .watch-table {
    .title-col a[target="_blank"]::after,
    .current-diff-url::after {
      filter: invert(0.5) hue-rotate(10deg) brightness(2);
    }

    .status-browsersteps {
      filter: invert(0.5) hue-rotate(10deg) brightness(1.5);
    }

    .watch-controls {
      .state-off img {
        opacity: 0.3;
      }

      .state-on img {
        opacity: 1;
      }
    }

    .unviewed {
      color: #fff;

      &.error {
        color: var(--color-watch-table-error);
      }
    }
  }
}


================================================
FILE: changedetectionio/static/styles/scss/parts/_visualselector.scss
================================================

// Scrollable viewport holding the page screenshot (img) with the selection canvas layered above it
#selector-wrapper {
  position: relative;
  height: 100%;
  max-height: 70vh;
  overflow-y: scroll;
  text-align: center;

  > img,
  > canvas {
    max-width: 100%;
  }

  // The image is taken out of the flow so the canvas can sit on top of it
  > img {
    position: absolute;
    z-index: 4;
  }

  > canvas {
    position: relative;
    z-index: 5;
  }

  > canvas:hover {
    cursor: pointer;
  }
}

// Render this element's text slightly smaller than the surrounding text
#selector-current-xpath { font-size: 80%; }

================================================
FILE: changedetectionio/static/styles/scss/parts/_watch_table-mobile.scss
================================================
// Sizes shared by the mobile watch-table rules in this file:
// width of the first grid column (checkbox)
$grid-col-checkbox: 20px;
// width of the last grid column (watch controls)
$grid-col-watch: 100px;
// gap between grid cells; also used to offset the "last checked/changed" cells
$grid-gap: 0.5rem;


@media (max-width: 767px) {

  /*
  Narrow screens (767px wide or less): this PARTICULAR table no longer fits
  as a regular table, so its headings and cells are re-flowed as blocks.
  */
  .watch-table {
    /* make headings work on mobile */
    thead {
      display: block;

      tr {
        th {
          display: inline-block;

          // Hide the "Last" text for smaller screens. No extra media query is
          // needed: the enclosing (max-width: 767px) query already implies it.
          .hide-on-mobile {
            display: none;
          }
        }
      }

      .empty-cell {
        display: none;
      }
    }

    // Offset both timestamp cells by the checkbox column width plus one grid gap
    .last-checked {
      margin-left: calc($grid-col-checkbox + $grid-gap);

      > span {
        vertical-align: middle;
      }
    }

    .last-changed {
      margin-left: calc($grid-col-checkbox + $grid-gap);
    }

    // The column headings are not next to the cells here, so each cell gets a text label
    .last-checked::before {
      color: var(--color-text);
      content: "Last Checked ";
    }

    .last-changed::before {
      color: var(--color-text);
      content: "Last Changed ";
    }

    /* Force table to not be like tables anymore */
    td.inline {
      display: inline-block;
    }

    // NOTE(review): nested here this compiles to ".watch-table .pure-table td/th",
    // which only matches a .pure-table INSIDE .watch-table - confirm against the markup.
    .pure-table td,
    .pure-table th {
      border: none;
    }

    td {
      /* Behave  like a "row" */
      border: none;
      border-bottom: 1px solid var(--color-border-watch-table-cell);
      vertical-align: middle;

      &:before {
        /* Top/left values mimic padding */
        top: 6px;
        left: 6px;
        width: 45%;
        padding-right: 10px;
        white-space: nowrap;
      }
    }

    // Stripe whole rows; odd-row cells inherit the row colour
    &.pure-table-striped {
      tr {
        background-color: var(--color-table-background);
      }

      tr:nth-child(2n-1) {
        background-color: var(--color-table-stripe);
      }

      tr:nth-child(2n-1) td {
        background-color: inherit;
      }
    }
  }
}

// Narrow screens: every table row becomes a 3-column, 4-row CSS grid.
// Rows 1-3 hold title / last-checked / last-changed across the full width,
// row 4 holds checkbox | buttons | watch controls.
@media (max-width: 767px) {
  .watch-table {
    tbody {
      tr {
        padding-bottom: 10px;
        padding-top: 10px;
        display: grid;
        // fixed checkbox column, flexible middle column, fixed controls column
        grid-template-columns: $grid-col-checkbox 1fr $grid-col-watch;
        grid-template-rows: auto auto auto auto;
        gap: $grid-gap;

        .counter-i {
          display: none;
        }

        td.checkbox-uuid {
          display: grid;
          place-items: center;
        }

        td.inline {
          /* display: block !important;;*/
        }

        > td {
          border-bottom: none;
        }

        // Empty state message - span full width on mobile
        > td[colspan] {
          grid-column: 1 / -1;
        }

        // Row 1: title across all columns
        > td.title-col {
          grid-column: 1 / -1;
          grid-row: 1;
          .watch-title {
            font-size: 0.92rem;
          }
          .link-spread {
            display: none;
          }
        }

        // Row 2: last checked
        > td.last-checked {
          grid-column: 1 / -1;
          grid-row: 2;
        }

        // Row 3: last changed
        > td.last-changed {
          grid-column: 1 / -1;
          grid-row: 3;
        }

        // Row 4, column 1: selection checkbox
        > td.checkbox-uuid {
          grid-column: 1;
          grid-row: 4;
        }

        // Row 4, column 2: action buttons, left aligned
        > td.buttons {
          grid-column: 2;
          grid-row: 4;
          display: flex;
          align-items: center;
          justify-content: flex-start;
        }

        // Row 4, column 3: watch control icons, centered
        > td.watch-controls {
          grid-column: 3;
          grid-row: 4;
          display: grid;
          place-items: center;

          a img {
            padding: 10px;
          }
        }
      }
    }
  }
  // Tighter cell padding for every .pure-table at this width
  .pure-table td {
    padding: 3px !important;
  }
}

================================================
FILE: changedetectionio/static/styles/scss/parts/_watch_table.scss
================================================
/* table related */
// Flex row above the table: .left stays at the start, .right is pushed to the far end
#stats_row {
  display: flex;
  align-items: center;
  width: 100%;
  font-size: 0.85rem;
  color: #fff;

  > * {
    padding-bottom: 0.5rem;
  }

  .left {
    text-align: left;
  }

  // Dimmed by default; body.has-queue raises the opacity
  .right {
    margin-left: auto; /* pushes it to the far right */
    text-align: right;
    opacity: 0.5;
    transition: opacity 0.6s ease;
  }
}
// Show the right-hand stats at full opacity while body has the has-queue class
body.has-queue #stats_row .right {
  opacity: 1;
}

// Watch overview table. Row state classes (checking-now, queued, paused, ...)
// switch the visibility of elements inside the row.
.watch-table {
  width: 100%;
  font-size: 80%;

  tr {
    // Declared before the nested rule so the compiled order does not depend on
    // how the Sass version treats declarations that follow nested rules.
    color: var(--color-watch-table-row-text);

    &.unviewed {
      font-weight: bold;
    }
  }


  td {
    white-space: nowrap;

    &.title-col {
      word-break: break-all;
      white-space: normal;
    }

    // Small inline PNG appended after links marked as external
    a.external::after {
      content: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAQElEQVR42qXKwQkAIAxDUUdxtO6/RBQkQZvSi8I/pL4BoGw/XPkh4XigPmsUgh0626AjRsgxHTkUThsG2T/sIlzdTsp52kSS1wAAAABJRU5ErkJggg==);
      margin: 0 3px 0 5px;
    }

  }


  th {
    white-space: nowrap;

    a {
      font-weight: normal;

      &.active {
        font-weight: bolder;
      }

      &.inactive {
        .arrow {
          display: none;
        }
      }
    }
  }

  /* Row with 'checking-now' */
  tr.checking-now {
    td:first-child {
      position: relative;
    }

    // 3px wide coloured bar down the left edge of the row's first cell
    td:first-child::before {
      content: "";
      position: absolute;
      top: 0;
      bottom: 0;
      left: 0;
      width: 3px;
      background-color: #293eff;
    }

    // Swap the last-checked text for the spinner
    td.last-checked {
      .spinner-wrapper {
        display: inline-block !important;
      }

      .innertext {
        display: none !important;
      }
    }
  }

  tr.queued {
    a.recheck {
      display: none !important;
    }

    a.already-in-queue-button {
      display: inline-block !important;
    }
  }

  tr.paused {
    a.pause-toggle {
      &.state-on {
        display: inline !important;
      }

      &.state-off {
        display: none !important;
      }
    }
  }

  tr.notification_muted {
    a.mute-toggle {
      &.state-on {
        display: inline !important;
      }

      &.state-off {
        display: none !important;
      }
    }
  }


  tr.has-error {
    color: var(--color-watch-table-error);

    .error-text {
      display: block !important;
    }
  }

  tr.single-history {
    a.preview-link {
      display: inline-block !important;
    }
  }

  tr.multiple-history {
    a.history-link {
      display: inline-block !important;
    }
  }


}

// Wrapper around the watch table; its state classes reveal extra buttons in #post-list-buttons
#watch-table-wrapper {
  /* general styling */
  #post-list-buttons {
    text-align: right;
    padding: 0px;
    margin: 0px;

    li {
      display: inline-block;
    }

    // Buttons hang below the table: square top corners, rounded bottom corners
    a {
      border-top-left-radius: initial;
      border-top-right-radius: initial;
      border-bottom-left-radius: 5px;
      border-bottom-right-radius: 5px;
    }
  }

  /* post list dynamically on/off stuff */

  &.has-error {
    #post-list-buttons {
      #post-list-with-errors {
        display: inline-block !important;
      }
    }
  }

  &.has-unread-changes {
    #post-list-buttons {
      // #post-list-unread was listed twice in this selector list; once is enough
      #post-list-unread, #post-list-mark-views {
        display: inline-block !important;
      }
    }
  }
}


================================================
FILE: changedetectionio/static/styles/scss/parts/_widgets.scss
================================================

// Ternary radio button group component
// Segmented control: each option is a label wrapping a visually hidden radio
// input followed by a .ternary-radio-label that is styled as the visible button.
.ternary-radio-group {
  display: flex;
  gap: 0;
  border: 1px solid var(--color-grey-750);
  border-radius: 4px;
  overflow: hidden;
  width: fit-content;
  background: var(--color-background);

  .ternary-radio-option {
    position: relative;
    cursor: pointer;
    margin: 0;
    display: flex;
    align-items: center;

    // The real input is zero-sized and transparent; the sibling label is what is seen
    input[type="radio"] {
      position: absolute;
      opacity: 0;
      width: 0;
      height: 0;
    }

    .ternary-radio-label {
      padding: 8px 16px;
      background: var(--color-grey-900);
      border: none;
      // divider between neighbouring options (removed for the last one below)
      border-right: 1px solid var(--color-grey-750);
      font-size: 13px;
      font-weight: 500;
      color: var(--color-text);
      transition: all 0.2s ease;
      cursor: pointer;
      display: block;
      text-align: center;
    }

    &:last-child .ternary-radio-label {
      border-right: none;
    }

    // Selected option; .ternary-default gets a grey background instead of the link colour
    input:checked + .ternary-radio-label {
      background: var(--color-link);
      color: var(--color-text-button);
      font-weight: 600;

      &.ternary-default {
        background: var(--color-grey-600);
        color: var(--color-text-button);
      }

      &:hover {
        background: #1a7bc4;

        &.ternary-default {
          background: var(--color-grey-500);
        }
      }
    }

    // Hover for unselected options (the checked rules above are more specific)
    &:hover .ternary-radio-label {
      background: var(--color-grey-800);
    }
  }

  // Very narrow screens: the group spans the full width
  @media (max-width: 480px) {
    width: 100%;

    .ternary-radio-label {
      flex: 1;
      min-width: auto;
    }
  }
}

// Standard radio buttons: colour a checked .pure-radio input and the label that follows it
input[type="radio"].pure-radio:checked,
input[type="radio"].pure-radio:checked + label {
  color: var(--color-text-button);
  background: var(--color-link);
}

// Dark theme colours for the ternary radio group
html[data-darkmode="true"] .ternary-radio-group .ternary-radio-option {
  .ternary-radio-label {
    background: var(--color-grey-350);
  }

  &:hover .ternary-radio-label {
    background: var(--color-grey-400);
  }

  // Selected option keeps the link colour, or grey for .ternary-default
  input:checked + .ternary-radio-label {
    color: var(--color-text-button);
    background: var(--color-link);

    &.ternary-default {
      background: var(--color-grey-600);
    }

    &:hover {
      background: #1a7bc4;

      &.ternary-default {
        background: var(--color-grey-500);
      }
    }
  }
}

================================================
FILE: changedetectionio/static/styles/scss/styles.scss
================================================
/*
 * -- BASE STYLES --
 */

@use "settings" as *;
@use "parts/variables";
@use "parts/arrows";
@use "parts/browser-steps";
@use "parts/extra_proxies";
@use "parts/extra_browsers";
@use "parts/pagination";
@use "parts/spinners";
@use "parts/darkmode";
@use "parts/menu";
@use "parts/love";
@use "parts/preview_text_filter";
@use "parts/watch_table";
@use "parts/watch_table-mobile";
@use "parts/edit";
@use "parts/conditions_table";
@use "parts/lister_extra";
@use "parts/socket";
@use "parts/visualselector";
@use "parts/widgets";
@use "parts/diff_image";
@use "parts/modal";
@use "parts/language";
@use "parts/action_sidebar";
@use "parts/hamburger_menu";
@use "parts/search_modal";
@use "parts/notification_bubble";
@use "parts/toast";
@use "parts/login_form";
@use "parts/tabs";

// Smooth transitions for theme switching: colour-related properties on these
// elements animate over 0.4s instead of changing instantly.
body,
footer,
code,
a,
.pure-table,
.pure-table thead,
.pure-table td,
.pure-table th,
.pure-form input,
.pure-form textarea,
.pure-form select,
.pure-menu-horizontal,
.edit-form .inner,
.sticky-tab,
.button-tag,
.messages li,
.inline-warning,
.watch-controls img,
#diff-jump,
#new-watch-form,
#new-watch-form input:not(.pure-button),
#checkbox-operations {
  transition:
    color 0.4s ease,
    background-color 0.4s ease,
    background 0.4s ease,
    border-color 0.4s ease,
    box-shadow 0.4s ease;
}

// Page defaults: font stack, text colour and page background
body {
  font-family: Helvetica Neue, Helvetica, Lucida Grande, Arial, Ubuntu, Cantarell, Fira Sans, sans-serif;
  background: var(--color-background-page);
  color: var(--color-text);
}

// Clips the element to a 1px box and takes it out of the flow without using display: none
.visually-hidden {
  position: absolute;
  width: 1px;
  height: 1px;
  overflow: hidden;
  white-space: nowrap;
  clip: rect(0 0 0 0);
  clip-path: inset(50%);
}

// Row icons (chrome, pdf, share, ...): 1rem tall, vertically centered on the text line
.status-icon {
  height: 1rem;
  display: inline-block;
  vertical-align: middle;
}

// Uses the theme's general background colour
.pure-table-even { background: var(--color-background); }

/* Some styles from https://css-tricks.com/ */
// Links: theme link colour, no underline
a {
  color: var(--color-link);
  text-decoration: none;
}

// GitHub icon link: the SVG takes the link's colour, which changes on hover
a.github-link {
  margin: 0 1rem 0 0.5rem;
  color: var(--color-icon-github);

  &:hover {
    color: var(--color-icon-github-hover);
  }

  svg {
    fill: currentColor;
  }
}

// Always rendered in white text
#search-result-info { color: #fff; }

// Borderless icon button; shares the GitHub icon colours
button.toggle-button {
  background: transparent;
  border: none;
  cursor: pointer;
  vertical-align: middle;
  color: var(--color-icon-github);

  &:hover {
    color: var(--color-icon-github-hover);
  }

  svg {
    fill: currentColor;
  }

  .icon-light {
    display: block;
  }
}

// Top menu bar: flex row with children spread to both ends and vertically centered
.pure-menu-horizontal {
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: 5px;
  background: var(--color-background);
}

// 3px gradient strip; the oversized background is moved by the "gradient" keyframes
// (200s per cycle here, 1s when body.spinner-active is set)
#pure-menu-horizontal-spinner {
  width: 100%;
  height: 3px;
  background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);
  background-size: 400% 400%;
  animation: gradient 200s ease infinite;
}

// Run the strip animation with a 1s cycle while body has the spinner-active class
body.spinner-active #pure-menu-horizontal-spinner {
  animation: gradient 1s ease infinite;
}

// Moves the background from left to right and back again over one cycle
@keyframes gradient {
  0%,
  100% {
    background-position: 0% 50%;
  }

  50% {
    background-position: 100% 50%;
  }
}
// Menu heading text colour
.pure-menu-heading { color: var(--color-text-menu-heading); }

// Menu links and their hover state
.pure-menu-link {
  color: var(--color-text-menu-link);
}

.pure-menu-link:hover {
  color: var(--color-text-menu-link-hover);
  background-color: var(--color-background-menu-link-hover);
}


.tab-pane-inner {
  // This element carries the #id the tab buttons anchor to; keep 200px of
  // clearance above it when the browser scrolls to that anchor.
  scroll-margin-top: 200px;
}

// Main content area: a centered flex column with extra top padding.
section.content {
  // Plain declarations come first so the compiled order does not depend on how
  // the Sass version treats declarations that follow nested at-rules.
  display: flex;
  flex-direction: column;
  align-items: center;
  justify-content: center;
  padding-bottom: 1em;

  // Top padding: 80px up to the wide-desktop breakpoint, 100px from it upwards
  @media only screen and (max-width: $desktop-wide-breakpoint) {
    padding-top: 80px;
  }
  @media only screen and (min-width: $desktop-wide-breakpoint) {
    padding-top: 100px;
  }
}

// Inline code: normal text colour on the theme's code background
code {
  color: var(--color-text);
  background: var(--color-background-code);
}

// Small rounded pill; extended by .processor-badge and .watch-tag-list
.inline-tag {
  margin-right: 4px;
  padding: 2px 5px;
  border-radius: 5px;
  line-height: 1.2rem;
  white-space: nowrap;
}

/* Processor type badges - colors auto-generated from processor names */
// An .inline-tag pill with the heaviest font weight
.processor-badge {
  font-weight: 900;
  @extend .inline-tag;
}

// Tag pill: white text on the tag colour, also usable as a link
.watch-tag-list {
  @extend .inline-tag;
  background: var(--color-text-watch-tag-list);
  color: var(--color-white);

  /* Remove default anchor styling when used as links */
  text-decoration: none;

  &:visited {
    color: var(--color-white);
  }

  &:hover {
    cursor: pointer;
    opacity: 0.8;
    text-decoration: none;
  }
}

// From 768px upwards .box gets 1em side margins (forced with !important)
@media (min-width: 768px) {
  .box { margin: 0 1em !important; }
}

// Flex column container that never exceeds its parent's width
.box {
  display: flex;
  flex-direction: column;
  justify-content: center;
  max-width: 100%;
  margin: 0 0.3em;
}


// Two 650px tall layers behind the page content.
// ::before: background-image is set in base.html so it works with reverse proxies etc
// ::after: the theme gradient, drawn at 91% opacity
body::before,
body::after {
  content: "";
  display: block;
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 650px;
  z-index: -1;
}

body::after {
  background: linear-gradient(130deg, var(--color-background-gradient-first), var(--color-background-gradient-second) 41.07%, var(--color-background-gradient-third) 84.05%);
  opacity: 0.91;
}

// Clip both layers to a polygon: full width along the top, with a lower edge
// sampled at every 1% of the width and varying between 62.5% and 77.5% of the height.
// The same polygon is given twice, once with the -webkit- prefix.
body:after,
body:before {
  -webkit-clip-path: polygon(100% 0, 0 0, 0 77.5%, 1% 77.4%, 2% 77.1%, 3% 76.6%, 4% 75.9%, 5% 75.05%, 6% 74.05%, 7% 72.95%, 8% 71.75%, 9% 70.55%, 10% 69.3%, 11% 68.05%, 12% 66.9%, 13% 65.8%, 14% 64.8%, 15% 64%, 16% 63.35%, 17% 62.85%, 18% 62.6%, 19% 62.5%, 20% 62.65%, 21% 63%, 22% 63.5%, 23% 64.2%, 24% 65.1%, 25% 66.1%, 26% 67.2%, 27% 68.4%, 28% 69.65%, 29% 70.9%, 30% 72.15%, 31% 73.3%, 32% 74.35%, 33% 75.3%, 34% 76.1%, 35% 76.75%, 36% 77.2%, 37% 77.45%, 38% 77.5%, 39% 77.3%, 40% 76.95%, 41% 76.4%, 42% 75.65%, 43% 74.75%, 44% 73.75%, 45% 72.6%, 46% 71.4%, 47% 70.15%, 48% 68.9%, 49% 67.7%, 50% 66.55%, 51% 65.5%, 52% 64.55%, 53% 63.75%, 54% 63.15%, 55% 62.75%, 56% 62.55%, 57% 62.5%, 58% 62.7%, 59% 63.1%, 60% 63.7%, 61% 64.45%, 62% 65.4%, 63% 66.45%, 64% 67.6%, 65% 68.8%, 66% 70.05%, 67% 71.3%, 68% 72.5%, 69% 73.6%, 70% 74.65%, 71% 75.55%, 72% 76.35%, 73% 76.9%, 74% 77.3%, 75% 77.5%, 76% 77.45%, 77% 77.25%, 78% 76.8%, 79% 76.2%, 80% 75.4%, 81% 74.45%, 82% 73.4%, 83% 72.25%, 84% 71.05%, 85% 69.8%, 86% 68.55%, 87% 67.35%, 88% 66.2%, 89% 65.2%, 90% 64.3%, 91% 63.55%, 92% 63%, 93% 62.65%, 94% 62.5%, 95% 62.55%, 96% 62.8%, 97% 63.3%, 98% 63.9%, 99% 64.75%, 100% 65.7%);
  clip-path: polygon(100% 0, 0 0, 0 77.5%, 1% 77.4%, 2% 77.1%, 3% 76.6%, 4% 75.9%, 5% 75.05%, 6% 74.05%, 7% 72.95%, 8% 71.75%, 9% 70.55%, 10% 69.3%, 11% 68.05%, 12% 66.9%, 13% 65.8%, 14% 64.8%, 15% 64%, 16% 63.35%, 17% 62.85%, 18% 62.6%, 19% 62.5%, 20% 62.65%, 21% 63%, 22% 63.5%, 23% 64.2%, 24% 65.1%, 25% 66.1%, 26% 67.2%, 27% 68.4%, 28% 69.65%, 29% 70.9%, 30% 72.15%, 31% 73.3%, 32% 74.35%, 33% 75.3%, 34% 76.1%, 35% 76.75%, 36% 77.2%, 37% 77.45%, 38% 77.5%, 39% 77.3%, 40% 76.95%, 41% 76.4%, 42% 75.65%, 43% 74.75%, 44% 73.75%, 45% 72.6%, 46% 71.4%, 47% 70.15%, 48% 68.9%, 49% 67.7%, 50% 66.55%, 51% 65.5%, 52% 64.55%, 53% 63.75%, 54% 63.15%, 55% 62.75%, 56% 62.55%, 57% 62.5%, 58% 62.7%, 59% 63.1%, 60% 63.7%, 61% 64.45%, 62% 65.4%, 63% 66.45%, 64% 67.6%, 65% 68.8%, 66% 70.05%, 67% 71.3%, 68% 72.5%, 69% 73.6%, 70% 74.65%, 71% 75.55%, 72% 76.35%, 73% 76.9%, 74% 77.3%, 75% 77.5%, 76% 77.45%, 77% 77.25%, 78% 76.8%, 79% 76.2%, 80% 75.4%, 81% 74.45%, 82% 73.4%, 83% 72.25%, 84% 71.05%, 85% 69.8%, 86% 68.55%, 87% 67.35%, 88% 66.2%, 89% 65.2%, 90% 64.3%, 91% 63.55%, 92% 63%, 93% 62.65%, 94% 62.5%, 95% 62.55%, 96% 62.8%, 97% 63.3%, 98% 63.9%, 99% 64.75%, 100% 65.7%)
}

// Button size modifiers, relative to the inherited font size
.button-small { font-size: 85%; }

.button-xsmall { font-size: 70%; }

// Smaller block of text, limited to 400px wide, with space above it
.fetch-error {
  display: block;
  max-width: 400px;
  padding-top: 1em;
  font-size: 80%;
}

// Primary and selected buttons share the theme's primary button colour
.pure-button-primary,
.pure-button-selected,
a.pure-button-primary,
a.pure-button-selected {
  background-color: var(--color-background-button-primary);
}

// Secondary button text: button text colour, rounded corners, subtle dark text shadow
.button-secondary {
  border-radius: 4px;
  color: var(--color-text-button);
  text-shadow: 0 1px 1px rgba(0, 0, 0, 0.2);
}

// Success button background
.button-success { background: var(--color-background-button-success); }

// Small tag filter buttons with square bottom corners
.button-tag {
  margin-right: 4px;
  font-size: 65%;
  color: var(--color-text-button);
  background: var(--color-background-button-tag);
  border-bottom-left-radius: initial;
  border-bottom-right-radius: initial;

  // Currently selected tag
  &.active {
    font-weight: bold;
    background: var(--color-background-button-tag-active);
  }
}

// Button colour variants, each taking its colours from the theme variables
.button-error {
  color: var(--color-text-button-error);
  background: var(--color-background-button-error);
}

.button-warning {
  color: var(--color-text-button-warning);
  background: var(--color-background-button-warning);
}

.button-secondary { background: var(--color-background-button-secondary); }

.button-cancel { background: var(--color-background-button-cancel); }

// Message list: each <li> is a rounded box whose background depends on its class
.messages {
  li {
    padding: 1em;
    list-style: none;
    border-radius: 10px;
    font-weight: bold;
    color: var(--color-text-messages);
  }

  li.message {
    background: var(--color-background-messages-message);
  }

  li.error {
    background: var(--color-background-messages-error);
  }

  li.notice {
    background: var(--color-background-messages-notice);
  }

  // Direct children show a pointer cursor on hover when a share link is present
  &.with-share-link > *:hover {
    cursor: pointer;
  }
}

.notifications-wrapper {
  padding-top: 0.5rem;

  // Bordered box, at most 12rem tall, that scrolls vertically and wraps long lines
  #notification-test-log {
    box-sizing: border-box;
    max-width: 100%;
    max-height: 12rem;
    margin-top: 1rem;
    padding: 1rem;
    overflow-y: scroll;
    border: 1px solid var(--color-border-notification);
    border-radius: 5px;
    white-space: pre-wrap;
    word-break: break-word;
    overflow-wrap: break-word;
  }
}

// Labels show a pointer cursor on hover
label:hover {
  cursor: pointer;
}

// Thin rounded border using the notification border colour
.grey-form-border {
  padding: 0.5rem;
  border: 1px solid var(--color-border-notification);
  border-radius: 5px;
}

// Bordered box whose long words are allowed to wrap
#notification-error-log {
  padding: 1rem;
  border: 1px solid var(--color-border-notification);
  border-radius: 5px;
  overflow-wrap: break-word;
}

// Smaller text in every cell when #token-table is also a .pure-table
#token-table.pure-table {
  td,
  th {
    font-size: 80%;
  }
}

// Colouring for the transparent text field: white text, faint white border, no shadow
.pure-form input[type=text].transparent-field {
  color: var(--color-white) !important;
  background-color: var(--color-background-new-watch-input-transparent) !important;
  border: 1px solid rgba(255, 255, 255, 0.2) !important;
  -webkit-box-shadow: none !important;
  box-shadow: none !important;

  &::placeholder {
    color: rgba(255, 255, 255, 0.7);
    font-weight: lighter;
    opacity: 0.5;
  }
}

// Form for adding a new watch
#new-watch-form {
  background: var(--color-background-new-watch-form);
  padding: 1em;
  border-radius: 10px;
  margin-bottom: 1em;
  max-width: 100%;

  #url {
    &::placeholder {
      font-weight: bold;
    }
  }

  input {
    display: inline-block;
    margin-bottom: 5px;
  }

  // Everything except the submit-style .pure-button inputs gets the themed input colours
  input:not(.pure-button) {
    background-color: var(--color-background-new-watch-input);
    color: var(--color-text-new-watch-input);
  }

  .label {
    display: none;
  }

  legend {
    color: var(--color-text-legend);
    font-weight: bold;
  }


  #watch-add-wrapper-zone {
    // From 760px upwards the fields sit side by side in one flex row
    @media only screen and (min-width: 760px) {
      display: flex;
      gap: 0.3rem;
      flex-direction: row;
      min-width: 70vw;
    }
    /* URL field grows always, other stay static in width */
    > span {
      flex-grow: 0;

      input {
        width: 100%;
        padding-right: 1em;
      }

      &:first-child {
        flex-grow: 1;
      }
    }

    // Up to 760px the URL field takes the full width
    @media only screen and (max-width: 760px) {
      #url {
        width: 100%;
      }
    }
  }

  // Label and input in one row; the input takes the remaining width
  #watch-group-tag {
    font-size: 0.9rem;
    padding: 0.3rem;
    display: flex;
    align-items: center;
    gap: 0.5rem;
    color: var(--color-white);
    label, input {
      margin: 0;
    }

    input {
      flex: 1;
    }
  }
}


// 40px of left padding
#diff-col { padding-left: 40px; }

// Panel fixed to the left edge of the viewport, 120px from the top
#diff-jump {
  position: fixed;
  top: 120px;
  left: 0;
  padding: 10px;
  background: var(--color-background);
  border-top-right-radius: 5px;
  border-bottom-right-radius: 5px;
  box-shadow: 1px 1px 4px var(--color-shadow-jump);

  // Links behave like buttons: pointer cursor, text not selectable
  a {
    color: var(--color-link);
    cursor: pointer;
    -webkit-user-select: none;
    -moz-user-select: none;
    -ms-user-select: none;
    -o-user-select: none;
    user-select: none;
  }
}

// Page footer: centered text on the theme background
footer {
  padding: 10px;
  text-align: center;
  color: var(--color-text-footer);
  background: var(--color-background);
}

// Align the icon with the middle of the text line
#feed-icon { vertical-align: middle; }

// Intentionally sets no properties at the moment; the disabled block below is an
// earlier absolute-positioned variant kept for reference.
#top-right-menu {
  // Just let flex overflow the x axis for now
  /*
      position: absolute;
      right: 0px;
      background: linear-gradient(to right, #fff0, #fff 10%);
      padding-left: 20px;
      padding-right: 10px;
      */
}

// Small tabs positioned near the top of the page; hidden up to the wide-desktop breakpoint.
.sticky-tab {
  // Plain declarations come first so the compiled order does not depend on how
  // the Sass version treats declarations that follow nested at-rules.
  position: absolute;
  top: 60px;
  font-size: 65%;
  background: var(--color-background);
  padding: 10px;

  @media only screen and (max-width: $desktop-wide-breakpoint) {
    display: none;
  }

  // Left tab stays fixed to the viewport edge while scrolling
  &#left-sticky {
    left: 0;
    position: fixed;
    border-top-right-radius: 5px;
    border-bottom-right-radius: 5px;
    box-shadow: 1px 1px 4px var(--color-shadow-jump);
  }

  &#right-sticky {
    right: 0px;
  }

  // Sits 40px lower than the other tabs
  &#hosted-sticky {
    right: 0px;
    top: 100px;
    font-weight: bold;
  }
}

// Link inside the new-version notice uses its own colour
#new-version-text a { color: var(--color-link-new-version); }

// Control icons: faint by default, stronger when their state is on or when hovered
.watch-controls {
  color: #f8321b;

  .state-on img {
    opacity: 0.8;
  }

  /* default */
  img {
    opacity: 0.2;

    &:hover {
      transition: opacity 0.3s;
      opacity: 0.8;
    }
  }
}

// Full-width monospaced textarea that keeps lines unwrapped
.monospaced-textarea textarea {
  width: 100%;
  font-family: monospace;
  white-space: pre;
  overflow-wrap: normal;
  // No scrollbars until needed.
  overflow-x: auto;
}


// Adjustments to Pure's form styles
.pure-form {
  fieldset {
    padding-top: 0px;

    ul {
      padding-bottom: 0px;
      margin-bottom: 0px;
    }
  }

  // Common spacing for every kind of control row
  .pure-control-group,
  .pure-group,
  .pure-controls {
    padding-bottom: 1em;

    div {
      margin: 0px;
    }

    // Checkbox and its label on one line, vertically centered
    .checkbox {
      >* {
        display: inline;
        vertical-align: middle;
      }

      >label {
        padding-left: 5px;
      }
    }

    legend {
      color: var(--color-text-legend);
    }
  }

  /* The input fields with errors */
  .error {
    input {
      background-color: var(--color-error-input);
    }
  }

  /* The list of errors */
  ul.errors {
    padding: .5em .6em;
    border: 1px solid var(--color-error-list);
    border-radius: 4px;
    vertical-align: middle;
    -webkit-box-sizing: border-box;
    box-sizing: border-box;

    li {
      margin-left: 1em;
      color: var(--color-error-list);
    }
  }

  label {
    font-weight: bold;
  }

  textarea {
    width: 100%;
  }

  // Radio lists without bullets; each item lays out its children in a centered flex row
  .inline-radio {
    ul {
      margin: 0px;
      list-style: none;

      li {
        display: flex;
        align-items: center;
        gap: 1em;
      }
    }
  }
}


// Narrow viewports, or devices between 768px and the wide-desktop breakpoint:
// tighter edit form and a horizontally scrollable nav menu
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: $desktop-wide-breakpoint) {
  .edit-form {
    margin: 0;
    padding: 0.5em;
  }

  #nav-menu { overflow-x: scroll; }
}


// Same breakpoints as the edit-form rule: text inputs take the full available width
@media only screen and (max-width: 760px),
(min-device-width: 768px) and (max-device-width: $desktop-wide-breakpoint) {
  input[type='text'] { width: 100%; }
}

// Theme colours for Pure tables: outer border, header row and cell dividers
.pure-table {
  border-color: var(--color-border-table-cell);

  thead {
    color: var(--color-text);
    background-color: var(--color-background-table-thead);
    border-bottom: 1px solid var(--color-background-table-thead);
  }

  th,
  td {
    border-left-color: var(--color-border-table-cell);
  }
}

// Stripe colour for the cells of odd rows
.pure-table-striped tr:nth-child(2n-1) td {
  background-color: var(--color-table-stripe);
}

// Theme colours for every text-like form control inside .pure-form
.pure-form input[type=color],
.pure-form input[type=date],
.pure-form input[type=datetime-local],
.pure-form input[type=datetime],
.pure-form input[type=email],
.pure-form input[type=month],
.pure-form input[type=number],
.pure-form input[type=password],
.pure-form input[type=search],
.pure-form input[type=tel],
.pure-form input[type=text],
.pure-form input[type=time],
.pure-form input[type=url],
.pure-form input[type=week],
.pure-form select,
.pure-form textarea {
  // NOTE(review): `border` is the shorthand, so giving it only a colour also resets
  // border-style to its initial `none`. If only the colour was meant to change this
  // should probably be `border-color` - confirm the intended look before changing.
  border: var(--color-border-input);
  box-shadow: inset 0 1px 3px var(--color-shadow-input);
  background-color: var(--color-background-input);
  color: var(--color-text-input);

  // Keep the same background while the control is being pressed
  &:active {
    background-color: var(--color-background-input);
  }
}

// Placeholder text colour for inputs and textareas
textarea::placeholder,
input::placeholder {
  color: var(--color-text-input-placeholder);
}


/** Desktop vs mobile input field strategy
- We don't use the `size` attribute, because `size` is too unreliable to override and will often push out
- Always rely on width in CSS
*/
/** Minimum width for input fields: full width by default, 80% from 761px upwards */
.m-d { min-width: 100%; }

/* m-d is medium-desktop */
@media only screen and (min-width: 761px) {
  .m-d { min-width: 80%; }
}


// Padding used for the `.inner` area of `.edit-form`.
$form-edge-padding: 20px;

// The first direct <div> child of a stacked form is laid out as a block.
.pure-form-stacked > div:first-child {
  display: block;
}

// Login form styles moved to parts/_login_form.scss

// A tab pane is only displayed while it is the URL fragment target (:target);
// every other pane is hidden.
.tab-pane-inner {
  // Own declarations come before the nested rules: Sass deprecated
  // declarations that follow nested rules ("mixed-decls") and newer compilers
  // emit them at a different position than older ones.
  // doesnt need padding because theres another row of buttons/activity
  padding: 0px;

  &:not(:target) {
    display: none;
  }

  &:target {
    display: block;
  }
}

// Absolutely positioned badge pinned to the top-right corner of its
// positioned ancestor.
.beta-logo {
  position: absolute;
  // looks better when it's hanging off a little
  top: -3px;
  right: -3px;
  height: 50px;
}

// Leave some breathing room below the selector header.
#selector-header {
  padding-bottom: 1em;
}

// When the body carries .full-width the edit form takes 95% of the width.
body.full-width .edit-form {
  width: 95%;
}

// Main edit form container and its inner parts.
.edit-form {
  min-width: 70%;
  /* so it cant overflow */
  max-width: 95%;

  // Positioning context for absolutely positioned children.
  .box-wrap {
    position: relative;
  }

  .inner {
    padding: $form-edge-padding;
    background: var(--color-background);
  }

  #actions {
    display: block;
    background: var(--color-background);

    /* Make action buttons have consistent size and spacing */
    .pure-control-group {
      display: flex;
      flex-wrap: wrap;
      gap: 0.625em;
    }
  }

  // Inline help text under a field.
  .pure-form-message-inline {
    padding-left: 0;
    color: var(--color-text-input-description);

    code {
      font-size: .875em;
    }
  }
}

// Bordered, rounded box used to group related form fields.
.border-fieldset {
  // Own declarations come before the nested rules: Sass deprecated
  // declarations that follow nested rules ("mixed-decls") and newer compilers
  // emit them at a different position than older ones.
  border: 1px solid #ccc;
  padding: 1rem;
  border-radius: 5px;
  margin-bottom: 1rem;

  h3 {
    margin-top: 0;
  }

  // The last fieldset (and its control group) must not add extra space above
  // the box's own bottom padding.
  fieldset:last-of-type {
    padding-bottom: 0;

    .pure-control-group {
      padding-bottom: 0;
    }
  }
}



// Global list defaults: tighter top spacing and a small left indent.
// Applies to every <ul>; more specific rules override it where needed.
ul {
  margin-top: 4px;
  padding-top: 0px;
  padding-left: 1em;
}

// Time-check widget: table rows flow inline and their number inputs are kept
// narrow.
.time-check-widget tr {
  display: inline;

  input[type="number"] {
    width: 5em;
  }
}

// Narrow viewports: the widget's table body becomes a four-column grid
// (label / field / label / field). Rows use `display: contents`, so their
// cells are placed directly on that grid.
@media only screen and (max-width: 760px) {
  .time-check-widget tbody {
    display: grid;
    grid-template-columns: auto 1fr auto 1fr;
    align-items: center;
    gap: 0.625em 0.3125em;
  }

  .time-check-widget tr {
    display: contents;

    th {
      padding-right: 5px;
      text-align: right;
    }

    input[type="number"] {
      width: 100%;
      max-width: 5em;
    }
  }
}

// Keep the webdriver delay field narrow.
#webdriver_delay {
  width: 5em;
}

// Show a pointer cursor while hovering the API key.
#api-key:hover {
  cursor: pointer;
}

// Text colour of the API key copy element, taken from the theme.
#api-key-copy {
  color: var(--color-api-key);
}

// Green button variant; the background comes from the theme.
.button-green {
  background-color: var(--color-background-button-green);
}

// Red button variant; the background comes from the theme.
.button-red {
  background-color: var(--color-background-button-red);
}

// Disable text selection. The vendor-prefixed declarations come first and the
// unprefixed property last, so the standard one wins wherever it is supported.
.noselect {
  /* iOS Safari */
  -webkit-touch-callout: none;
  /* Safari */
  -webkit-user-select: none;
  /* Old versions of Firefox */
  -moz-user-select: none;
  /* Internet Explorer/Edge */
  -ms-user-select: none;
  /* Non-prefixed version, currently
    supported by Chrome, Edge, Opera and Firefox */
  user-select: none;
}


// Bulk-operations bar. It starts hidden (display: none); whatever reveals it
// lives outside this stylesheet.
#checkbox-operations {
  display: none;
  margin-bottom: 1em;
  padding: 1em;
  border-radius: 10px;
  background: var(--color-background-checkbox-operations);

  button {
    /* vertically center icon and text */
    display: inline-flex;
    align-items: center;
    /* some space if they wrap the page */
    margin-top: 3px;
    margin-bottom: 3px;
  }
}

// Vertically centre every direct child of the checkbox cell.
.checkbox-uuid > * {
  vertical-align: middle;
}

// Bordered inline warning box with an optional icon.
.inline-warning {
  // Own declarations come before the nested rules: Sass deprecated
  // declarations that follow nested rules ("mixed-decls") and newer compilers
  // emit them at a different position than older ones.
  border: 1px solid var(--color-border-warning);
  padding: 0.5rem;
  border-radius: 5px;
  color: var(--color-warning);

  // Text and icon are aligned on the same vertical middle.
  >span {
    display: inline-block;
    vertical-align: middle;
  }

  img.inline-warning-icon {
    display: inline;
    height: 26px;
    vertical-align: middle;
  }
}

/* automatic price following helpers */
// Tag shown for LD+JSON price tracking: reuses the .inline-tag styling with a
// translucent green background.
.tracking-ldjson-price-data {
  @extend .inline-tag;
  color: #000;
  background-color: var(--color-background-button-green);
  opacity: 0.6;
}

// Emphasised offer text with a compact green action button.
.ldjson-price-track-offer {
  // Own declarations come before the nested rule: Sass deprecated
  // declarations that follow nested rules ("mixed-decls") and newer compilers
  // emit them at a different position than older ones.
  font-weight: bold;
  font-style: italic;

  a.pure-button {
    border-radius: 3px;
    padding: 3px;
    background-color: var(--color-background-button-green);
  }
}

// Small inline icon, sized relative to the root font size and centred on the
// surrounding text line.
.price-follow-tag-icon {
  display: inline-block;
  height: 0.8rem;
  vertical-align: middle;
}


// Processor picker in the quick-watch form: an unstyled list whose items are
// two-column grids (auto-sized first column, remaining space for the second).
#quick-watch-processor-type {
  ul#processor {
    padding-left: 0px;
    color: #fff;

    li {
      display: grid;
      grid-template-columns: auto 1fr;
      align-items: center;
      gap: 0.5rem;
      margin-bottom: 0.5rem;
      font-size: 0.9rem;
      list-style: none;
    }
  }

  // Drop default spacing so the grid gap alone controls the layout.
  label,
  input {
    margin: 0;
    padding: 0;
  }
}

// Stock status label: builds on .inline-tag and is coloured by state class.
.restock-label {
  @extend .inline-tag;

  svg {
    vertical-align: middle;
  }

  // State modifiers - order kept so later states win if several are present.
  &.in-stock {
    color: #fff;
    background-color: var(--color-background-button-green);
  }

  &.not-in-stock {
    color: #777;
    background-color: var(--color-background-button-cancel);
  }

  &.error {
    color: #fff;
    background-color: var(--color-background-button-error);
    opacity: 0.7;
  }
}

// Pill-shaped link to the Chrome extension with a small inline icon.
#chrome-extension-link {
  // Own declarations come before the nested rule: Sass deprecated
  // declarations that follow nested rules ("mixed-decls") and newer compilers
  // emit them at a different position than older ones.
  padding: 9px;
  border: 1px solid var(--color-grey-800);
  border-radius: 10px;
  vertical-align: middle;

  img {
    height: 21px;
    padding: 2px;
    vertical-align: middle;
  }
}

// Connection error notice fixed to the bottom-left corner of the viewport.
#realtime-conn-error {
  position: fixed;
  left: 0;
  bottom: 0;
  padding: 10px;
  font-size: 0.8rem;
  color: #fff;
  background: var(--color-warning);
  opacity: 0.8;
}

// Panel docked to the bottom edge of the viewport, spanning its full width.
#bottom-horizontal-offscreen {
  position: fixed;
  bottom: 0;
  // left + right already stretch the box across the viewport. An explicit
  // `width: 100%` is deliberately not set: combined with the padding below it
  // would make the box wider than the viewport under content-box sizing.
  left: 0;
  right: 0;
  min-height: 50px;
  max-height: 50vh; // Don't take more than 50% of viewport height
  background: #ffffffb8;
  border-top: 1px solid var(--color-border-table-cell);
  padding: 10px;
  box-shadow: 0 -2px 10px rgba(0, 0, 0, 0.2);
  z-index: 100;
  overflow-y: auto; // Allow scrolling if content exceeds max-height

  // Smooth transition when shown/hidden
  transition: opacity 0.3s ease-in-out;

  // When JavaScript removes display:none, ensure it scrolls into view
  scroll-margin-bottom: 10px;

  // Center contents horizontally
  display: flex;
  justify-content: center;
  align-items: center;
}

// Action list rendered as a single row: no bullets, items placed inline.
ul#highlightSnippetActions {
  list-style: none;
  li {
    display: inline-block;
  }
}




================================================
FILE: changedetectionio/static/styles/styles.css
================================================
:root{--color-white: #fff;--color-grey-50: #111;--color-grey-100: #262626;--color-grey-200: #333;--color-grey-300: #444;--color-grey-325: #555;--color-grey-350: #565d64;--color-grey-400: #666;--color-grey-500: #777;--color-grey-600: #999;--color-grey-700: #cbcbcb;--color-grey-750: #ddd;--color-grey-800: #e0e0e0;--color-grey-850: #eee;--color-grey-900: #f2f2f2;--color-black: #000;--color-dark-red: #a00;--color-light-red: #dd0000;--color-background-page: var(--color-grey-100);--color-background-gradient-first: #5ad8f7;--color-background-gradient-second: #2f50af;--color-background-gradient-third: #9150bf;--color-background: var(--color-white);--color-text: var(--color-grey-200);--color-link: #1b98f8;--color-menu-accent: #ed5900;--color-background-code: var(--color-grey-850);--color-error: var(--color-dark-red);--color-error-input: #ffebeb;--color-error-list: var(--color-light-red);--color-table-background: var(--color-background);--color-table-stripe: var(--color-grey-900);--color-text-tab: var(--color-white);--color-background-tab: rgba(255, 255, 255, 0.2);--color-background-tab-hover: rgba(255, 255, 255, 0.5);--color-text-tab-active: #222;--color-api-key: #0078e7;--color-background-button-primary: #0078e7;--color-background-button-green: #42dd53;--color-background-button-red: #dd4242;--color-background-button-success: rgb(28, 184, 65);--color-background-button-error: rgb(202, 60, 60);--color-text-button-error: var(--color-white);--color-background-button-warning: rgb(202, 60, 60);--color-text-button-warning: var(--color-white);--color-background-button-secondary: rgb(66, 184, 221);--color-background-button-cancel: rgb(200, 200, 200);--color-text-button: var(--color-white);--color-background-button-tag: rgb(99, 99, 99);--color-background-snapshot-age: #dfdfdf;--color-error-text-snapshot-age: var(--color-white);--color-error-background-snapshot-age: #ff0000;--color-background-button-tag-active: #9c9c9c;--color-text-messages: 
var(--color-white);--color-background-messages-message: rgba(255, 255, 255, .2);--color-background-messages-error: rgba(255, 1, 1, .5);--color-background-messages-notice: rgba(255, 255, 255, .5);--color-border-notification: #ccc;--color-background-checkbox-operations: rgba(0, 0, 0, 0.05);--color-warning: #ff3300;--color-border-warning: var(--color-warning);--color-text-legend: var(--color-white);--color-link-new-version: #e07171;--color-last-checked: #bbb;--color-text-footer: #444;--color-border-watch-table-cell: #eee;--color-text-watch-tag-list: rgba(231, 0, 105, 0.4);--color-background-new-watch-form: rgba(0, 0, 0, 0.05);--color-background-new-watch-input: var(--color-white);--color-background-new-watch-input-transparent: rgba(255, 255, 255, 0.1);--color-text-new-watch-input: var(--color-text);--color-border-input: var(--color-grey-500);--color-shadow-input: var(--color-grey-400);--color-background-input: var(--color-white);--color-text-input: var(--color-text);--color-text-input-description: var(--color-grey-500);--color-text-input-placeholder: var(--color-grey-600);--color-background-table-thead: var(--color-grey-800);--color-border-table-cell: var(--color-grey-700);--color-text-menu-heading: var(--color-grey-350);--color-text-menu-link: var(--color-grey-500);--color-background-menu-link-hover: var(--color-grey-850);--color-text-menu-link-hover: var(--color-grey-300);--color-shadow-jump: var(--color-grey-500);--color-icon-github: var(--color-black);--color-icon-github-hover: var(--color-grey-300);--color-watch-table-error: var(--color-dark-red);--color-watch-table-row-text: var(--color-grey-100);--highlight-trigger-text-bg-color: #1b98f8;--highlight-ignored-text-bg-color: var(--color-grey-700);--highlight-blocked-text-bg-color: rgb(202, 60, 60)}html[data-darkmode=true]{--color-link: #59bdfb;--color-text: var(--color-white);--color-background-gradient-first: #3f90a5;--color-background-gradient-second: #1e316c;--color-background-gradient-third: 
#4d2c64;--color-background-new-watch-input: var(--color-grey-100);--color-background-new-watch-input-transparent: var(--color-grey-100);--color-text-new-watch-input: var(--color-text);--color-background-table-thead: var(--color-grey-200);--color-table-background: var(--color-grey-300);--color-table-stripe: var(--color-grey-325);--color-background: var(--color-grey-300);--color-text-menu-heading: var(--color-grey-850);--color-text-menu-link: var(--color-grey-800);--color-border-table-cell: var(--color-grey-400);--color-text-tab-active: var(--color-text);--color-border-input: var(--color-grey-400);--color-shadow-input: var(--color-grey-50);--color-background-input: var(--color-grey-350);--color-text-input-description: var(--color-grey-600);--color-text-input-placeholder: var(--color-grey-600);--color-text-watch-tag-list: rgba(250, 62, 146, 0.4);--color-background-code: var(--color-grey-200);--color-background-tab: rgba(0, 0, 0, 0.2);--color-background-tab-hover: rgba(0, 0, 0, 0.5);--color-background-snapshot-age: var(--color-grey-200);--color-shadow-jump: var(--color-grey-200);--color-icon-github: var(--color-white);--color-icon-github-hover: var(--color-grey-700);--color-watch-table-error: var(--color-light-red);--color-watch-table-row-text: var(--color-grey-800)}html[data-darkmode=true] .icon-spread{filter:hue-rotate(-10deg) brightness(1.5)}html[data-darkmode=true] .watch-table .title-col a[target=_blank]::after,html[data-darkmode=true] .watch-table .current-diff-url::after{filter:invert(0.5) hue-rotate(10deg) brightness(2)}html[data-darkmode=true] .watch-table .status-browsersteps{filter:invert(0.5) hue-rotate(10deg) brightness(1.5)}html[data-darkmode=true] .watch-table .watch-controls .state-off img{opacity:.3}html[data-darkmode=true] .watch-table .watch-controls .state-on img{opacity:1}html[data-darkmode=true] .watch-table .unviewed{color:#fff}html[data-darkmode=true] .watch-table .unviewed.error{color:var(--color-watch-table-error)}.arrow{border:solid 
#1b98f8;border-width:0 2px 2px 0;display:inline-block;padding:3px}.arrow.right{transform:rotate(-45deg);-webkit-transform:rotate(-45deg)}.arrow.left{transform:rotate(135deg);-webkit-transform:rotate(135deg)}.arrow.up,.arrow.asc{transform:rotate(-135deg);-webkit-transform:rotate(-135deg)}.arrow.down,.arrow.desc{transform:rotate(45deg);-webkit-transform:rotate(45deg)}#browser_steps th{display:none}#browser_steps li{list-style:decimal;padding:5px}#browser_steps li.browser-step-with-error{background-color:#ffd6d6;border-radius:4px}#browser_steps li:not(:first-child):hover{opacity:1}#browser_steps li .control{padding-left:5px;padding-right:5px}#browser_steps li .control a{font-size:70%}#browser_steps li.empty{padding:0px;opacity:.35}#browser_steps li.empty .control{display:none}#browser_steps li:hover{background:#eee}#browser_steps li>label{display:none}@media only screen and (min-width: 760px){#browser-steps .flex-wrapper{display:flex;flex-flow:row;height:70vh;font-size:80%}#browser-steps .flex-wrapper #browser-steps-ui{flex-grow:1;flex-shrink:1;flex-basis:0;background-color:#eee;border-radius:5px}#browser-steps-fieldlist{flex-grow:0;flex-shrink:0;flex-basis:auto;max-width:400px;padding-left:1rem;overflow-y:scroll}#browsersteps-selector-wrapper{height:100% !important}}#browsersteps-selector-wrapper{width:100%;overflow-y:scroll;position:relative;height:80vh}#browsersteps-selector-wrapper>img{position:absolute;max-width:100%}#browsersteps-selector-wrapper>canvas{position:relative;max-width:100%}#browsersteps-selector-wrapper>canvas:hover{cursor:pointer}#browsersteps-selector-wrapper .loader{position:absolute;left:50%;top:50%;transform:translate(-50%, -50%);z-index:100;max-width:350px;text-align:center}#browsersteps-selector-wrapper .spinner,#browsersteps-selector-wrapper .spinner:after{width:80px;height:80px;font-size:3px}#browsersteps-selector-wrapper #browsersteps-click-start{color:var(--color-grey-400)}#browsersteps-selector-wrapper 
#browsersteps-click-start:hover{cursor:pointer}ul#requests-extra_proxies{list-style:none}ul#requests-extra_proxies li>label{display:none}ul#requests-extra_proxies table tr{display:table-row}ul#requests-extra_proxies table tr input[type=text]{width:100%}@media only screen and (min-width: 1024px){ul#requests-extra_proxies table tr{display:inline}}#request label[for=proxy]{display:inline-block}body.proxy-check-active #request .proxy-check-details{font-size:80%;color:#555;display:block;padding-left:2em;max-width:500px}body.proxy-check-active #request .proxy-timing{font-size:80%;padding-left:1rem;color:var(--color-link)}#recommended-proxy{display:grid;gap:2rem;padding-bottom:1em}@media(min-width: 991px){#recommended-proxy{grid-template-columns:repeat(2, 1fr)}}#recommended-proxy>div{border:1px #aaa solid;border-radius:4px;padding:1em}#extra-proxies-setting{border:1px solid var(--color-grey-800);border-radius:4px;margin:1em;padding:1em}ul#requests-extra_browsers{list-style:none}ul#requests-extra_browsers li>label{display:none}ul#requests-extra_browsers table tr{display:table-row}ul#requests-extra_browsers table tr input[type=text]{width:100%}@media only screen and (min-width: 1280px){ul#requests-extra_browsers table tr{display:inline}ul#requests-extra_browsers table tr input[type=text]{width:100%}}#extra-browsers-setting{border:1px solid var(--color-grey-800);border-radius:4px;margin:1em;padding:1em}.pagination-page-info{text-transform:capitalize}.pagination.menu>*{display:inline-block}.pagination.menu li{display:inline-block}.pagination.menu a{padding:.65rem;margin:3px;border:none;background:#444;border-radius:2px;color:var(--color-text-button)}.pagination.menu a.disabled{display:none}.pagination.menu a.active{font-weight:bold;background:#888}.pagination.menu a:hover{background:#999}.spinner,.spinner:after{border-radius:50%;width:10px;height:10px}.spinner{margin:0px auto;font-size:3px;vertical-align:middle;display:inline-block;text-indent:-9999em;border-top:1.1em solid 
rgba(38,104,237,.2);border-right:1.1em solid rgba(38,104,237,.2);border-bottom:1.1em solid rgba(38,104,237,.2);border-left:1.1em solid #2668ed;-webkit-transform:translateZ(0);-ms-transform:translateZ(0);transform:translateZ(0);-webkit-animation:load8 1.1s infinite linear;animation:load8 1.1s infinite linear}@-webkit-keyframes load8{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}100%{-webkit-transform:rotate(360deg);transform:rotate(360deg)}}@keyframes load8{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}100%{-webkit-transform:rotate(360deg);transform:rotate(360deg)}}.toggle-light-mode .icon-dark{display:none}html[data-darkmode=true] .toggle-light-mode .icon-light{display:none}html[data-darkmode=true] .toggle-light-mode .icon-dark{display:block}.pure-menu-link{padding:.5rem 1em;line-height:1.2rem}#menu-mute,#menu-pause{padding-left:.3rem;padding-right:.3rem}#menu-mute img,#menu-pause img{height:1.2rem}.pure-menu-item svg{height:1.2rem}.pure-menu-item *{vertical-align:middle}.pure-menu-item .github-link{height:1.8rem;display:block}.pure-menu-item .github-link svg{height:100%}.pure-menu-item .bi-heart:hover{cursor:pointer}.pure-menu-item.active .pure-menu-link{background-color:var(--color-background-menu-link-hover);color:var(--color-text-menu-link-hover)}#cdio-logo{padding-left:.5em}#inline-menu-extras-group>*{display:inline-block}#overlay{opacity:.95;position:fixed;width:350px;max-width:100%;height:100%;top:0;right:-350px;background-color:var(--color-table-stripe);z-index:2;transform:translateX(0);transition:transform .5s ease}#overlay.visible{transform:translateX(-100%)}#overlay .content{font-size:.875rem;padding:1rem;margin-top:5rem;max-width:400px;color:var(--color-watch-table-row-text)}#heartpath{transition:all ease .3s !important}#heartpath:hover{fill:red !important;transition:all ease .3s !important}.minitabs-wrapper{width:100%}.minitabs-wrapper>div[id]{padding:20px;border:1px solid #ccc;border-top:none}.minitabs-wrapper 
.minitabs-content{width:100%;display:flex}.minitabs-wrapper .minitabs-content>div{flex:1 1 auto;min-width:0;overflow:scroll}.minitabs-wrapper .minitabs{display:flex;border-bottom:1px solid #ccc}.minitabs-wrapper .minitab{flex:1;text-align:center;padding:12px 0;text-decoration:none;color:#333;background-color:#f1f1f1;border:1px solid #ccc;border-bottom:none;cursor:pointer;transition:background-color .3s}.minitabs-wrapper .minitab:hover{background-color:#ddd}.minitabs-wrapper .minitab.active{background-color:#fff;font-weight:bold}@media(min-width: 800px){body.preview-text-enabled #filters-and-triggers>div{display:flex;gap:20px;position:relative}}body.preview-text-enabled #edit-text-filter,body.preview-text-enabled #text-preview{flex:1;align-self:flex-start}body.preview-text-enabled #edit-text-filter #pro-tips{display:none}body.preview-text-enabled #text-preview{position:sticky;top:20px;padding-top:1rem;padding-bottom:1rem;display:block !important}body.preview-text-enabled #activate-text-preview{background-color:var(--color-grey-500)}body.preview-text-enabled .monospace-preview{background:var(--color-background-input);border:1px solid var(--color-grey-600);padding:1rem;color:var(--color-text-input);font-family:"Courier New",Courier,monospace;font-size:70%;word-break:break-word;white-space:pre-wrap}#activate-text-preview{right:0;position:absolute;z-index:3;box-shadow:1px 1px 4px var(--color-shadow-jump)}#stats_row{display:flex;align-items:center;width:100%;color:#fff;font-size:.85rem}#stats_row>*{padding-bottom:.5rem}#stats_row .left{text-align:left}#stats_row .right{opacity:.5;transition:opacity .6s ease;margin-left:auto;text-align:right}body.has-queue #stats_row .right{opacity:1}.watch-table{width:100%;font-size:80%}.watch-table tr{color:var(--color-watch-table-row-text)}.watch-table tr.unviewed{font-weight:bold}.watch-table td{white-space:nowrap}.watch-table td.title-col{word-break:break-all;white-space:normal}.watch-table td 
a.external::after{content:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAQElEQVR42qXKwQkAIAxDUUdxtO6/RBQkQZvSi8I/pL4BoGw/XPkh4XigPmsUgh0626AjRsgxHTkUThsG2T/sIlzdTsp52kSS1wAAAABJRU5ErkJggg==);margin:0 3px 0 5px}.watch-table th{white-space:nowrap}.watch-table th a{font-weight:normal}.watch-table th a.active{font-weight:bolder}.watch-table th a.inactive .arrow{display:none}.watch-table tr.checking-now td:first-child{position:relative}.watch-table tr.checking-now td:first-child::before{content:"";position:absolute;top:0;bottom:0;left:0;width:3px;background-color:#293eff}.watch-table tr.checking-now td.last-checked .spinner-wrapper{display:inline-block !important}.watch-table tr.checking-now td.last-checked .innertext{display:none !important}.watch-table tr.queued a.recheck{display:none !important}.watch-table tr.queued a.already-in-queue-button{display:inline-block !important}.watch-table tr.paused a.pause-toggle.state-on{display:inline !important}.watch-table tr.paused a.pause-toggle.state-off{display:none !important}.watch-table tr.notification_muted a.mute-toggle.state-on{display:inline !important}.watch-table tr.notification_muted a.mute-toggle.state-off{display:none !important}.watch-table tr.has-error{color:var(--color-watch-table-error)}.watch-table tr.has-error .error-text{display:block !important}.watch-table tr.single-history a.preview-link{display:inline-block !important}.watch-table tr.multiple-history a.history-link{display:inline-block !important}#watch-table-wrapper #post-list-buttons{text-align:right;padding:0px;margin:0px}#watch-table-wrapper #post-list-buttons li{display:inline-block}#watch-table-wrapper #post-list-buttons a{border-top-left-radius:initial;border-top-right-radius:initial;border-bottom-left-radius:5px;border-bottom-right-radius:5px}#watch-table-wrapper.has-error #post-list-buttons #post-list-with-errors{display:inline-block !important}#watch-table-wrapper.has-unread-changes #post-list-buttons 
#post-list-unread,#watch-table-wrapper.has-unread-changes #post-list-buttons #post-list-mark-views,#watch-table-wrapper.has-unread-changes #post-list-buttons #post-list-unread{display:inline-block !important}@media(max-width: 767px){.watch-table thead{display:block}.watch-table thead tr th{display:inline-block}}@media(max-width: 767px)and (max-width: 768px){.watch-table thead tr th .hide-on-mobile{display:none}}@media(max-width: 767px){.watch-table thead .empty-cell{display:none}.watch-table .last-checked{margin-left:calc(20px + .5rem)}.watch-table .last-checked>span{vertical-align:middle}.watch-table .last-changed{margin-left:calc(20px + .5rem)}.watch-table .last-checked::before{color:var(--color-text);content:"Last Checked "}.watch-table .last-changed::before{color:var(--color-text);content:"Last Changed "}.watch-table td.inline{display:inline-block}.watch-table .pure-table td,.watch-table .pure-table th{border:none}.watch-table td{border:none;border-bottom:1px solid var(--color-border-watch-table-cell);vertical-align:middle}.watch-table td:before{top:6px;left:6px;width:45%;padding-right:10px;white-space:nowrap}.watch-table.pure-table-striped tr{background-color:var(--color-table-background)}.watch-table.pure-table-striped tr:nth-child(2n-1){background-color:var(--color-table-stripe)}.watch-table.pure-table-striped tr:nth-child(2n-1) td{background-color:inherit}}@media(max-width: 767px){.watch-table tbody tr{padding-bottom:10px;padding-top:10px;display:grid;grid-template-columns:20px 1fr 100px;grid-template-rows:auto auto auto auto;gap:.5rem}.watch-table tbody tr .counter-i{display:none}.watch-table tbody tr td.checkbox-uuid{display:grid;place-items:center}.watch-table tbody tr>td{border-bottom:none}.watch-table tbody tr>td[colspan]{grid-column:1/-1}.watch-table tbody tr>td.title-col{grid-column:1/-1;grid-row:1}.watch-table tbody tr>td.title-col .watch-title{font-size:.92rem}.watch-table tbody tr>td.title-col .link-spread{display:none}.watch-table tbody 
tr>td.last-checked{grid-column:1/-1;grid-row:2}.watch-table tbody tr>td.last-changed{grid-column:1/-1;grid-row:3}.watch-table tbody tr>td.checkbox-uuid{grid-column:1;grid-row:4}.watch-table tbody tr>td.buttons{grid-column:2;grid-row:4;display:flex;align-items:center;justify-content:flex-start}.watch-table tbody tr>td.watch-controls{grid-column:3;grid-row:4;display:grid;place-items:center}.watch-table tbody tr>td.watch-controls a img{padding:10px}.pure-table td{padding:3px !important}}ul#conditions_match_logic{list-style:none}ul#conditions_match_logic input,ul#conditions_match_logic label,ul#conditions_match_logic li{display:inline-block}ul#conditions_match_logic li{padding-right:1em}.fieldlist_formfields{width:100%;background-color:var(--color-background, #fff);border-radius:4px;border:1px solid var(--color-border-table-cell, #cbcbcb)}.fieldlist_formfields .fieldlist-header{display:flex;background-color:var(--color-background-table-thead, #e0e0e0);font-weight:bold;border-bottom:1px solid var(--color-border-table-cell, #cbcbcb)}.fieldlist_formfields .fieldlist-header-cell{flex:1;padding:.5em 1em;text-align:left}.fieldlist_formfields .fieldlist-header-cell:last-child{flex:0 0 120px}.fieldlist_formfields .fieldlist-body{display:flex;flex-direction:column}.fieldlist_formfields .fieldlist-row{display:flex;border-bottom:1px solid var(--color-border-table-cell, #cbcbcb)}.fieldlist_formfields .fieldlist-row:last-child{border-bottom:none}.fieldlist_formfields .fieldlist-row:nth-child(2n-1){background-color:var(--color-table-stripe, #f2f2f2)}.fieldlist_formfields .fieldlist-row.error-row{background-color:var(--color-error-input, #ffdddd)}.fieldlist_formfields .fieldlist-cell{flex:1;padding:.5em 1em;display:flex;flex-direction:column;justify-content:center}.fieldlist_formfields .fieldlist-cell input,.fieldlist_formfields .fieldlist-cell select{width:100%}.fieldlist_formfields .fieldlist-cell.fieldlist-actions{flex:0 0 
120px;display:flex;flex-direction:row;align-items:center;gap:4px}.fieldlist_formfields ul.errors{margin-top:.5em;margin-bottom:0;padding:.5em;background-color:var(--color-error-background-snapshot-age, #ffdddd);border-radius:4px;list-style-position:inside}@media only screen and (max-width: 760px){.fieldlist_formfields .fieldlist-header,.fieldlist_formfields .fieldlist-row{flex-direction:column}.fieldlist_formfields .fieldlist-header-cell{display:none}.fieldlist_formfields .fieldlist-row{padding:.5em 0;border-bottom:2px solid var(--color-border-table-cell, #cbcbcb)}.fieldlist_formfields .fieldlist-cell{padding:.25em .5em}.fieldlist_formfields .fieldlist-cell.fieldlist-actions{flex:1;justify-content:flex-start;padding-top:.5em}.fieldlist_formfields .fieldlist-cell:not(:last-child){margin-bottom:.5em}.fieldlist_formfields .fieldlist-cell::before{content:attr(data-label);font-weight:bold;margin-bottom:.25em}}.fieldlist_formfields .addRuleRow,.fieldlist_formfields .removeRuleRow,.fieldlist_formfields .verifyRuleRow{cursor:pointer;border:none;padding:4px 8px;border-radius:3px;font-weight:bold;background-color:#aaa;color:var(--color-foreground-text, #fff)}.fieldlist_formfields .addRuleRow:hover,.fieldlist_formfields .removeRuleRow:hover,.fieldlist_formfields .verifyRuleRow:hover{background-color:#999}.watch-table.favicon-not-enabled tr .favicon{display:none}.watch-table tr td.inline.title-col .flex-wrapper{display:flex;align-items:center;gap:4px}.watch-table td,.watch-table th{vertical-align:middle}.watch-table tr.has-favicon.unviewed img.favicon{opacity:1 !important}.watch-table .status-icons{white-space:nowrap;display:flex;align-items:center;gap:4px}.watch-table .status-icons>*{vertical-align:middle}.title-col{padding:10px}.title-wrapper{display:flex;align-items:center;gap:10px}.title-col-inner{display:inline-block;vertical-align:middle}.watch-table img.favicon{vertical-align:middle;max-width:25px;max-height:25px;height:25px;padding-right:4px}body.checking-now 
#checking-now-fixed-tab{display:block !important}#checking-now-fixed-tab{background:#ccc;border-radius:5px;bottom:0;color:var(--color-text);display:none;font-size:.8rem;left:0;padding:5px;position:fixed}#selector-wrapper{height:100%;text-align:center;max-height:70vh;overflow-y:scroll;position:relative}#selector-wrapper>img{position:absolute;z-index:4;max-width:100%}#selector-wrapper>canvas{position:relative;z-index:5;max-width:100%}#selector-wrapper>canvas:hover{cursor:pointer}#selector-current-xpath{font-size:80%}.ternary-radio-group{display:flex;gap:0;border:1px solid var(--color-grey-750);border-radius:4px;overflow:hidden;width:fit-content;background:var(--color-background)}.ternary-radio-group .ternary-radio-option{position:relative;cursor:pointer;margin:0;display:flex;align-items:center}.ternary-radio-group .ternary-radio-option input[type=radio]{position:absolute;opacity:0;width:0;height:0}.ternary-radio-group .ternary-radio-option .ternary-radio-label{padding:8px 16px;background:var(--color-grey-900);border:none;border-right:1px solid var(--color-grey-750);font-size:13px;font-weight:500;color:var(--color-text);transition:all .2s ease;cursor:pointer;display:block;text-align:center}.ternary-radio-group .ternary-radio-option:last-child .ternary-radio-label{border-right:none}.ternary-radio-group .ternary-radio-option input:checked+.ternary-radio-label{background:var(--color-link);color:var(--color-text-button);font-weight:600}.ternary-radio-group .ternary-radio-option input:checked+.ternary-radio-label.ternary-default{background:var(--color-grey-600);color:var(--color-text-button)}.ternary-radio-group .ternary-radio-option input:checked+.ternary-radio-label:hover{background:#1a7bc4}.ternary-radio-group .ternary-radio-option input:checked+.ternary-radio-label:hover.ternary-default{background:var(--color-grey-500)}.ternary-radio-group .ternary-radio-option:hover .ternary-radio-label{background:var(--color-grey-800)}@media(max-width: 
480px){.ternary-radio-group{width:100%}.ternary-radio-group .ternary-radio-label{flex:1;min-width:auto}}input[type=radio].pure-radio:checked+label,input[type=radio].pure-radio:checked{background:var(--color-link);color:var(--color-text-button)}html[data-darkmode=true] .ternary-radio-group .ternary-radio-option .ternary-radio-label{background:var(--color-grey-350)}html[data-darkmode=true] .ternary-radio-group .ternary-radio-option:hover .ternary-radio-label{background:var(--color-grey-400)}html[data-darkmode=true] .ternary-radio-group .ternary-radio-option input:checked+.ternary-radio-label{background:var(--color-link);color:var(--color-text-button)}html[data-darkmode=true] .ternary-radio-group .ternary-radio-option input:checked+.ternary-radio-label.ternary-default{background:var(--color-grey-600)}html[data-darkmode=true] .ternary-radio-group .ternary-radio-option input:checked+.ternary-radio-label:hover{background:#1a7bc4}html[data-darkmode=true] .ternary-radio-group .ternary-radio-option input:checked+.ternary-radio-label:hover.ternary-default{background:var(--color-grey-500)}body.processor-image_ssim_diff #edit-text-filter .text-filtering{display:none}body.processor-image_ssim_diff #conditions-tab{display:none}.modal-dialog{border:none;border-radius:10px;padding:0;background:var(--color-background);color:var(--color-text);box-shadow:0 5px 20px rgba(0,0,0,.3);max-width:500px;width:90%}.modal-dialog::backdrop{background:rgba(0,0,0,.6);backdrop-filter:blur(3px);animation:fadeIn .2s ease-out}.modal-dialog[open]{animation:slideIn .25s ease-out}.modal-dialog .modal-header{padding:1.5rem;border-bottom:1px solid var(--color-border-table-cell);display:flex;align-items:center;gap:1rem}.modal-dialog .modal-header .modal-icon{font-size:2rem;line-height:1;flex-shrink:0}.modal-dialog .modal-header .modal-icon.warning{color:var(--color-warning)}.modal-dialog .modal-header .modal-icon.danger{color:var(--color-background-button-error)}.modal-dialog .modal-header 
.modal-icon.info{color:var(--color-background-button-primary)}.modal-dialog .modal-header .modal-title{font-size:1.3rem;font-weight:bold;margin:0;color:var(--color-text)}.modal-dialog .modal-body{padding:1.5rem;line-height:1.6}.modal-dialog .modal-body p{margin:0 0 1rem 0}.modal-dialog .modal-body p:last-child{margin-bottom:0}.modal-dialog .modal-body strong{color:var(--color-text);font-weight:600}.modal-dialog .modal-footer{padding:1rem 1.5rem;border-top:1px solid var(--color-border-table-cell);display:flex;gap:.75rem;justify-content:flex-end;background:var(--color-grey-900)}.modal-dialog .modal-footer button{padding:.6rem 1.5rem;border:none;border-radius:4px;cursor:pointer;font-weight:500;transition:all .2s ease;font-size:.95rem}.modal-dialog .modal-footer button:hover{transform:translateY(-1px);box-shadow:0 2px 8px rgba(0,0,0,.15)}.modal-dialog .modal-footer button:active{transform:translateY(0)}.modal-dialog .modal-footer button.modal-btn-cancel{background:var(--color-background-button-cancel);color:var(--color-grey-200)}.modal-dialog .modal-footer button.modal-btn-cancel:hover{background:var(--color-grey-700)}.modal-dialog .modal-footer button.modal-btn-confirm{background:var(--color-background-button-primary);color:var(--color-white)}.modal-dialog .modal-footer button.modal-btn-confirm:hover{opacity:.9}.modal-dialog .modal-footer button.modal-btn-danger{background:var(--color-background-button-error);color:var(--color-white)}.modal-dialog .modal-footer button.modal-btn-danger:hover{background:var(--color-dark-red)}.modal-dialog .modal-footer button.modal-btn-warning{background:var(--color-background-button-warning);color:var(--color-white)}.modal-dialog .modal-footer button.modal-btn-warning:hover{opacity:.9}html[data-darkmode=true] .modal-dialog{box-shadow:0 5px 30px rgba(0,0,0,.7)}html[data-darkmode=true] .modal-dialog .modal-footer{background:var(--color-grey-200)}@keyframes fadeIn{from{opacity:0}to{opacity:1}}@keyframes 
slideIn{from{opacity:0;transform:translateY(-20px) scale(0.95)}to{opacity:1;transform:translateY(0) scale(1)}}@media only screen and (max-width: 760px){.modal-dialog{width:95%;max-width:none}.modal-dialog .modal-header{padding:1rem}.modal-dialog .modal-header .modal-title{font-size:1.1rem}.modal-dialog .modal-body{padding:1rem;font-size:.95rem}.modal-dialog .modal-footer{padding:.75rem 1rem;flex-wrap:wrap}.modal-dialog .modal-footer button{flex:1;min-width:120px}}#language-selector-flag{display:inline-block;width:1.2em;height:1.2em;vertical-align:middle;border-radius:50%;overflow:hidden;opacity:.6}#language-selector-flag:hover{opacity:1}.language-list{display:flex;flex-direction:column;gap:.5rem;padding:.5rem 0}.language-option{display:flex;align-items:center;gap:1rem;padding:.25rem;border-radius:4px;transition:background-color .2s ease;text-decoration:none;color:var(--color-text);border:1px solid rgba(0,0,0,0)}.language-option:hover{background-color:var(--color-background-menu-link-hover);border-color:var(--color-border-table-cell)}.language-option.active{background-color:var(--color-link);color:var(--color-text-button);font-weight:600}.language-option .flag{font-size:1.5rem;flex-shrink:0}.language-option .language-name{flex-grow:1;font-size:1rem}#language-modal .language-list .lang-option{display:inline-block;width:1.5em;height:1.5em;vertical-align:middle;margin-right:.5em;border-radius:50%;overflow:hidden}.content-wrapper{display:flex;gap:0;width:100%;max-width:100%;position:relative}@media only screen and (max-width: 900px){.content-wrapper{flex-direction:column}}.action-sidebar{position:sticky;top:100px;flex-shrink:0;width:80px;height:fit-content;background:rgba(0,0,0,0);padding:1.5rem 0;display:flex;flex-direction:column;gap:.5rem;align-items:center;z-index:0}@media only screen and (max-width: 
900px){.action-sidebar{position:relative;top:0;width:100%;flex-direction:row;justify-content:space-around;padding:0;overflow-x:auto}}.action-sidebar-item{position:relative;display:flex;flex-direction:column;align-items:center;justify-content:center;gap:.35rem;padding:.75rem .5rem;min-width:64px;text-decoration:none;opacity:.8;transition:opacity .2s ease}.action-sidebar-item:hover{opacity:1}.action-sidebar-item.active{opacity:1}.action-sidebar-item.active .action-icon{stroke:#fff;stroke-width:2.5}.action-sidebar-item.active .action-label{color:#fff;font-weight:700}.action-icon{width:28px;height:28px;stroke:#fff;stroke-width:2;fill:none;stroke-linecap:round;stroke-linejoin:round;transition:stroke .2s ease}.action-label{font-size:.65rem;font-weight:500;text-align:center;line-height:1.1;letter-spacing:.02em;text-transform:uppercase;color:#fff;transition:color .2s ease;max-width:60px;word-wrap:break-word}.content-main{flex:0 1 auto;width:100%;min-width:0;padding:0;display:flex;flex-direction:column;align-items:center}.hamburger-menu{display:none;background:rgba(0,0,0,0);border:none;cursor:pointer;padding:.5rem;z-index:10001;position:relative}@media only screen and (max-width: 980px){.hamburger-menu{display:flex;flex-direction:column;justify-content:center;align-items:center}}.hamburger-icon{width:24px;height:20px;position:relative;display:flex;flex-direction:column;justify-content:space-between}.hamburger-icon span{display:block;height:3px;width:100%;background:var(--color-text);border-radius:2px;transition:all .3s cubic-bezier(0.68, -0.55, 0.265, 1.55);transform-origin:center}.hamburger-menu.active .hamburger-icon span:nth-child(1){transform:translateY(8.5px) rotate(45deg)}.hamburger-menu.active .hamburger-icon span:nth-child(2){opacity:0;transform:translateX(-10px)}.hamburger-menu.active .hamburger-icon span:nth-child(3){transform:translateY(-8.5px) 
rotate(-45deg)}.mobile-menu-overlay{display:none;position:fixed;top:0;left:0;right:0;bottom:0;background:rgba(0,0,0,.5);z-index:9999;opacity:0;transition:opacity .3s ease}.mobile-menu-overlay.active{display:block;opacity:1}.mobile-menu-drawer{position:fixed;top:0;right:-280px;width:280px;height:100%;background:var(--color-background);opacity:1;box-shadow:-2px 0 8px rgba(0,0,0,.15);z-index:10000;transition:right .3s cubic-bezier(0.68, -0.55, 0.265, 1.55);overflow-y:auto;padding-top:60px}.mobile-menu-drawer.active{right:0}.mobile-menu-drawer .mobile-menu-items{list-style:none;padding:1rem 0;margin:0}.mobile-menu-drawer .mobile-menu-items li{border-bottom:1px solid var(--color-border-table-cell)}.mobile-menu-drawer .mobile-menu-items li>*{display:block;padding:1rem 1.5rem;color:var(--color-text);text-decoration:none;font-weight:500;transition:background .2s ease}.mobile-menu-drawer .mobile-menu-items li>*:hover{background:var(--color-background-menu-link-hover)}.mobile-menu-drawer .mobile-menu-items li#menu-pause,.mobile-menu-drawer .mobile-menu-items li#menu-mute{display:none}.logo-cdio{font-weight:bold;font-size:1.1rem}.logo-cdio .logo-cd{color:var(--color-grey-500)}.logo-cdio .logo-io{color:var(--color-text)}.menu-always-visible{display:flex;align-items:center;gap:.5rem;margin-left:auto}@media only screen and (max-width: 980px){#top-right-menu .menu-collapsible{display:none !important}.pure-menu-horizontal{overflow-x:visible !important}#nav-menu{overflow-x:visible !important}}@media only screen and (min-width: 1025px){.hamburger-menu,.mobile-menu-drawer,.mobile-menu-overlay{display:none !important}}html[data-darkmode=true] .mobile-menu-drawer{box-shadow:-2px 0 8px rgba(0,0,0,.4)}#search-modal .modal-body{padding:2rem 1.5rem}#search-modal .modal-body .pure-control-group{padding-bottom:0}#search-modal .modal-body .pure-control-group label{display:block;margin-bottom:.5rem;font-size:.9rem;font-weight:600;color:var(--color-text)}#search-modal .modal-body 
.pure-control-group #search-modal-input{width:100%;max-width:100%;box-sizing:border-box;padding:.6rem .8rem;font-size:1rem;border:1px solid var(--color-border-input);border-radius:4px;background-color:var(--color-background-input);color:var(--color-text-input);box-shadow:inset 0 1px 3px var(--color-shadow-input);transition:border-color .2s ease,box-shadow .2s ease}#search-modal .modal-body .pure-control-group #search-modal-input:focus{outline:none;border-color:var(--color-link);box-shadow:0 0 0 3px rgba(27,152,248,.1)}#search-modal .modal-body .pure-control-group #search-modal-input::placeholder{color:var(--color-text-input-placeholder);opacity:.7}html[data-darkmode=true] #search-modal #search-modal-input:focus{box-shadow:0 0 0 3px rgba(89,189,251,.15)}.action-sidebar-item{position:relative}.action-sidebar-item .notification-bubble{position:absolute;top:8px;left:8px;min-width:18px;height:18px;background:#f44;color:#fff;font-size:10px;font-weight:700;line-height:18px;text-align:center;border-radius:9px;padding:0 2px;box-shadow:0 2px 4px rgba(0,0,0,.3);pointer-events:none;transition:all .2s ease;display:none}.action-sidebar-item .notification-bubble.red-bubble{background:#f44}.action-sidebar-item .notification-bubble.blue-bubble{background:#4a9eff;color:#fff}.action-sidebar-item .notification-bubble.visible{display:block}.action-sidebar-item .notification-bubble.pulse{animation:bubblePulse .4s ease-out}.action-sidebar-item .notification-bubble.large-number{font-size:8px;min-width:20px;height:20px;line-height:20px;border-radius:10px}@keyframes bubblePulse{0%{transform:scale(1)}50%{transform:scale(1.3)}100%{transform:scale(1)}}html[data-darkmode=true] .notification-bubble{box-shadow:0 2px 6px 
rgba(0,0,0,.6)}.toast-container{position:fixed;display:flex;flex-direction:column;gap:.75rem;pointer-events:none;z-index:10000}.toast-container.toast-top-right{top:20px;right:20px}.toast-container.toast-top-center{top:100px;left:50%;transform:translateX(-50%)}.toast-container.toast-top-left{top:20px;left:20px}.toast-container.toast-bottom-right{bottom:20px;right:20px}.toast-container.toast-bottom-center{bottom:20px;left:50%;transform:translateX(-50%)}.toast-container.toast-bottom-left{bottom:20px;left:20px}.toast{position:relative;display:flex;align-items:center;gap:.75rem;min-width:300px;max-width:500px;padding:1rem 1.25rem;background:var(--color-background);border-radius:8px;box-shadow:0 4px 12px rgba(0,0,0,.15),0 0 0 1px rgba(0,0,0,.05);pointer-events:auto;overflow:hidden;opacity:0;transform:translateY(-50px);transition:all .3s cubic-bezier(0.68, -0.55, 0.265, 1.55);font-family:inherit}.toast.toast-show{opacity:1;transform:translateY(0)}.toast.toast-hide{opacity:0;transform:translateY(-50px) scale(0.95)}.toast.toast-success{border-left:4px solid #10b981}.toast.toast-success .toast-icon{color:#10b981}.toast.toast-error{border-left:4px solid #ef4444}.toast.toast-error .toast-icon{color:#ef4444}.toast.toast-warning{border-left:4px solid #f59e0b}.toast.toast-warning .toast-icon{color:#f59e0b}.toast.toast-info{border-left:4px solid #3b82f6}.toast.toast-info .toast-icon{color:#3b82f6}.toast.toast-default{border-left:4px solid var(--color-grey-500)}.toast-icon{flex-shrink:0;width:24px;height:24px}.toast-icon svg{width:100%;height:100%}.toast-message{flex:1;font-size:.875rem;line-height:1.5;color:var(--color-text);word-break:break-word;font-family:inherit}.toast-close{flex-shrink:0;width:24px;height:24px;display:flex;align-items:center;justify-content:center;background:rgba(0,0,0,0);border:none;border-radius:4px;color:var(--color-grey-500);font-size:1.5rem;line-height:1;cursor:pointer;transition:all .2s 
ease;padding:0;margin-left:.25rem}.toast-close:hover{background:var(--color-grey-800);color:var(--color-text)}.toast-close:active{transform:scale(0.95)}.toast-progress{position:absolute;bottom:0;left:0;right:0;height:3px;background:currentColor;opacity:.3;transform-origin:left;transition:transform linear}html[data-darkmode=true] .toast{background:var(--color-grey-300);box-shadow:0 4px 12px rgba(0,0,0,.4),0 0 0 1px hsla(0,0%,100%,.05)}html[data-darkmode=true] .toast-close:hover{background:var(--color-grey-400)}@media only screen and (max-width: 768px){.toast-container{left:50% !important;right:auto !important;top:80px !important;transform:translateX(-50%) !important;align-items:center}.toast-container.toast-bottom-right,.toast-container.toast-bottom-center,.toast-container.toast-bottom-left{top:auto !important;bottom:80px !important}.toast{min-width:auto;max-width:none;width:80vw;transform:translateY(-100px)}.toast.toast-show{transform:translateY(0)}.toast.toast-hide{transform:translateY(-100px) scale(0.95)}}@media(prefers-reduced-motion: reduce){.toast{transition:opacity .2s ease;transform:none !important}.toast.toast-show{opacity:1}.toast.toast-hide{opacity:0}}.login-form{min-height:52vh;display:flex;align-items:center;justify-content:center;padding:2rem 1rem}.login-form .inner{background:var(--color-background);border-radius:16px;box-shadow:0 10px 40px rgba(0,0,0,.08),0 2px 8px rgba(0,0,0,.04);padding:3rem 2.5rem;width:100%;max-width:420px;position:relative;overflow:hidden;transition:transform .3s ease,box-shadow .3s ease}.login-form .inner:hover{box-shadow:0 15px 50px rgba(0,0,0,.12),0 5px 15px rgba(0,0,0,.06)}.login-form form{margin:0}.login-form fieldset{border:none;padding:0;margin:0}.login-form .pure-control-group{margin-bottom:1.75rem}.login-form .pure-control-group:last-of-type{margin-bottom:0;margin-top:2rem}.login-form label{display:block;margin-bottom:.5rem;font-weight:600;font-size:.9rem;color:var(--color-text);letter-spacing:.01em}.login-form 
input[type=password]{width:100%;padding:.875rem 1rem;border:2px solid var(--color-grey-800);border-radius:8px;font-size:1rem;background:var(--color-background-input);color:var(--color-text-input);transition:all .2s ease;box-sizing:border-box}.login-form input[type=password]:focus{outline:none;border-color:var(--color-link);box-shadow:0 0 0 3px rgba(27,152,248,.1);transform:translateY(-1px)}.login-form input[type=password]::placeholder{color:var(--color-text-input-placeholder)}.login-form button[type=submit]{width:100%;padding:.875rem 1.5rem;font-size:1rem;font-weight:600;border-radius:8px;border:none;background:var(--color-background-button-primary);color:var(--color-text-button);cursor:pointer;transition:all .2s ease;box-shadow:0 2px 8px rgba(27,152,248,.2)}.login-form button[type=submit]:hover{box-shadow:0 4px 12px rgba(27,152,248,.3);background:#06c}.login-form button[type=submit]:active{transform:translateY(0);box-shadow:0 2px 4px rgba(27,152,248,.2)}.content-main>ul.messages{position:fixed;top:120px;left:50%;transform:translateX(-50%);list-style:none;padding:0;margin:0;z-index:1000;min-width:300px;max-width:500px}.content-main>ul.messages li{padding:1rem 1.25rem;border-radius:8px;font-size:.95rem;line-height:1.5;font-weight:500;box-shadow:0 4px 12px rgba(0,0,0,.15);animation:slideDown .3s ease-out;border:2px solid rgba(0,0,0,0)}.content-main>ul.messages li.error{background:#fee;border:2px solid #ef4444;color:#991b1b;font-weight:600}.content-main>ul.messages li.success{background:#f0fdf4;border:2px solid #10b981;color:#166534}.content-main>ul.messages li.info,.content-main>ul.messages li.message{background:#eff6ff;border:2px solid #3b82f6;color:#1e40af}@keyframes slideDown{from{opacity:0;transform:translateY(-20px)}to{opacity:1;transform:translateY(0)}}html[data-darkmode=true] .login-form .inner{box-shadow:0 10px 40px rgba(0,0,0,.4),0 2px 8px rgba(0,0,0,.2)}html[data-darkmode=true] .login-form .inner:hover{box-shadow:0 15px 50px rgba(0,0,0,.5),0 5px 15px 
rgba(0,0,0,.3)}html[data-darkmode=true] .login-form input[type=password]{border-color:var(--color-grey-400)}html[data-darkmode=true] .login-form input[type=password]:focus{border-color:var(--color-link)}html[data-darkmode=true] .content-main>ul.messages li{box-shadow:0 4px 12px rgba(0,0,0,.4)}html[data-darkmode=true] .content-main>ul.messages li.error{background:#4a1d1d;border-color:#ef4444;color:#fca5a5}html[data-darkmode=true] .content-main>ul.messages li.success{background:#1a3a2a;border-color:#10b981;color:#86efac}html[data-darkmode=true] .content-main>ul.messages li.info,html[data-darkmode=true] .content-main>ul.messages li.message{background:#1e3a5f;border-color:#3b82f6;color:#93c5fd}@media only screen and (max-width: 768px){.login-form{min-height:auto;padding:1rem .5rem;padding-top:5rem}.login-form .inner{padding:2rem 1.5rem;border-radius:12px}.content-main>ul.messages{top:70px;left:10px;right:10px;transform:none;min-width:auto}}body.wrapped-tabs .tabs ul{grid-template-columns:repeat(auto-fill, minmax(var(--tab-width, 180px), 1fr));grid-auto-flow:row;grid-auto-columns:unset;gap:0;column-gap:5px}body.wrapped-tabs .tabs ul li{border-radius:0}.tabs ul{margin:0px;padding:0px;display:grid;grid-auto-flow:column;grid-auto-columns:max-content;gap:5px;list-style:none}.tabs ul li{white-space:nowrap;color:var(--color-text-tab);border-top-left-radius:5px;border-top-right-radius:5px;background-color:var(--color-background-tab)}.tabs ul li:not(.active):hover{background-color:var(--color-background-tab-hover)}.tabs ul li.active,.tabs ul li :target{background-color:var(--color-background)}.tabs ul li.active a,.tabs ul li :target a{color:var(--color-text-tab-active);font-weight:bold}.tabs ul li a{display:block;padding:.7em;color:var(--color-text-tab)}body,.pure-table,.pure-table thead,.pure-table td,.pure-table th,.pure-form input,.pure-form textarea,.pure-form select,.edit-form 
.inner,.pure-menu-horizontal,footer,.sticky-tab,#diff-jump,.button-tag,#new-watch-form,#new-watch-form input:not(.pure-button),code,.messages li,#checkbox-operations,.inline-warning,a,.watch-controls img{transition:color .4s ease,background-color .4s ease,background .4s ease,border-color .4s ease,box-shadow .4s ease}body{color:var(--color-text);background:var(--color-background-page);font-family:Helvetica Neue,Helvetica,Lucida Grande,Arial,Ubuntu,Cantarell,Fira Sans,sans-serif}.visually-hidden{clip:rect(0 0 0 0);clip-path:inset(50%);height:1px;overflow:hidden;position:absolute;white-space:nowrap;width:1px}.status-icon{display:inline-block;height:1rem;vertical-align:middle}.pure-table-even{background:var(--color-background)}a{text-decoration:none;color:var(--color-link)}a.github-link{color:var(--color-icon-github);margin:0 1rem 0 .5rem}a.github-link svg{fill:currentColor}a.github-link:hover{color:var(--color-icon-github-hover)}#search-result-info{color:#fff}button.toggle-button{vertical-align:middle;background:rgba(0,0,0,0);border:none;cursor:pointer;color:var(--color-icon-github)}button.toggle-button:hover{color:var(--color-icon-github-hover)}button.toggle-button svg{fill:currentColor}button.toggle-button .icon-light{display:block}.pure-menu-horizontal{background:var(--color-background);padding:5px;display:flex;justify-content:space-between;align-items:center}#pure-menu-horizontal-spinner{height:3px;background:linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);background-size:400% 400%;width:100%;animation:gradient 200s ease infinite}body.spinner-active #pure-menu-horizontal-spinner{animation:gradient 1s ease infinite}@keyframes gradient{0%{background-position:0% 50%}50%{background-position:100% 50%}100%{background-position:0% 
50%}}.pure-menu-heading{color:var(--color-text-menu-heading)}.pure-menu-link{color:var(--color-text-menu-link)}.pure-menu-link:hover{background-color:var(--color-background-menu-link-hover);color:var(--color-text-menu-link-hover)}.tab-pane-inner{scroll-margin-top:200px}section.content{padding-bottom:1em;flex-direction:column;display:flex;align-items:center;justify-content:center}@media only screen and (max-width: 980px){section.content{padding-top:80px}}@media only screen and (min-width: 980px){section.content{padding-top:100px}}code{background:var(--color-background-code);color:var(--color-text)}.inline-tag,.restock-label,.tracking-ldjson-price-data,.watch-tag-list,.processor-badge{white-space:nowrap;border-radius:5px;padding:2px 5px;margin-right:4px;line-height:1.2rem}.processor-badge{font-weight:900}.watch-tag-list{color:var(--color-white);background:var(--color-text-watch-tag-list);text-decoration:none}.watch-tag-list:hover{text-decoration:none;opacity:.8;cursor:pointer}.watch-tag-list:visited{color:var(--color-white)}@media(min-width: 768px){.box{margin:0 1em !important}}.box{max-width:100%;margin:0 .3em;flex-direction:column;display:flex;justify-content:center}body:after{content:"";background:linear-gradient(130deg, var(--color-background-gradient-first), var(--color-background-gradient-second) 41.07%, var(--color-background-gradient-third) 84.05%)}body:after,body:before{display:block;height:650px;position:absolute;top:0;left:0;width:100%;z-index:-1}body::after{opacity:.91}body::before{content:""}body:after,body:before{-webkit-clip-path:polygon(100% 0, 0 0, 0 77.5%, 1% 77.4%, 2% 77.1%, 3% 76.6%, 4% 75.9%, 5% 75.05%, 6% 74.05%, 7% 72.95%, 8% 71.75%, 9% 70.55%, 10% 69.3%, 11% 68.05%, 12% 66.9%, 13% 65.8%, 14% 64.8%, 15% 64%, 16% 63.35%, 17% 62.85%, 18% 62.6%, 19% 62.5%, 20% 62.65%, 21% 63%, 22% 63.5%, 23% 64.2%, 24% 65.1%, 25% 66.1%, 26% 67.2%, 27% 68.4%, 28% 69.65%, 29% 70.9%, 30% 72.15%, 31% 73.3%, 32% 74.35%, 33% 75.3%, 34% 76.1%, 35% 76.75%, 36% 77.2%, 37% 
77.45%, 38% 77.5%, 39% 77.3%, 40% 76.95%, 41% 76.4%, 42% 75.65%, 43% 74.75%, 44% 73.75%, 45% 72.6%, 46% 71.4%, 47% 70.15%, 48% 68.9%, 49% 67.7%, 50% 66.55%, 51% 65.5%, 52% 64.55%, 53% 63.75%, 54% 63.15%, 55% 62.75%, 56% 62.55%, 57% 62.5%, 58% 62.7%, 59% 63.1%, 60% 63.7%, 61% 64.45%, 62% 65.4%, 63% 66.45%, 64% 67.6%, 65% 68.8%, 66% 70.05%, 67% 71.3%, 68% 72.5%, 69% 73.6%, 70% 74.65%, 71% 75.55%, 72% 76.35%, 73% 76.9%, 74% 77.3%, 75% 77.5%, 76% 77.45%, 77% 77.25%, 78% 76.8%, 79% 76.2%, 80% 75.4%, 81% 74.45%, 82% 73.4%, 83% 72.25%, 84% 71.05%, 85% 69.8%, 86% 68.55%, 87% 67.35%, 88% 66.2%, 89% 65.2%, 90% 64.3%, 91% 63.55%, 92% 63%, 93% 62.65%, 94% 62.5%, 95% 62.55%, 96% 62.8%, 97% 63.3%, 98% 63.9%, 99% 64.75%, 100% 65.7%);clip-path:polygon(100% 0, 0 0, 0 77.5%, 1% 77.4%, 2% 77.1%, 3% 76.6%, 4% 75.9%, 5% 75.05%, 6% 74.05%, 7% 72.95%, 8% 71.75%, 9% 70.55%, 10% 69.3%, 11% 68.05%, 12% 66.9%, 13% 65.8%, 14% 64.8%, 15% 64%, 16% 63.35%, 17% 62.85%, 18% 62.6%, 19% 62.5%, 20% 62.65%, 21% 63%, 22% 63.5%, 23% 64.2%, 24% 65.1%, 25% 66.1%, 26% 67.2%, 27% 68.4%, 28% 69.65%, 29% 70.9%, 30% 72.15%, 31% 73.3%, 32% 74.35%, 33% 75.3%, 34% 76.1%, 35% 76.75%, 36% 77.2%, 37% 77.45%, 38% 77.5%, 39% 77.3%, 40% 76.95%, 41% 76.4%, 42% 75.65%, 43% 74.75%, 44% 73.75%, 45% 72.6%, 46% 71.4%, 47% 70.15%, 48% 68.9%, 49% 67.7%, 50% 66.55%, 51% 65.5%, 52% 64.55%, 53% 63.75%, 54% 63.15%, 55% 62.75%, 56% 62.55%, 57% 62.5%, 58% 62.7%, 59% 63.1%, 60% 63.7%, 61% 64.45%, 62% 65.4%, 63% 66.45%, 64% 67.6%, 65% 68.8%, 66% 70.05%, 67% 71.3%, 68% 72.5%, 69% 73.6%, 70% 74.65%, 71% 75.55%, 72% 76.35%, 73% 76.9%, 74% 77.3%, 75% 77.5%, 76% 77.45%, 77% 77.25%, 78% 76.8%, 79% 76.2%, 80% 75.4%, 81% 74.45%, 82% 73.4%, 83% 72.25%, 84% 71.05%, 85% 69.8%, 86% 68.55%, 87% 67.35%, 88% 66.2%, 89% 65.2%, 90% 64.3%, 91% 63.55%, 92% 63%, 93% 62.65%, 94% 62.5%, 95% 62.55%, 96% 62.8%, 97% 63.3%, 98% 63.9%, 99% 64.75%, 100% 
65.7%)}.button-small{font-size:85%}.button-xsmall{font-size:70%}.fetch-error{padding-top:1em;font-size:80%;max-width:400px;display:block}.pure-button-primary,a.pure-button-primary,.pure-button-selected,a.pure-button-selected{background-color:var(--color-background-button-primary)}.button-secondary{color:var(--color-text-button);border-radius:4px;text-shadow:0 1px 1px rgba(0,0,0,.2)}.button-success{background:var(--color-background-button-success)}.button-tag{background:var(--color-background-button-tag);color:var(--color-text-button);font-size:65%;border-bottom-left-radius:initial;border-bottom-right-radius:initial;margin-right:4px}.button-tag.active{background:var(--color-background-button-tag-active);font-weight:bold}.button-error{background:var(--color-background-button-error);color:var(--color-text-button-error)}.button-warning{background:var(--color-background-button-warning);color:var(--color-text-button-warning)}.button-secondary{background:var(--color-background-button-secondary)}.button-cancel{background:var(--color-background-button-cancel)}.messages li{list-style:none;padding:1em;border-radius:10px;color:var(--color-text-messages);font-weight:bold}.messages li.message{background:var(--color-background-messages-message)}.messages li.error{background:var(--color-background-messages-error)}.messages li.notice{background:var(--color-background-messages-notice)}.messages.with-share-link>*:hover{cursor:pointer}.notifications-wrapper{padding-top:.5rem}.notifications-wrapper #notification-test-log{margin-top:1rem;padding:1rem;white-space:pre-wrap;word-break:break-word;overflow-wrap:break-word;max-width:100%;box-sizing:border-box;max-height:12rem;overflow-y:scroll;border:1px solid var(--color-border-notification);border-radius:5px}label:hover{cursor:pointer}.grey-form-border{border:1px solid var(--color-border-notification);padding:.5rem;border-radius:5px}#notification-error-log{border:1px solid 
var(--color-border-notification);padding:1rem;border-radius:5px;overflow-wrap:break-word}#token-table.pure-table td,#token-table.pure-table th{font-size:80%}.pure-form input[type=text].transparent-field{background-color:var(--color-background-new-watch-input-transparent) !important;color:var(--color-white) !important;border:1px solid hsla(0,0%,100%,.2) !important;box-shadow:none !important;-webkit-box-shadow:none !important}.pure-form input[type=text].transparent-field::placeholder{opacity:.5;color:hsla(0,0%,100%,.7);font-weight:lighter}#new-watch-form{background:var(--color-background-new-watch-form);padding:1em;border-radius:10px;margin-bottom:1em;max-width:100%}#new-watch-form #url::placeholder{font-weight:bold}#new-watch-form input{display:inline-block;margin-bottom:5px}#new-watch-form input:not(.pure-button){background-color:var(--color-background-new-watch-input);color:var(--color-text-new-watch-input)}#new-watch-form .label{display:none}#new-watch-form legend{color:var(--color-text-legend);font-weight:bold}@media only screen and (min-width: 760px){#new-watch-form #watch-add-wrapper-zone{display:flex;gap:.3rem;flex-direction:row;min-width:70vw}}#new-watch-form #watch-add-wrapper-zone>span{flex-grow:0}#new-watch-form #watch-add-wrapper-zone>span input{width:100%;padding-right:1em}#new-watch-form #watch-add-wrapper-zone>span:first-child{flex-grow:1}@media only screen and (max-width: 760px){#new-watch-form #watch-add-wrapper-zone #url{width:100%}}#new-watch-form #watch-group-tag{font-size:.9rem;padding:.3rem;display:flex;align-items:center;gap:.5rem;color:var(--color-white)}#new-watch-form #watch-group-tag label,#new-watch-form #watch-group-tag input{margin:0}#new-watch-form #watch-group-tag input{flex:1}#diff-col{padding-left:40px}#diff-jump{position:fixed;left:0px;top:120px;background:var(--color-background);padding:10px;border-top-right-radius:5px;border-bottom-right-radius:5px;box-shadow:1px 1px 4px var(--color-shadow-jump)}#diff-jump 
a{color:var(--color-link);cursor:pointer;-moz-user-select:none;-webkit-user-select:none;-ms-user-select:none;user-select:none;-o-user-select:none}footer{padding:10px;background:var(--color-background);color:var(--color-text-footer);text-align:center}#feed-icon{vertical-align:middle}.sticky-tab{position:absolute;top:60px;font-size:65%;background:var(--color-background);padding:10px}@media only screen and (max-width: 980px){.sticky-tab{display:none}}.sticky-tab#left-sticky{left:0;position:fixed;border-top-right-radius:5px;border-bottom-right-radius:5px;box-shadow:1px 1px 4px var(--color-shadow-jump)}.sticky-tab#right-sticky{right:0px}.sticky-tab#hosted-sticky{right:0px;top:100px;font-weight:bold}#new-version-text a{color:var(--color-link-new-version)}.watch-controls{color:#f8321b}.watch-controls .state-on img{opacity:.8}.watch-controls img{opacity:.2}.watch-controls img:hover{transition:opacity .3s;opacity:.8}.monospaced-textarea textarea{width:100%;font-family:monospace;white-space:pre;overflow-wrap:normal;overflow-x:auto}.pure-form fieldset{padding-top:0px}.pure-form fieldset ul{padding-bottom:0px;margin-bottom:0px}.pure-form .pure-control-group,.pure-form .pure-group,.pure-form .pure-controls{padding-bottom:1em}.pure-form .pure-control-group div,.pure-form .pure-group div,.pure-form .pure-controls div{margin:0px}.pure-form .pure-control-group .checkbox>*,.pure-form .pure-group .checkbox>*,.pure-form .pure-controls .checkbox>*{display:inline;vertical-align:middle}.pure-form .pure-control-group .checkbox>label,.pure-form .pure-group .checkbox>label,.pure-form .pure-controls .checkbox>label{padding-left:5px}.pure-form .pure-control-group legend,.pure-form .pure-group legend,.pure-form .pure-controls legend{color:var(--color-text-legend)}.pure-form .error input{background-color:var(--color-error-input)}.pure-form ul.errors{padding:.5em .6em;border:1px solid 
var(--color-error-list);border-radius:4px;vertical-align:middle;-webkit-box-sizing:border-box;box-sizing:border-box}.pure-form ul.errors li{margin-left:1em;color:var(--color-error-list)}.pure-form label{font-weight:bold}.pure-form textarea{width:100%}.pure-form .inline-radio ul{margin:0px;list-style:none}.pure-form .inline-radio ul li{display:flex;align-items:center;gap:1em}@media only screen and (max-width: 760px),(min-device-width: 768px)and (max-device-width: 980px){.edit-form{padding:.5em;margin:0}#nav-menu{overflow-x:scroll}}@media only screen and (max-width: 760px),(min-device-width: 768px)and (max-device-width: 980px){input[type=text]{width:100%}}.pure-table{border-color:var(--color-border-table-cell)}.pure-table thead{background-color:var(--color-background-table-thead);color:var(--color-text);border-bottom:1px solid var(--color-background-table-thead)}.pure-table td,.pure-table th{border-left-color:var(--color-border-table-cell)}.pure-table-striped tr:nth-child(2n-1) td{background-color:var(--color-table-stripe)}.pure-form input[type=color],.pure-form input[type=date],.pure-form input[type=datetime-local],.pure-form input[type=datetime],.pure-form input[type=email],.pure-form input[type=month],.pure-form input[type=number],.pure-form input[type=password],.pure-form input[type=search],.pure-form input[type=tel],.pure-form input[type=text],.pure-form input[type=time],.pure-form input[type=url],.pure-form input[type=week],.pure-form select,.pure-form textarea{border:var(--color-border-input);box-shadow:inset 0 1px 3px var(--color-shadow-input);background-color:var(--color-background-input);color:var(--color-text-input)}.pure-form input[type=color]:active,.pure-form input[type=date]:active,.pure-form input[type=datetime-local]:active,.pure-form input[type=datetime]:active,.pure-form input[type=email]:active,.pure-form input[type=month]:active,.pure-form input[type=number]:active,.pure-form input[type=password]:active,.pure-form 
input[type=search]:active,.pure-form input[type=tel]:active,.pure-form input[type=text]:active,.pure-form input[type=time]:active,.pure-form input[type=url]:active,.pure-form input[type=week]:active,.pure-form select:active,.pure-form textarea:active{background-color:var(--color-background-input)}input::placeholder,textarea::placeholder{color:var(--color-text-input-placeholder)}.m-d{min-width:100%}@media only screen and (min-width: 761px){.m-d{min-width:80%}}.pure-form-stacked>div:first-child{display:block}.tab-pane-inner{padding:0px}.tab-pane-inner:not(:target){display:none}.tab-pane-inner:target{display:block}.beta-logo{height:50px;right:-3px;top:-3px;position:absolute}#selector-header{padding-bottom:1em}body.full-width .edit-form{width:95%}.edit-form{min-width:70%;max-width:95%}.edit-form .box-wrap{position:relative}.edit-form .inner{background:var(--color-background);padding:20px}.edit-form #actions{display:block;background:var(--color-background)}.edit-form #actions .pure-control-group{display:flex;gap:.625em;flex-wrap:wrap}.edit-form .pure-form-message-inline{padding-left:0;color:var(--color-text-input-description)}.edit-form .pure-form-message-inline code{font-size:.875em}.border-fieldset{border:1px solid #ccc;padding:1rem;border-radius:5px;margin-bottom:1rem}.border-fieldset h3{margin-top:0}.border-fieldset fieldset:last-of-type{padding-bottom:0}.border-fieldset fieldset:last-of-type .pure-control-group{padding-bottom:0}ul{padding-left:1em;padding-top:0px;margin-top:4px}.time-check-widget tr{display:inline}.time-check-widget tr input[type=number]{width:5em}@media only screen and (max-width: 760px){.time-check-widget tbody{display:grid;grid-template-columns:auto 1fr auto 1fr;gap:.625em .3125em;align-items:center}.time-check-widget tr{display:contents}.time-check-widget tr th{text-align:right;padding-right:5px}.time-check-widget tr 
input[type=number]{width:100%;max-width:5em}}#webdriver_delay{width:5em}#api-key:hover{cursor:pointer}#api-key-copy{color:var(--color-api-key)}.button-green{background-color:var(--color-background-button-green)}.button-red{background-color:var(--color-background-button-red)}.noselect{-webkit-touch-callout:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}#checkbox-operations{background:var(--color-background-checkbox-operations);padding:1em;border-radius:10px;margin-bottom:1em;display:none}#checkbox-operations button{margin-bottom:3px;margin-top:3px;display:inline-flex;align-items:center}.checkbox-uuid>*{vertical-align:middle}.inline-warning{border:1px solid var(--color-border-warning);padding:.5rem;border-radius:5px;color:var(--color-warning)}.inline-warning>span{display:inline-block;vertical-align:middle}.inline-warning img.inline-warning-icon{display:inline;height:26px;vertical-align:middle}.tracking-ldjson-price-data{background-color:var(--color-background-button-green);color:#000;opacity:.6}.ldjson-price-track-offer{font-weight:bold;font-style:italic}.ldjson-price-track-offer a.pure-button{border-radius:3px;padding:3px;background-color:var(--color-background-button-green)}.price-follow-tag-icon{display:inline-block;height:.8rem;vertical-align:middle}#quick-watch-processor-type ul#processor{color:#fff;padding-left:0px}#quick-watch-processor-type ul#processor li{list-style:none;font-size:.9rem;display:grid;grid-template-columns:auto 1fr;align-items:center;gap:.5rem;margin-bottom:.5rem}#quick-watch-processor-type label,#quick-watch-processor-type input{padding:0;margin:0}.restock-label.in-stock{background-color:var(--color-background-button-green);color:#fff}.restock-label.not-in-stock{background-color:var(--color-background-button-cancel);color:#777}.restock-label.error{background-color:var(--color-background-button-error);color:#fff;opacity:.7}.restock-label 
svg{vertical-align:middle}#chrome-extension-link{padding:9px;border:1px solid var(--color-grey-800);border-radius:10px;vertical-align:middle}#chrome-extension-link img{height:21px;padding:2px;vertical-align:middle}#realtime-conn-error{position:fixed;bottom:0;left:0;background:var(--color-warning);padding:10px;font-size:.8rem;color:#fff;opacity:.8}#bottom-horizontal-offscreen{position:fixed;bottom:0;left:0;right:0;width:100%;min-height:50px;max-height:50vh;background:hsla(0,0%,100%,.7215686275);border-top:1px solid var(--color-border-table-cell);padding:10px;box-shadow:0 -2px 10px rgba(0,0,0,.2);z-index:100;overflow-y:auto;transition:opacity .3s ease-in-out;scroll-margin-bottom:10px;display:flex;justify-content:center;align-items:center}ul#highlightSnippetActions{list-style:none}ul#highlightSnippetActions li{display:inline-block}


================================================
FILE: changedetectionio/store/__init__.py
================================================
import shutil

from changedetectionio.strtobool import strtobool

from changedetectionio.validate_url import is_safe_valid_url

from flask import (
    flash
)
from flask_babel import gettext

from ..model import App, Watch
from copy import deepcopy
from os import path, unlink
import json
import os
import re
import secrets
import sys
import time
import uuid as uuid_builder
from loguru import logger
from blinker import signal

from ..model.Tags import TagsDict

# Try to import orjson for faster JSON serialization
try:
    import orjson

    HAS_ORJSON = True
except ImportError:
    HAS_ORJSON = False

from ..processors import get_custom_watch_obj_for_processor

# Import the base class and helpers
from .file_saving_datastore import FileSavingDataStore, load_all_watches, load_all_tags, save_json_atomic
from .updates import DatastoreUpdatesMixin

# The server runs as a daemon and won't know its own URL for notification links when firing off a notification
BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'

def dictfilt(x, y):
    """Return a new dict holding only the entries of ``x`` whose key is in ``y``.

    Args:
        x: Source mapping.
        y: Iterable of keys to keep; keys that are not in ``x`` are ignored.

    Returns:
        dict: Filtered copy, in the iteration order of ``x``.
    """
    # Build the lookup set once. The previous lambda rebuilt it for every key of
    # ``x``, which was quadratic and exhausted one-shot iterables after the first key.
    wanted = set(y)
    return {key: x[key] for key in x if key in wanted}


# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
# Open a github issue if you know something :)
# https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change
class ChangeDetectionStore(DatastoreUpdatesMixin, FileSavingDataStore):
    __version_check = True

    def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
        """
        Create the store rooted at ``datastore_path`` and load (or create) its state.

        Args:
            datastore_path: Directory holding changedetection.json and the per-watch data.
            include_default_watches: Passed through to reload_state() for fresh installs.
            version_tag: Running application version, used to name the settings backup.
        """
        super().__init__()

        self.datastore_path = datastore_path
        self.start_time = time.time()

        # Snapshot the existing settings file for this version before anything gets loaded
        self.save_version_copy_json_db(version_tag)

        self.reload_state(
            datastore_path=datastore_path,
            include_default_watches=include_default_watches,
            version_tag=version_tag,
        )

    def save_version_copy_json_db(self, version_tag):
        """
        Create version-tagged backup of changedetection.json.

        This is called on version upgrades to preserve a backup in case
        the new version has issues.
        """
        import re

        version_text = re.sub(r'\D+', '-', version_tag)
        db_path = os.path.join(self.datastore_path, "changedetection.json")
        db_path_version_backup = os.path.join(self.datastore_path, f"changedetection-{version_text}.json")

        if not os.path.isfile(db_path_version_backup) and os.path.isfile(db_path):
            from shutil import copyfile
            logger.info(f"Backing up changedetection.json due to new version to '{db_path_version_backup}'.")
            copyfile(db_path, db_path_version_backup)

    def _load_settings(self, filename="changedetection.json"):
        """
        Read and parse the settings JSON file from the datastore directory.

        Uses orjson when it was importable, otherwise the standard json module.

        Args:
            filename: Name of the settings file inside ``self.datastore_path``.

        Returns:
            dict: The parsed settings document.
        """
        changedetection_json = os.path.join(self.datastore_path, filename)
        logger.info(f"Loading settings from {changedetection_json}")

        if not HAS_ORJSON:
            with open(changedetection_json, 'r', encoding='utf-8') as f:
                return json.load(f)

        # orjson parses bytes directly, so read the file in binary mode
        with open(changedetection_json, 'rb') as f:
            raw = f.read()
        return orjson.loads(raw)

    def _apply_settings(self, settings_data):
        """
        Apply loaded settings data to internal data structure.

        Args:
            settings_data: Dictionary loaded from changedetection.json
        """
        # Apply top-level fields
        if 'app_guid' in settings_data:
            self.__data['app_guid'] = settings_data['app_guid']
        if 'build_sha' in settings_data:
            self.__data['build_sha'] = settings_data['build_sha']
        if 'version_tag' in settings_data:
            self.__data['version_tag'] = settings_data['version_tag']

        # Apply settings sections
        if 'settings' in settings_data:
            if 'headers' in settings_data['settings']:
                self.__data['settings']['headers'].update(settings_data['settings']['headers'])
            if 'requests' in settings_data['settings']:
                self.__data['settings']['requests'].update(settings_data['settings']['requests'])
            if 'application' in settings_data['settings']:
                self.__data['settings']['application'].update(settings_data['settings']['application'])

                # Use our Tags dict with cleanup helpers etc
                # @todo Same for Watches
                existing_tags = settings_data.get('settings', {}).get('application', {}).get('tags') or {}
                self.__data['settings']['application']['tags'] = TagsDict(existing_tags, datastore_path=self.datastore_path)

        # More or less for the old format which had this data in the one url-watches.json
        # cant hurt to leave it here,
        if 'watching' in settings_data:
            self.__data['watching'].update(settings_data['watching'])

    def _rehydrate_tags(self):
        """Wrap every stored tag in a Tag.model object, forcing its processor to restock_diff."""
        from ..model import Tag

        tags = self.__data['settings']['application']['tags']
        for tag_uuid, tag_data in tags.items():
            # Force processor to restock_diff for override functionality (technical debt)
            tag_data['processor'] = 'restock_diff'

            # Existing keys are reassigned in place, so the dict size never changes mid-iteration
            tags[tag_uuid] = Tag.model(
                datastore_path=self.datastore_path,
                __datastore=self.__data,
                default=tag_data
            )
            logger.info(f"Tag: {tag_uuid} {tag_data['title']}")

    def _rehydrate_watches(self):
        """Rehydrate watch entities from stored data (converts dicts to Watch objects)."""
        watch_count = len(self.__data.get('watching', {}))
        if watch_count == 0:
            return

        logger.info(f"Rehydrating {watch_count} watches...")
        watching_rehydrated = {}
        for uuid, watch_dict in self.__data.get('watching', {}).items():
            if isinstance(watch_dict, dict):
                watching_rehydrated[uuid] = self.rehydrate_entity(uuid, watch_dict)
            else:
                logger.error(f"Watch UUID {uuid} already rehydrated")

        self.__data['watching'] = watching_rehydrated
        logger.success(f"Rehydrated {watch_count} watches into Watch objects")


    def _load_state(self, main_settings_filename="changedetection.json"):
        """
        Populate the in-memory datastore from storage: settings first, then watches, then tags.

        Args:
            main_settings_filename: Settings file to read from the datastore directory.
        """
        # 1. Settings file -> in-memory structure
        self._apply_settings(self._load_settings(filename=main_settings_filename))

        # 2. Watches: scan them from disk, then convert whatever is held into Watch objects
        self._load_watches()
        self._rehydrate_watches()

        # 3. Tags: individual tag.json files override any tags that came from settings
        #    (migration path), then every tag now held is rehydrated (legacy/fallback)
        self._load_tags()
        self._rehydrate_tags()

    def reload_state(self, datastore_path, include_default_watches, version_tag):
        """
        Reset the in-memory datastore and load it from storage, or create a new one.

        Three cases are handled, checked in this order:
        1. changedetection.json exists → load it and run schema updates.
        2. Only the legacy url-watches.json exists → load it, run schema updates,
           then load again from the current layout.
        3. Neither exists → initialise a fresh install, then load.

        Note: Legacy url-watches.json migration happens in update_26, not here.

        Args:
            datastore_path: Directory holding the datastore; replaces self.datastore_path.
            include_default_watches: Forwarded to init_fresh_install() in case 3.
            version_tag: Forwarded to init_fresh_install() in case 3.
        """
        logger.info(f"Datastore path is '{datastore_path}'")

        # CRITICAL: Update datastore_path (was using old path from __init__)
        self.datastore_path = datastore_path

        # Start from a clean App model; everything loaded below is merged into it
        self.__data = App.model(datastore_path=datastore_path)
        self.json_store_path = os.path.join(self.datastore_path, "changedetection.json")

        # Base definition for all watchers (deepcopy part of #569)
        self.generic_definition = deepcopy(Watch.model(datastore_path=datastore_path, __datastore=self.__data, default={}))

        # Load build SHA if available (Docker deployments); the path is relative to the working directory
        if path.isfile('changedetectionio/source.txt'):
            with open('changedetectionio/source.txt') as f:
                self.__data['build_sha'] = f.read()

        # Work out which on-disk layout (if any) is present
        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
        changedetection_json_old_schema = os.path.join(self.datastore_path, "url-watches.json")

        if os.path.exists(changedetection_json):
            # Load existing datastore (changedetection.json + watch.json files), then
            # run schema updates starting from the loaded schema version (0 if not set)
            logger.info("Loading existing datastore")
            self._load_state()
            current_schema = self.data['settings']['application'].get('schema_version', 0)
            self.run_updates(current_schema_version=current_schema)

        # Legacy datastore detected - trigger migration, even works if the schema is much before the migration step.
        elif os.path.exists(changedetection_json_old_schema):

            logger.critical(f"Legacy datastore detected at {changedetection_json_old_schema}, loading and running updates")
            self._load_state(main_settings_filename="url-watches.json")
            # update 26 will load the whole old config from disk to __data
            current_schema = self.__data['settings']['application'].get('schema_version', 0)
            self.run_updates(current_schema_version=current_schema)
            # Probably tags were also shifted to disk and many other changes, so best to reload here.
            self._load_state()

        else:
            # Neither settings file exists - set up a brand new datastore
            self.init_fresh_install(include_default_watches=include_default_watches,
                                    version_tag=version_tag)
            # Maybe they copied a bunch of watch subdirs across too
            self._load_state()

    def init_fresh_install(self, include_default_watches, version_tag):
        """
        Initialise a brand new datastore and write its first changedetection.json.

        Generates the app GUID and the RSS/API access tokens, marks the schema as
        up to date, optionally adds the two default watches, saves the settings,
        then records the version tag and processes the password-reset lockfile.

        Args:
            include_default_watches: When true, add the two example watches.
            version_tag: Application version stored under 'version_tag'.
        """
        # Generate app_guid FIRST (required for all operations)
        # GUIDs created while pytest is loaded/running carry a "test-" prefix
        if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
            self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
        else:
            self.__data['app_guid'] = str(uuid_builder.uuid4())

        # Generate RSS access token
        self.__data['settings']['application']['rss_access_token'] = secrets.token_hex(16)

        # Generate API access token
        self.__data['settings']['application']['api_access_token'] = secrets.token_hex(16)
        logger.warning(f"No datastore found, creating new datastore at {self.datastore_path}")

        # Set schema version to latest (no updates needed)
        # Takes the last entry returned by get_updates_available() - presumably the highest update number
        latest_update_available = self.get_updates_available().pop()
        logger.info(f"Marking fresh install to schema version {latest_update_available}")
        self.__data['settings']['application']['schema_version'] = latest_update_available

        # Add default watches if requested
        if include_default_watches:
            self.add_watch(
                url='https://news.ycombinator.com/',
                tag='Tech news',
                extras={'fetch_backend': 'html_requests'}
            )
            self.add_watch(
                url='https://changedetection.io/CHANGELOG.txt',
                tag='changedetection.io',
                extras={'fetch_backend': 'html_requests'}
            )

        # Create changedetection.json immediately; a failure is logged, not raised
        try:
            self._save_settings()
            logger.info("Created changedetection.json for new datastore")
        except Exception as e:
            logger.error(f"Failed to create initial changedetection.json: {e}")



        # Set version tag (done after the save above, so it only reaches disk on a later save)
        self.__data['version_tag'] = version_tag

        # Validate proxies.json if it exists
        _ = self.proxy_list  # Just to test parsing

        # NOTE(review): app_guid and both access tokens were assigned at the top of this
        # method, so the three "ensure" guards below look like leftovers - confirm.

        # Ensure app_guid exists (for datastores loaded from existing files)
        if 'app_guid' not in self.__data:
            if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
                self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
            else:
                self.__data['app_guid'] = str(uuid_builder.uuid4())
            self.commit()

        # Ensure RSS access token exists
        if not self.__data['settings']['application'].get('rss_access_token'):
            secret = secrets.token_hex(16)
            self.__data['settings']['application']['rss_access_token'] = secret
            self.commit()

        # Ensure API access token exists
        if not self.__data['settings']['application'].get('api_access_token'):
            secret = secrets.token_hex(16)
            self.__data['settings']['application']['api_access_token'] = secret
            self.commit()

        # Handle password reset lockfile: when removepassword.lock is present,
        # clear the stored password and delete the lockfile
        password_reset_lockfile = os.path.join(self.datastore_path, "removepassword.lock")
        if path.isfile(password_reset_lockfile):
            self.remove_password()
            unlink(password_reset_lockfile)

    def rehydrate_entity(self, uuid, entity, processor_override=None):
        """
        Turn a stored watch dict into the Watch object class that matches its processor.

        Args:
            uuid: UUID written into ``entity['uuid']``.
            entity: Stored watch data; it is modified in place before being wrapped.
            processor_override: When truthy, used instead of (and written to) ``entity['processor']``.

        Returns:
            Instance of the class returned by get_custom_watch_obj_for_processor().
        """
        entity['uuid'] = uuid

        processor_name = processor_override if processor_override else entity.get('processor')
        watch_class = get_custom_watch_obj_for_processor(processor_name)
        if processor_override:
            entity['processor'] = processor_override

        # Only trace the class for anything other than text_json_diff
        if entity.get('processor') != 'text_json_diff':
            logger.trace(f"Loading Watch object '{watch_class.__module__}.{watch_class.__name__}' for UUID {uuid}")

        return watch_class(datastore_path=self.datastore_path, __datastore=self.__data, default=entity)

    # ============================================================================
    # FileSavingDataStore Abstract Method Implementations
    # ============================================================================

    def _watch_exists(self, uuid):
        """Check if watch exists in datastore."""
        return uuid in self.__data['watching']

    def _get_watch_dict(self, uuid):
        """Get watch as dictionary."""
        return dict(self.__data['watching'][uuid])

    def _build_settings_data(self):
        """
        Build settings data structure for saving.

        Tags behavior depends on schema version:
        - Before update_28 (schema < 28): Tags saved in settings for migration
        - After update_28 (schema >= 28): Tags excluded from settings (in individual files)

        Returns:
            dict: Settings data ready for serialization
        """
        import copy

        # Deep copy settings to avoid modifying the original
        settings_copy = copy.deepcopy(self.__data['settings'])

        # Is saved as {uuid}/tag.json
        settings_copy['application']['tags'] = {}

        return {
            'note': 'Settings file - watches are in {uuid}/watch.json, tags are in {uuid}/tag.json',
            'app_guid': self.__data.get('app_guid'),
            'settings': settings_copy,
            'build_sha': self.__data.get('build_sha'),
            'version_tag': self.__data.get('version_tag')
        }

    def _save_settings(self):
        """
        Write the current settings to changedetection.json in the datastore directory.

        File backend implementation of the abstract method from FileSavingDataStore;
        the write itself is delegated to the save_json_atomic helper.

        Raises:
            OSError: If disk is full or other I/O error
        """
        changedetection_json = os.path.join(self.datastore_path, "changedetection.json")
        save_json_atomic(changedetection_json, self._build_settings_data(), label="settings")

    def _load_watches(self):
        """
        Merge every watch found in storage into the in-memory 'watching' mapping.

        File backend implementation of the abstract method from FileSavingDataStore:
        load_all_watches() reads the datastore directory and uses rehydrate_entity()
        to build each entry.
        """
        # @note this will also work for the old legacy format because self.__data['watching'] should already have them loaded by this point.
        watching = self.__data['watching']
        found_on_disk = load_all_watches(
            self.datastore_path,
            self.rehydrate_entity
        )
        watching.update(found_on_disk)
        logger.debug(f"Loaded {len(watching)} watches")

    def _load_tags(self):
        """
        Merge tags stored as individual tag.json files into the settings.

        Tags returned by load_all_tags() replace same-UUID tags already held in
        settings (migration path). Nothing is changed when no tags were found.
        """
        from ..model import Tag

        def rehydrate_tag(uuid, entity_dict):
            """Build a Tag object from stored data, forcing the restock_diff processor."""
            entity_dict['uuid'] = uuid
            # Force processor for override functionality
            entity_dict['processor'] = 'restock_diff'
            return Tag.model(datastore_path=self.datastore_path, __datastore=self.__data, default=entity_dict)

        tags = load_all_tags(self.datastore_path, rehydrate_tag)
        if not tags:
            return

        # tag.json files take precedence over whatever the settings file contained
        self.__data['settings']['application']['tags'].update(tags)
        logger.info(f"Loaded {len(tags)} tags from individual tag.json files")

    def _delete_watch(self, uuid):
        """
        Delete a watch from storage.

        File backend implementation: deletes entire {uuid}/ directory recursively.
        Implementation of abstract method from FileSavingDataStore.

        Args:
            uuid: Watch UUID to delete
        """
        watch_dir = os.path.join(self.datastore_path, uuid)
        if os.path.exists(watch_dir):
            shutil.rmtree(watch_dir)
            logger.info(f"Deleted watch directory: {watch_dir}")

    # ============================================================================
    # Watch Management Methods
    # ============================================================================

    def set_last_viewed(self, uuid, timestamp):
        """
        Store ``timestamp`` (truncated to int) as the watch's 'last_viewed', save the
        watch and emit the 'watch_check_update' signal.

        Args:
            uuid: UUID of the watch that was viewed.
            timestamp: Time of the view; converted with int().
        """
        viewed_at = int(timestamp)
        logger.debug(f"Setting watch UUID: {uuid} last viewed to {viewed_at}")

        watch = self.data['watching'][uuid]
        watch.update({'last_viewed': viewed_at})
        watch.commit()

        # Tell listeners that this watch changed
        watch_check_update = signal('watch_check_update')
        if watch_check_update:
            watch_check_update.send(watch_uuid=uuid)

    def remove_password(self):
        """Set the application password setting to False and save the settings."""
        application_settings = self.__data['settings']['application']
        application_settings['password'] = False
        self.commit()

    def clear_all_last_checksums(self):
        """
        Remove the last-checksum.txt file of every watch so each one is reprocessed.

        Intended for when global settings change: watches inherit that configuration
        and must be re-evaluated even though their own watch dict was not modified.

        The file is deleted (rather than setting was_edited=True) because:
        - was_edited is not persisted across restarts
        - File deletion ensures reprocessing works across app restarts

        Returns:
            int: Number of checksum files deleted
        """
        deleted_count = 0
        for uuid, watch in self.__data['watching'].items():
            if not watch.data_dir:
                continue

            checksum_file = os.path.join(watch.data_dir, 'last-checksum.txt')
            if not os.path.isfile(checksum_file):
                continue

            try:
                os.remove(checksum_file)
            except OSError as e:
                # Best effort: keep going with the remaining watches
                logger.warning(f"Failed to delete checksum file for {uuid}: {e}")
            else:
                deleted_count += 1
                logger.debug(f"Cleared checksum for watch {uuid}")

        logger.info(f"Cleared {deleted_count} checksum files to force reprocessing")
        return deleted_count

    def clear_checksums_for_tag(self, tag_uuid):
        """
        Remove the last-checksum.txt file of every watch that carries ``tag_uuid``.

        Intended for when a tag's configuration is edited: watches inherit tag
        settings and need to be reprocessed.

        Args:
            tag_uuid: UUID of the tag that was modified

        Returns:
            int: Number of checksum files deleted
        """
        deleted_count = 0
        for uuid, watch in self.__data['watching'].items():
            # Skip watches without tags or without this particular tag
            if not watch.get('tags') or tag_uuid not in watch['tags']:
                continue
            if not watch.data_dir:
                continue

            checksum_file = os.path.join(watch.data_dir, 'last-checksum.txt')
            if not os.path.isfile(checksum_file):
                continue

            try:
                os.remove(checksum_file)
            except OSError as e:
                # Best effort: keep going with the remaining watches
                logger.warning(f"Failed to delete checksum file for {uuid}: {e}")
            else:
                deleted_count += 1
                logger.debug(f"Cleared checksum for watch {uuid} (tag {tag_uuid})")

        logger.info(f"Cleared {deleted_count} checksum files for tag {tag_uuid}")
        return deleted_count

    def commit(self):
        """
        Persist the settings to disk right away via _save_settings().

        Fire-and-forget: any exception is logged and swallowed, never raised.
        The settings stay in memory when the save fails, so the next commit retries.
        """
        try:
            self._save_settings()
        except Exception as e:
            logger.error(f"Failed to commit settings: {e}")
        else:
            logger.debug("Committed settings")

    def update_watch(self, uuid, update_obj):

        # It's possible that the watch could be deleted before update
        if not self.__data['watching'].get(uuid):
            return

        with self.lock:

            # In python 3.9 we have the |= dict operator, but that still will lose data on nested structures...
            for dict_key, d in self.generic_definition.items():
                if isinstance(d, dict):
                    if update_obj is not None and dict_key in update_obj:
                        self.__data['watching'][uuid][dict_key].update(update_obj[dict_key])
                        del (update_obj[dict_key])

            self.__data['watching'][uuid].update(update_obj)

        # Immediate save
        self.__data['watching'][uuid].commit()

    @property
    def threshold_seconds(self):
        """
        Total of the configured global 'time_between_check' values, each multiplied
        by its factor from Watch.mtable (presumably seconds per unit). Unset or zero
        entries are ignored.
        """
        configured = self.__data['settings']['requests']['time_between_check']
        per_unit = ((configured.get(unit), factor) for unit, factor in Watch.mtable.items())
        return sum(amount * factor for amount, factor in per_unit if amount)

    @property
    def unread_changes_count(self):
        unread_changes_count = 0
        for uuid, watch in self.__data['watching'].items():
            if watch.history_n >= 2 and watch.viewed == False:
                unread_changes_count += 1

        return unread_changes_count

    @property
    def data(self):
        """
        Return the live datastore structure with ``active_base_url`` refreshed.

        The base URL is taken from the 'base_url' application setting, then from the
        BASE_URL environment variable, and finally falls back to BASE_URL_NOT_SET_TEXT.
        Note that the returned object is the internal structure itself, not a copy.
        """
        # Re #152, Return env base_url if not overriden
        # Re #148 - Some people have just {{ base_url }} in the body or title, but this may break some notification services
        #           like 'Join', so it's always best to at least set something obvious so that they are not broken.
        application_settings = self.__data['settings']['application']
        active_base_url = (
            application_settings.get('base_url')
            or os.getenv('BASE_URL')
            or BASE_URL_NOT_SET_TEXT
        )

        # Surrounding quotes and spaces are stripped before the value is stored
        application_settings['active_base_url'] = active_base_url.strip('" ')
        return self.__data

    # Delete a single watch by UUID
    def delete(self, uuid):
        """
        Remove a watch from storage and from memory, then emit 'watch_deleted'.

        Storage removal goes through _delete_watch(); a storage failure is logged and
        the in-memory removal still happens. Passing 'all' removes every watch
        (mainly for testing) and then pauses for one second.

        Args:
            uuid: Watch UUID to delete, or 'all' to delete all watches
        """
        def _purge_storage(target_uuid):
            # Storage errors are logged, not raised
            try:
                self._delete_watch(target_uuid)
            except Exception as e:
                logger.error(f"Failed to delete watch {target_uuid} from storage: {e}")

        def _announce_deletion(target_uuid):
            watch_delete_signal = signal('watch_deleted')
            if watch_delete_signal:
                watch_delete_signal.send(watch_uuid=target_uuid)

        with self.lock:
            if uuid != 'all':
                _purge_storage(uuid)
                del self.data['watching'][uuid]
                _announce_deletion(uuid)
                return

            # Snapshot the UUIDs first; the mapping is replaced once all are processed
            for watch_uuid in list(self.__data['watching'].keys()):
                _purge_storage(watch_uuid)
                _announce_deletion(watch_uuid)

            self.__data['watching'] = {}

            # Mainly used for testing to allow all items to flush before running next test
            time.sleep(1)

    # Clone a watch by UUID
    def clone(self, uuid):
        """
        Create a new watch carrying the same settings as an existing one.

        Args:
            uuid: UUID of the watch to copy

        Returns:
            Whatever add_watch() returns: the UUID of the new watch, or a
            falsy value (None/False) when add_watch() refused to create it.

        Raises:
            KeyError: If ``uuid`` is not present in the watch dict
        """
        with self.lock:
            source_watch = self.data['watching'][uuid]
            url = source_watch.get('url')
            # A shallow copy is enough here: add_watch() deep-copies its extras
            extras = dict(source_watch)

        # Return add_watch()'s result directly. Looking the new UUID up again
        # served no purpose and raised KeyError whenever add_watch() returned
        # None/False instead of a UUID.
        return self.add_watch(url=url, extras=extras)

    def url_exists(self, url):
        """
        Tell whether any watch already uses the given URL.

        The comparison is case-insensitive (both sides are lower-cased).

        Args:
            url: URL to look for

        Returns:
            bool: True if a watch with that URL exists
        """
        return any(
            watch['url'].lower() == url.lower()
            for watch in self.data['watching'].values()
        )

    # Remove a watch's data but keep the entry (URL etc)
    def clear_watch_history(self, uuid):
        """
        Clear a watch via its clear_watch() method, then commit() it.

        Args:
            uuid: UUID of the watch to clear

        Raises:
            KeyError: If ``uuid`` is not present in the watch dict
        """
        watches = self.__data['watching']
        watches[uuid].clear_watch()
        watches[uuid].commit()

    def add_watch(self, url, tag='', extras=None, tag_uuids=None, save_immediately=True):
        """
        Create a new watch and register it in the datastore.

        Args:
            url: URL to watch. A "https://changedetection.io/share/..." link is
                 fetched and a fixed list of permitted attributes from the
                 response is merged into the watch settings.
            tag: Comma-separated tag titles; each non-blank title is resolved
                 to a tag UUID through add_tag()
            extras: Optional dict of watch attributes to apply. It is
                    deep-copied, the caller's dict is never modified.
            tag_uuids: Optional iterable of tag UUID strings to attach directly
            save_immediately: When True, commit() the new watch straight away

        Returns:
            The UUID of the new watch. False when a share link's metadata could
            not be fetched. None when the URL is rejected by
            is_safe_valid_url() or the PAGE_WATCH_LIMIT has been reached.
        """
        # flash() is only called when a request is active. The URL validation
        # branch already made that distinction; the same guard is applied to
        # every flash() in this method.
        from flask import has_request_context

        if extras is None:
            extras = {}

        # Incase these are copied across, assume it's a reference and deepcopy()
        apply_extras = deepcopy(extras)
        apply_extras['tags'] = apply_extras.get('tags') or []

        # Was it a share link? try to fetch the data
        if url.startswith("https://changedetection.io/share/"):
            import requests

            try:
                r = requests.request(method="GET",
                                     url=url,
                                     # So we know to return the JSON instead of the human-friendly "help" page
                                     headers={'App-Guid': self.__data['app_guid']},
                                     timeout=5.0)  # 5 second timeout to prevent blocking
                res = r.json()

                # List of permissible attributes we accept from the wild internet
                for k in [
                    'body',
                    'browser_steps',
                    'css_filter',
                    'extract_text',
                    'headers',
                    'ignore_text',
                    'include_filters',
                    'method',
                    'paused',
                    'previous_md5',
                    'processor',
                    'subtractive_selectors',
                    'tag',
                    'tags',
                    'text_should_not_be_present',
                    'title',
                    'trigger_text',
                    'url',
                    'use_page_title_in_list',
                    'webdriver_js_execute_code',
                ]:
                    if res.get(k):
                        if k != 'css_filter':
                            apply_extras[k] = res[k]
                        else:
                            # We renamed the field and made it a list
                            apply_extras['include_filters'] = [res['css_filter']]

            except Exception as e:
                logger.error(f"Error fetching metadata for shared watch link {url} {str(e)}")
                if has_request_context():
                    flash(gettext("Error fetching metadata for {}").format(url), 'error')
                return False

        if not is_safe_valid_url(url):
            if has_request_context():
                flash(gettext('Watch protocol is not permitted or invalid URL format'), 'error')
            else:
                logger.error(f"add_watch: URL '{url}' is not permitted or invalid, skipping.")
            return None

        # Check PAGE_WATCH_LIMIT if set
        page_watch_limit = os.getenv('PAGE_WATCH_LIMIT')
        if page_watch_limit:
            try:
                page_watch_limit = int(page_watch_limit)
                current_watch_count = len(self.__data['watching'])
                if current_watch_count >= page_watch_limit:
                    logger.error(f"Watch limit reached: {current_watch_count}/{page_watch_limit} watches. Cannot add {url}")
                    if has_request_context():
                        flash(gettext("Watch limit reached ({}/{} watches). Cannot add more watches.").format(current_watch_count, page_watch_limit), 'error')
                    return None
            except ValueError:
                logger.warning(f"Invalid PAGE_WATCH_LIMIT value: {page_watch_limit}, ignoring limit check")

        if tag and isinstance(tag, str):
            # Then it's probably a string of the actual tag by name, split and add it
            for tag_title in tag.split(','):
                tag_uuid = self.add_tag(tag_title)
                # add_tag() returns False for a blank title (e.g. "a,,b" or a
                # trailing comma); that is not a UUID and must not be stored.
                if tag_uuid:
                    apply_extras['tags'].append(tag_uuid)

        # Or if UUIDs given directly
        if tag_uuids:
            apply_extras['tags'] = apply_extras['tags'] + [t.strip() for t in tag_uuids]

        # Make any uuids unique
        if apply_extras.get('tags'):
            apply_extras['tags'] = list(set(apply_extras.get('tags')))

        # If the processor also has its own Watch implementation
        watch_class = get_custom_watch_obj_for_processor(apply_extras.get('processor'))
        new_watch = watch_class(datastore_path=self.datastore_path, __datastore=self.__data, url=url)

        new_uuid = new_watch.get('uuid')

        logger.debug(f"Adding URL '{url}' - {new_uuid}")

        # Per-watch state must never be inherited from the extras (e.g. when cloning)
        for k in ['uuid', 'history', 'last_checked', 'last_changed', 'newest_history_key', 'previous_md5', 'viewed']:
            if k in apply_extras:
                del apply_extras[k]

        if not apply_extras.get('date_created'):
            apply_extras['date_created'] = int(time.time())

        new_watch.update(apply_extras)
        new_watch.ensure_data_dir_exists()
        self.__data['watching'][new_uuid] = new_watch

        if save_immediately:
            # Save immediately using commit
            new_watch.commit()
            logger.debug(f"Saved new watch {new_uuid}")

        logger.debug(f"Added '{url}'")

        return new_uuid

    def _watch_resource_exists(self, watch_uuid, resource_name):
        """
        Check if a watch-related resource exists.

        File backend implementation: checks if file exists in watch directory.

        Args:
            watch_uuid: Watch UUID
            resource_name: Name of resource (e.g., "last-screenshot.png")

        Returns:
            bool: True if resource exists
        """
        resource_path = os.path.join(self.datastore_path, watch_uuid, resource_name)
        return path.isfile(resource_path)

    def visualselector_data_is_ready(self, watch_uuid):
        """
        Tell whether the visual selector has everything it needs for a watch.

        Both "last-screenshot.png" and "elements.deflate" must exist; both
        are always checked.

        Returns:
            bool: True if both resources exist
        """
        required_resources = ("last-screenshot.png", "elements.deflate")
        # A list (not a generator) so that every resource is checked, as before
        return all([self._watch_resource_exists(watch_uuid, name) for name in required_resources])

    # Old sync_to_json and save_datastore methods removed - now handled by FileSavingDataStore parent class

    @property
    def proxy_list(self):
        """
        Build the dict of available proxies, keyed by proxy id.

        Sources, in order:
        1. <datastore_path>/proxies.json, when that file exists
        2. The 'extra_proxies' entries from the request settings that have
           both a 'proxy_name' and a 'proxy_url'
        3. A "no-proxy" entry, added when at least one proxy is present and
           ENABLE_NO_PROXY_OPTION (default 'True') is enabled

        The list is rebuilt (and proxies.json re-read) on every access.

        Returns:
            dict or None: The proxies, or None when there are none
        """
        proxies = {}
        proxies_json = os.path.join(self.datastore_path, 'proxies.json')

        # Load from external config file
        if path.isfile(proxies_json):
            if HAS_ORJSON:
                # orjson.loads() expects UTF-8 encoded bytes #3611
                with open(proxies_json, 'rb') as f:
                    proxies = orjson.loads(f.read())
            else:
                with open(proxies_json, encoding='utf-8') as f:
                    proxies = json.load(f)

        # Mapping from UI config if available
        for proxy in (self.data['settings']['requests'].get('extra_proxies') or ()):
            if proxy.get('proxy_name') and proxy.get('proxy_url'):
                # NOTE(review): the index part of the key has always been 0 for
                # every entry, so two UI proxies with the same name share a key.
                # get_preferred_proxy_for_watch() matches a watch's 'proxy'
                # value against these keys, so they are kept exactly as-is.
                key = "ui-0" + proxy.get('proxy_name')
                proxies[key] = {'label': proxy.get('proxy_name'), 'url': proxy.get('proxy_url')}

        if proxies and strtobool(os.getenv('ENABLE_NO_PROXY_OPTION', 'True')):
            proxies["no-proxy"] = {'label': "No proxy", 'url': ''}

        return proxies if len(proxies) else None

    def get_preferred_proxy_for_watch(self, uuid):
        """
        Returns the preferred proxy by ID key.

        Resolution order:
        1. None when no proxies are configured at all
        2. None when the watch explicitly selects "no-proxy" and
           ENABLE_NO_PROXY_OPTION (default 'True') is enabled
        3. The watch's own 'proxy' value, when it is a known proxy key
        4. The system-wide 'proxy' request setting, when it is a known proxy key
        5. Otherwise the first key of the proxy list

        A ``uuid`` that is not in the watch dict is treated like a watch with
        no proxy preference.

        :param uuid: UUID
        :return: proxy "key" id, or None
        """
        # self.proxy_list is a property that rebuilds the list on every access
        # (re-reading proxies.json when it exists), so evaluate it only once.
        proxy_list = self.proxy_list
        if proxy_list is None:
            return None

        watch = self.data['watching'].get(uuid)
        watch_proxy = watch.get('proxy') if watch is not None else None

        if strtobool(os.getenv('ENABLE_NO_PROXY_OPTION', 'True')) and watch_proxy == "no-proxy":
            return None

        # If it's a valid one
        if watch_proxy and watch_proxy in proxy_list:
            return watch_proxy

        # not valid (including None), try the system one
        system_proxy_id = self.data['settings']['requests'].get('proxy')
        # Is not None and exists
        if proxy_list.get(system_proxy_id):
            return system_proxy_id

        # Fallback - Did not resolve anything, or doesnt exist, use the first available
        return next(iter(proxy_list))

    @property
    def has_extra_headers_file(self):
        """bool: True when <datastore_path>/headers.txt exists as a file."""
        return os.path.isfile(os.path.join(self.datastore_path, 'headers.txt'))

    def get_all_base_headers(self):
        """
        Return a new dict holding the global 'headers' from the settings.

        Returns:
            dict: Copy of the configured headers, empty when none are set
        """
        # Global app settings
        return dict(self.data['settings'].get('headers', {}))

    def get_all_headers_in_textfile_for_watch(self, uuid):
        """
        Collect headers from the optional header text files that apply to a watch.

        Files are merged in this order, later files overriding earlier keys:
        1. <datastore_path>/headers.txt
        2. <watch.data_dir>/headers.txt
        3. <datastore_path>/headers-<tag>.txt for each tag of the watch, where
           <tag> is the tag title lower-cased with non-word characters and
           underscores removed

        Steps 2 and 3 only apply when the watch exists. A file that cannot be
        read is logged and skipped.

        Args:
            uuid: Watch UUID

        Returns:
            dict: The merged headers
        """
        from ..model.App import parse_headers_from_text_file
        headers = {}

        def merge_headers_file(filepath):
            # Merge one file into `headers` if it exists; never raise
            try:
                if os.path.isfile(filepath):
                    headers.update(parse_headers_from_text_file(filepath))
            except Exception as e:
                logger.error(f"ERROR reading headers.txt at {filepath} {str(e)}")

        # Global in /datastore/headers.txt
        merge_headers_file(os.path.join(self.datastore_path, 'headers.txt'))

        watch = self.data['watching'].get(uuid)
        if not watch:
            return headers

        # In /datastore/xyz-xyz/headers.txt
        merge_headers_file(os.path.join(watch.data_dir, 'headers.txt'))

        # In /datastore/headers-<tag name>.txt
        for tag in self.get_all_tags_for_watch(uuid=uuid).values():
            fname = "headers-" + re.sub(r'[\W_]', '', tag.get('title')).lower().strip() + ".txt"
            merge_headers_file(os.path.join(self.datastore_path, fname))

        return headers

    def get_tag_overrides_for_watch(self, uuid, attr):
        """
        Gather the values of one attribute across all tags of a watch.

        Args:
            uuid: Watch UUID
            attr: Attribute name to read from each tag

        Returns:
            list: The items of every tag's non-empty ``attr`` value, in tag order
        """
        collected = []
        tags = self.get_all_tags_for_watch(uuid=uuid)

        for tag in (tags or {}).values():
            if attr in tag and tag[attr]:
                collected.extend(tag[attr])

        return collected

    def add_tag(self, title):
        """
        Return the UUID of the tag with this title, creating the tag if needed.

        Titles are compared after stripping whitespace and lower-casing. A new
        tag is stored in the application settings under its UUID and then
        commit()ed.

        Args:
            title: Tag title

        Returns:
            The tag UUID, or False when the title is blank
        """
        wanted = title.strip().lower()
        logger.debug(f">>> Adding new tag - '{wanted}'")
        if not wanted:
            return False

        # If name exists, return that
        known_tags = self.__data['settings']['application'].get('tags', {})
        for known_uuid, known_tag in known_tags.items():
            if wanted == known_tag.get('title', '').lower().strip():
                logger.warning(f"Tag '{title}' already exists, skipping creation.")
                return known_uuid

        # Eventually almost everything todo with a watch will apply as a Tag
        # So we use the same model as a Watch
        with self.lock:
            from ..model import Tag
            new_tag = Tag.model(
                datastore_path=self.datastore_path,
                __datastore=self.__data,
                default={'title': title.strip(), 'date_created': int(time.time())},
            )
            new_uuid = new_tag.get('uuid')
            self.__data['settings']['application']['tags'][new_uuid] = new_tag

        # Save tag to its own tag.json file instead of settings
        new_tag.commit()
        return new_uuid

    def get_all_tags_for_watch(self, uuid):
        """
        Return the full tag entries linked to a watch, keyed by tag UUID.

        This should be in Watch model but Watch doesn't have access to
        datastore, not sure how to solve that yet.

        Args:
            uuid: Watch UUID

        Returns:
            dict: The application tags filtered (via dictfilt) by the watch's
            'tags' list; empty when the watch does not exist
        """
        watch = self.data['watching'].get(uuid)
        if not watch:
            return {}

        all_tags = self.__data['settings']['application']['tags']
        return dictfilt(all_tags, watch.get('tags', []))

    @property
    def extra_browsers(self):
        """
        List the usable extra browsers from the request settings.

        Only 'extra_browsers' entries with both a 'browser_name' and a
        'browser_connection_url' are included.

        Returns:
            list: ("extra_browser_<name>", "<name>") tuples
        """
        configured = self.__data['settings']['requests'].get('extra_browsers', [])
        return [
            ("extra_browser_" + browser['browser_name'], browser['browser_name'])
            for browser in configured
            if browser.get('browser_name') and browser.get('browser_connection_url')
        ]

    def tag_exists_by_name(self, tag_name):
        """
        Find a tag by title, ignoring case.

        Args:
            tag_name: Title to look for

        Returns:
            The first tag whose 'title' matches, or None when there is none
        """
        for tag in self.__data['settings']['application']['tags'].values():
            if tag.get('title', '').lower() == tag_name.lower():
                return tag
        return None

    def any_watches_have_processor_by_name(self, processor_name):
        """
        Tell whether at least one watch uses the given processor.

        Args:
            processor_name: Value to compare against each watch's 'processor'

        Returns:
            bool: True if any watch matches
        """
        return any(
            watch.get('processor') == processor_name
            for watch in self.data['watching'].values()
        )

    def search_watches_for_url(self, query, tag_limit=None, partial=False):
        """Search watches by URL, title, or error messages

        The query is lower-cased and stripped, then compared against each
        watch's lower-cased 'title', 'url' and 'last_error'.

        Args:
            query (str): Search term to match against watch URLs, titles, and error messages
            tag_limit (str, optional): Optional tag name to limit search results.
                A name that matches no existing tag yields an empty result.
            partial: (bool, optional): sub-string matching instead of exact matching

        Returns:
            list: List of UUIDs of watches that match the search criteria
        """
        query = query.lower().strip()

        tag_uuid = None
        if tag_limit:
            tag = self.tag_exists_by_name(tag_limit)
            if tag is None:
                # No such tag, so no watch can carry it. Previously this case
                # raised AttributeError on the first watch examined.
                return []
            tag_uuid = tag.get('uuid')

        def matches(text):
            lowered = text.lower()
            return query in lowered if partial else query == lowered

        matching_uuids = []
        for uuid, watch in self.data['watching'].items():
            # Filter by tag if requested
            if tag_limit and tag_uuid not in watch.get('tags', []):
                continue

            # Title and last_error are optional; the URL is always compared
            title = watch.get('title')
            last_error = watch.get('last_error')
            if ((title and matches(title))
                    or matches(watch.get('url', ''))
                    or (last_error and matches(last_error))):
                matching_uuids.append(uuid)

        return matching_uuids

    def get_unique_notification_tokens_available(self):
        """
        Merge the extra notification token values offered by the watches.

        Each distinct 'processor' value is asked only once, through the first
        watch found with that processor.

        Returns:
            dict: Combined result of extra_notification_token_values()
        """
        tokens = {}
        seen_processors = set()

        for watch in self.__data['watching'].values():
            processor = watch.get('processor')
            if processor in seen_processors:
                continue
            tokens.update(watch.extra_notification_token_values())
            seen_processors.add(processor)

        return tokens

    def get_unique_notification_token_placeholders_available(self):
        """
        Collect the placeholder descriptions of the extra notification tokens.

        Each distinct 'processor' value is asked only once, through the first
        watch found with that processor. This walks the watches the same way as
        get_unique_notification_tokens_available() and could be combined with it.

        Returns:
            list: Concatenated results of extra_notification_token_placeholder_info()
        """
        placeholders = []
        seen_processors = set()

        for watch in self.__data['watching'].values():
            processor = watch.get('processor')
            if processor in seen_processors:
                continue
            placeholders.extend(watch.extra_notification_token_placeholder_info())
            seen_processors.add(processor)

        return placeholders

    def add_notification_url(self, notification_url):
        """
        Add a URL to the application's 'notification_urls' unless already present.

        Presence is checked once without the lock and again while holding
        self.lock before appending. commit() is called only when the URL was
        actually added.

        Args:
            notification_url: The notification URL to add

        Returns:
            The ``notification_url`` that was passed in
        """
        logger.debug(f">>> Adding new notification_url - '{notification_url}'")

        if notification_url in self.data['settings']['application'].get('notification_urls', []):
            return notification_url

        with self.lock:
            app_settings = self.__data['settings']['application']
            current_urls = app_settings.get('notification_urls', [])

            if notification_url in current_urls:
                return notification_url

            # Append and update the datastore
            current_urls.append(notification_url)
            app_settings['notification_urls'] = current_urls

        self.commit()
        return notification_url

    # Schema update methods moved to store/updates.py (DatastoreUpdatesMixin)
    # This includes: get_updates_available(), run_updates(), and update_1() through update_26()


================================================
FILE: changedetectionio/store/base.py
================================================
"""
Base classes for the datastore.

This module defines the abstract interfaces that all datastore implementations must follow.
"""

from abc import ABC, abstractmethod
from threading import Lock
from loguru import logger


class DataStore(ABC):
    """
    Abstract interface that every datastore implementation has to provide.

    Concrete subclasses must implement:
    - reload_state(): loading data from persistent storage
    - add_watch() / update_watch() / delete(): managing watches
    - data: access to the underlying data structure
    """

    # Created once in the class body, so it is shared by every subclass and
    # instance that does not assign its own `lock`.
    lock = Lock()
    # Defaults to None; presumably assigned by the concrete implementation.
    datastore_path = None

    @abstractmethod
    def reload_state(self, datastore_path, include_default_watches, version_tag):
        """
        Load data from persistent storage.

        Args:
            datastore_path: Path to the datastore directory
            include_default_watches: Whether to create default watches if none exist
            version_tag: Application version string
        """

    @abstractmethod
    def add_watch(self, url, **kwargs):
        """
        Add a new watch.

        Args:
            url: URL to watch
            **kwargs: Additional watch parameters

        Returns:
            UUID of the created watch
        """

    @abstractmethod
    def update_watch(self, uuid, update_obj):
        """
        Update an existing watch.

        Args:
            uuid: Watch UUID
            update_obj: Dictionary of fields to update
        """

    @abstractmethod
    def delete(self, uuid):
        """
        Delete a watch.

        Args:
            uuid: Watch UUID to delete
        """

    @property
    @abstractmethod
    def data(self):
        """
        Access to the underlying data structure.

        Returns:
            Dictionary containing all datastore data
        """



================================================
FILE: changedetectionio/store/file_saving_datastore.py
================================================
"""
File-based datastore with individual watch persistence and immediate commits.

This module provides the FileSavingDataStore abstract class that implements:
- Individual watch.json file persistence
- Immediate commit-based persistence (watch.commit(), datastore.commit())
- Atomic file writes safe for NFS/NAS
"""

import glob
import json
import os
import tempfile
import time
from loguru import logger

from .base import DataStore
from .. import strtobool

# Try to import orjson for faster JSON serialization
try:
    import orjson
    HAS_ORJSON = True
except ImportError:
    HAS_ORJSON = False

# Fsync configuration: Force file data to disk for crash safety
# Default False to match legacy behavior (write-and-rename without fsync)
# Set to True for mission-critical deployments requiring crash consistency
# The FORCE_FSYNC_DATA_IS_CRITICAL environment variable is read once, when this
# module is imported; save_json_atomic() checks this flag before calling
# os.fsync() on the temp file it has just written.
FORCE_FSYNC_DATA_IS_CRITICAL = bool(strtobool(os.getenv('FORCE_FSYNC_DATA_IS_CRITICAL', 'False')))

# ============================================================================
# Helper Functions for Atomic File Operations
# ============================================================================

def _discard_temp_file(fd, fd_closed, temp_path):
    """Best-effort cleanup after a failed save: close the descriptor if still open, remove the temp file."""
    if not fd_closed:
        try:
            os.close(fd)
        except OSError:
            pass
    try:
        os.unlink(temp_path)
    except OSError:
        # Already gone, or not removable; nothing more can be done here
        pass


def save_json_atomic(file_path, data_dict, label="file", max_size_mb=10):
    """
    Save JSON data to disk using atomic write pattern.

    Generic helper for saving any JSON data (settings, watches, etc.) with:
    - Atomic write (temp file in the target directory + os.replace)
    - Optional fsync of the file data (FORCE_FSYNC_DATA_IS_CRITICAL)
    - Directory fsync for crash consistency (only for new files)
    - Size validation
    - Removal of the temp file on any failure

    Thread safety: Caller must hold datastore.lock to prevent concurrent modifications.
    Multi-process safety: Not supported - run only one app instance per datastore.

    Args:
        file_path: Path to target JSON file. A bare filename is saved into the
                   current working directory.
        data_dict: Dictionary to serialize
        label: Human-readable label for error messages (e.g., "watch", "settings")
        max_size_mb: Maximum allowed file size in MB

    Raises:
        ValueError: If serialized data exceeds max_size_mb
        OSError: If disk is full (ENOSPC), quota is exceeded (EDQUOT) or on
                 any other I/O error
    """
    import errno

    # Check if file already exists (before we start writing)
    # Directory fsync only needed for NEW files to persist the filename
    file_exists = os.path.exists(file_path)

    # Ensure parent directory exists. dirname() is '' for a bare filename,
    # which os.makedirs() rejects, so fall back to the current directory.
    parent_dir = os.path.dirname(file_path) or '.'
    os.makedirs(parent_dir, exist_ok=True)

    # Create temp file in same directory (required for NFS atomicity)
    fd, temp_path = tempfile.mkstemp(
        suffix='.tmp',
        prefix='json-',
        dir=parent_dir,
        text=False
    )

    fd_closed = False
    try:
        # Serialize data
        if HAS_ORJSON:
            data = orjson.dumps(data_dict, option=orjson.OPT_INDENT_2)
        else:
            data = json.dumps(data_dict, indent=2, ensure_ascii=False).encode('utf-8')

        # Safety check: validate size
        max_size = max_size_mb * 1024 * 1024
        data_size = len(data)
        if data_size > max_size:
            raise ValueError(
                f"{label.capitalize()} data is unexpectedly large: {data_size / 1024 / 1024:.2f}MB "
                f"(max: {max_size_mb}MB). This indicates a bug or data corruption."
            )

        # Write to temp file. os.write() may write fewer bytes than requested,
        # so keep writing until everything is out; otherwise a truncated file
        # could be renamed into place.
        view = memoryview(data)
        offset = 0
        while offset < data_size:
            written = os.write(fd, view[offset:])
            if written <= 0:
                raise OSError(errno.EIO, f"Short write while saving {label}")
            offset += written

        # Optional fsync: Force file data to disk for crash safety
        # Only if FORCE_FSYNC_DATA_IS_CRITICAL=True (default: False, matches legacy behavior)
        if FORCE_FSYNC_DATA_IS_CRITICAL:
            os.fsync(fd)

        os.close(fd)
        fd_closed = True

        # Atomic rename
        os.replace(temp_path, file_path)

        # Sync directory to ensure filename metadata is durable
        # OPTIMIZATION: Only needed for NEW files. Existing files already have
        # directory entry persisted, so we only need file fsync for data durability.
        if not file_exists:
            try:
                dir_fd = os.open(parent_dir, os.O_RDONLY)
                try:
                    os.fsync(dir_fd)
                finally:
                    os.close(dir_fd)
            except (OSError, AttributeError):
                # Windows doesn't support fsync on directories
                pass

    except OSError as e:
        _discard_temp_file(fd, fd_closed, temp_path)

        # Provide helpful error messages. Symbolic errno names are used
        # because the numeric values differ between platforms.
        if e.errno == errno.ENOSPC:
            raise OSError(f"Disk full: Cannot save {label}") from e
        elif e.errno == getattr(errno, 'EDQUOT', None):
            raise OSError(f"Disk quota exceeded: Cannot save {label}") from e
        else:
            raise OSError(f"I/O error saving {label}: {e}") from e

    except Exception:
        _discard_temp_file(fd, fd_closed, temp_path)
        raise


def save_entity_atomic(entity_dir, uuid, entity_dict, filename, entity_type, max_size_mb):
    """
    Atomically write an entity (watch/tag) as JSON into its directory.

    Thin wrapper around save_json_atomic(): the target is
    <entity_dir>/<filename> and the error label is "<entity_type> <uuid>".

    Args:
        entity_dir: Directory for this entity (e.g., /datastore/{uuid})
        uuid: Entity UUID (only used in the error label)
        entity_dict: Dictionary representation of the entity
        filename: JSON filename (e.g., 'watch.json', 'tag.json')
        entity_type: Type label for error messages (e.g., 'watch', 'tag')
        max_size_mb: Maximum allowed file size in MB

    Raises:
        ValueError: If serialized data exceeds max_size_mb
        OSError: If disk is full (ENOSPC) or other I/O error
    """
    save_json_atomic(
        os.path.join(entity_dir, filename),
        entity_dict,
        label=f"{entity_type} {uuid}",
        max_size_mb=max_size_mb,
    )


def save_watch_atomic(watch_dir, uuid, watch_dict):
    """
    Atomically write a watch to <watch_dir>/watch.json (10MB size limit).

    Convenience wrapper around save_entity_atomic for watches.
    Kept for backwards compatibility.
    """
    save_entity_atomic(
        entity_dir=watch_dir,
        uuid=uuid,
        entity_dict=watch_dict,
        filename="watch.json",
        entity_type="watch",
        max_size_mb=10,
    )



def load_watch_from_file(watch_json, uuid, rehydrate_entity_func):
    """
    Load a watch from its JSON file.

    Every failure is logged and reported as None, so that one unreadable watch
    does not stop the caller from loading the others:
    - file larger than 10MB, or content that cannot be parsed as JSON
      (including invalid UTF-8): logged as corrupted data
    - missing file, or any error raised while rehydrating: logged as an error

    Args:
        watch_json: Path to the watch.json file
        uuid: Watch UUID
        rehydrate_entity_func: Function to convert dict to Watch object,
                               called as rehydrate_entity_func(uuid, watch_data)

    Returns:
        Watch object or None if failed
    """
    MAX_WATCH_SIZE = 10 * 1024 * 1024  # 10MB

    try:
        # Check file size before reading
        file_size = os.path.getsize(watch_json)
        if file_size > MAX_WATCH_SIZE:
            logger.critical(
                f"CORRUPTED WATCH DATA: Watch {uuid} file is unexpectedly large: "
                f"{file_size / 1024 / 1024:.2f}MB (max: {MAX_WATCH_SIZE / 1024 / 1024}MB). "
                f"File: {watch_json}. This indicates a bug or data corruption. "
                f"Watch will be skipped."
            )
            return None

        try:
            if HAS_ORJSON:
                with open(watch_json, 'rb') as f:
                    watch_data = orjson.loads(f.read())
            else:
                with open(watch_json, 'r', encoding='utf-8') as f:
                    watch_data = json.load(f)
        except ValueError as e:
            # json.JSONDecodeError, orjson.JSONDecodeError and UnicodeDecodeError
            # are all ValueError subclasses. Catching here, around the parse
            # step only, keeps rehydration errors from being reported as
            # corrupted JSON.
            logger.critical(
                f"CORRUPTED WATCH DATA: Failed to parse JSON for watch {uuid}. "
                f"File: {watch_json}. Error: {e}. "
                f"Watch will be skipped and may need manual recovery from backup."
            )
            return None

        # Rehydrate and return watch object
        return rehydrate_entity_func(uuid, watch_data)

    except FileNotFoundError:
        logger.error(f"Watch file not found: {watch_json} for watch {uuid}")
        return None
    except Exception as e:
        logger.error(f"Failed to load watch {uuid} from {watch_json}: {e}")
        return None


def load_all_watches(datastore_path, rehydrate_entity_func):
    """
    Load all watches from individual watch.json files.

    SYNCHRONOUS loading: blocks until every discovered watch.json file has
    been processed. Progress is logged every 100 successfully loaded watches.

    Args:
        datastore_path: Path to the datastore directory
        rehydrate_entity_func: Function to convert dict to Watch object

    Returns:
        Dictionary of uuid -> Watch object. Empty when datastore_path does
        not exist; watches that failed to load are left out.
    """
    start_time = time.time()
    logger.info("Loading watches from individual watch.json files...")

    watching = {}

    if not os.path.exists(datastore_path):
        return watching

    # Find all watch.json files using glob (faster than manual directory traversal)
    glob_start = time.time()
    watch_files = glob.glob(os.path.join(datastore_path, "*", "watch.json"))
    glob_time = time.time() - glob_start

    total = len(watch_files)
    logger.debug(f"Found {total} watch.json files in {glob_time:.3f}s")

    loaded = 0
    failed = 0

    for watch_json in watch_files:
        # Extract UUID from path: /datastore/{uuid}/watch.json
        uuid_dir = os.path.basename(os.path.dirname(watch_json))
        watch = load_watch_from_file(watch_json, uuid_dir, rehydrate_entity_func)
        if watch:
            watching[uuid_dir] = watch
            loaded += 1

            if loaded % 100 == 0:
                logger.info(f"Loaded {loaded}/{total} watches...")
        else:
            # load_watch_from_file already logged the specific error
            failed += 1

    elapsed = time.time() - start_time

    # time.time() may return the same value twice on a coarse clock (or step
    # backwards when the system clock is adjusted), so never divide by a
    # non-positive duration - that would crash startup with ZeroDivisionError.
    rate = loaded / elapsed if elapsed > 0 else 0.0

    if failed > 0:
        logger.critical(
            f"LOAD COMPLETE: {loaded} watches loaded successfully, "
            f"{failed} watches FAILED to load (corrupted or invalid) "
            f"in {elapsed:.2f}s ({rate:.0f} watches/sec)"
        )
    else:
        logger.info(f"Loaded {loaded} watches from disk in {elapsed:.2f}s ({rate:.0f} watches/sec)")

    return watching


def load_tag_from_file(tag_json, uuid, rehydrate_entity_func):
    """
    Read a single tag.json file and rehydrate it into a Tag object.

    Files bigger than 1MB are reported as corrupted and skipped. After
    parsing, the 'processor' key of the data is always set to 'restock_diff'
    before the data is handed to rehydrate_entity_func.

    Args:
        tag_json: Path to the tag.json file
        uuid: Tag UUID
        rehydrate_entity_func: Function to convert dict to Tag object

    Returns:
        Tag object, or None when the file is oversized, missing, unparsable
        or otherwise fails to load

    Raises:
        ValueError: only when orjson is unavailable and the error neither is
            a json.JSONDecodeError nor mentions "invalid json"
    """
    size_limit = 1 * 1024 * 1024  # 1MB

    try:
        # Refuse to read implausibly large files
        size_on_disk = os.path.getsize(tag_json)
        if size_on_disk > size_limit:
            size_mb = size_on_disk / 1024 / 1024
            limit_mb = size_limit / 1024 / 1024
            logger.critical(
                f"CORRUPTED TAG DATA: Tag {uuid} file is unexpectedly large: "
                f"{size_mb:.2f}MB (max: {limit_mb}MB). "
                f"File: {tag_json}. This indicates a bug or data corruption. "
                f"Tag will be skipped."
            )
            return None

        if not HAS_ORJSON:
            with open(tag_json, 'r', encoding='utf-8') as f:
                payload = json.load(f)
        else:
            with open(tag_json, 'rb') as f:
                payload = orjson.loads(f.read())

        payload['processor'] = 'restock_diff'
        # Convert the plain dict into a Tag object
        # processor_override is set inside the rehydration function
        return rehydrate_entity_func(uuid, payload)

    except ValueError as e:
        # json.JSONDecodeError is a ValueError subclass, and orjson raises
        # ValueError for invalid JSON, so one handler covers both parsers.
        is_parse_failure = (
            isinstance(e, json.JSONDecodeError)
            or "invalid json" in str(e).lower()
            or HAS_ORJSON
        )
        if not is_parse_failure:
            # Not a JSON parsing error - let the caller see it
            raise
        logger.critical(
            f"CORRUPTED TAG DATA: Failed to parse JSON for tag {uuid}. "
            f"File: {tag_json}. Error: {e}. "
            f"Tag will be skipped and may need manual recovery from backup."
        )
        return None
    except FileNotFoundError:
        logger.debug(f"Tag file not found: {tag_json} for tag {uuid}")
        return None
    except Exception as e:
        logger.error(f"Failed to load tag {uuid} from {tag_json}: {e}")
        return None


def load_all_tags(datastore_path, rehydrate_entity_func):
    """
    Collect every tag stored as {uuid}/tag.json under the datastore.

    The UUID of each tag is taken from the name of the directory that
    contains its tag.json file.

    Args:
        datastore_path: Path to the datastore directory
        rehydrate_entity_func: Function to convert dict to Tag object

    Returns:
        Dictionary of uuid -> Tag object (empty when the datastore path does
        not exist or holds no tag.json files)
    """
    logger.info("Loading tags from individual tag.json files...")

    found_tags = {}

    if not os.path.exists(datastore_path):
        return found_tags

    # One tag.json per UUID directory
    candidate_files = glob.glob(os.path.join(datastore_path, "*", "tag.json"))
    if not candidate_files:
        logger.debug("No tag.json files found")
        return found_tags

    logger.debug(f"Found {len(candidate_files)} tag.json files")

    failures = 0
    for tag_path in candidate_files:
        # /datastore/{uuid}/tag.json -> {uuid}
        tag_uuid = os.path.basename(os.path.dirname(tag_path))
        tag_obj = load_tag_from_file(tag_path, tag_uuid, rehydrate_entity_func)
        if not tag_obj:
            # load_tag_from_file already logged the specific error
            failures += 1
            continue
        found_tags[tag_uuid] = tag_obj

    # Every file either loaded or failed
    successes = len(candidate_files) - failures

    if failures > 0:
        logger.warning(f"Loaded {successes} tags, {failures} tags FAILED to load")
    else:
        logger.info(f"Loaded {successes} tags from disk")

    return found_tags


# ============================================================================
# FileSavingDataStore Class
# ============================================================================

class FileSavingDataStore(DataStore):
    """
    Abstract datastore base describing the persistence hooks a backend must provide.

    This class itself stores nothing: every hook below raises
    NotImplementedError until a subclass overrides it.

    Intended features (provided by subclasses / related code, not by this class body):
    - Individual watch.json files (one per watch)
    - Immediate persistence via watch.commit() and datastore.commit()
    - Atomic file writes for crash safety

    Subclasses must implement:
    - _save_settings(): Persist settings
    - _load_watches(): Load all watches
    - _delete_watch(uuid): Remove one watch
    - rehydrate_entity(): Convert dict to Watch object (not declared in this class)
    - Access to internal __data structure for watch management
    """

    def __init__(self):
        """Initialise the parent DataStore; no extra state is set up here."""
        super().__init__()

    def _save_settings(self):
        """
        Save settings to storage (polymorphic).

        Subclasses must implement for their backend, for example:
        - File: changedetection.json
        - Redis: SET settings
        - SQL: UPDATE settings table

        Raises:
            NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError("Subclass must implement _save_settings")


    def _load_watches(self):
        """
        Load all watches from storage (polymorphic).

        Subclasses must implement for their backend, for example:
        - File: Read individual watch.json files
        - Redis: SCAN watch:* keys
        - SQL: SELECT * FROM watches

        Raises:
            NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError("Subclass must implement _load_watches")

    def _delete_watch(self, uuid):
        """
        Delete a watch from storage (polymorphic).

        Subclasses must implement for their backend, for example:
        - File: Delete {uuid}/ directory recursively
        - Redis: DEL watch:{uuid}
        - SQL: DELETE FROM watches WHERE uuid=?

        Args:
            uuid: Watch UUID to delete

        Raises:
            NotImplementedError: always, unless overridden by a subclass
        """
        raise NotImplementedError("Subclass must implement _delete_watch")




================================================
FILE: changedetectionio/store/updates.py
================================================
"""
Schema update migrations for the datastore.

This module contains all schema version upgrade methods (update_1 through update_N).
These are mixed into ChangeDetectionStore to keep the main store file focused.

IMPORTANT: Any update may be run even on a new install where the schema is already correct.
Therefore each `update_n` must carefully check whether it actually needs to do anything.
"""

import os
import re
import shutil
import tarfile
import time
from loguru import logger
from copy import deepcopy


# Try to import orjson for faster JSON serialization
try:
    import orjson
    HAS_ORJSON = True
except ImportError:
    HAS_ORJSON = False

from ..html_tools import TRANSLATE_WHITESPACE_TABLE
from ..processors.restock_diff import Restock
from ..blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT
from ..model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH

def create_backup_tarball(datastore_path, update_number):
    """
    Create a tarball backup of the entire datastore structure before running an update.

    Includes:
    - All {uuid}/watch.json files
    - All {uuid}/tag.json files
    - changedetection.json (settings, if it exists)
    - url-watches.json (legacy format, if it exists)
    - Directory structure preserved

    Directories whose name starts with '.' or 'before-update-' are skipped.

    Args:
        datastore_path: Path to datastore directory
        update_number: Update number being applied

    Returns:
        str: Path to created tarball, or None if backup failed (a partially
        written tarball is removed on a best-effort basis)

    Restoration:
    To restore from a backup:
        cd /path/to/datastore
        tar -xzf before-update-N-timestamp.tar.gz
    This will restore all watch.json and tag.json files and settings to their pre-update state.
    """
    timestamp = int(time.time())
    backup_filename = f"before-update-{update_number}-{timestamp}.tar.gz"
    backup_path = os.path.join(datastore_path, backup_filename)

    try:
        logger.info(f"Creating backup tarball: {backup_filename}")

        watch_count = 0
        tag_count = 0

        with tarfile.open(backup_path, "w:gz") as tar:
            # Backup changedetection.json if it exists (new format)
            changedetection_json = os.path.join(datastore_path, "changedetection.json")
            if os.path.isfile(changedetection_json):
                tar.add(changedetection_json, arcname="changedetection.json")
                logger.debug("Added changedetection.json to backup")

            # Backup url-watches.json if it exists (legacy format)
            url_watches_json = os.path.join(datastore_path, "url-watches.json")
            if os.path.isfile(url_watches_json):
                tar.add(url_watches_json, arcname="url-watches.json")
                logger.debug("Added url-watches.json to backup")

            # Backup all watch/tag directories with their JSON files
            # This preserves the UUID directory structure
            for entry in os.listdir(datastore_path):
                entry_path = os.path.join(datastore_path, entry)

                # Skip if not a directory
                if not os.path.isdir(entry_path):
                    continue

                # Skip hidden directories and backup directories
                if entry.startswith(('.', 'before-update-')):
                    continue

                # Backup watch.json if exists
                watch_json = os.path.join(entry_path, "watch.json")
                if os.path.isfile(watch_json):
                    tar.add(watch_json, arcname=f"{entry}/watch.json")
                    watch_count += 1

                    if watch_count % 100 == 0:
                        logger.debug(f"Backed up {watch_count} watch.json files...")

                # Backup tag.json if exists
                tag_json = os.path.join(entry_path, "tag.json")
                if os.path.isfile(tag_json):
                    tar.add(tag_json, arcname=f"{entry}/tag.json")
                    tag_count += 1

        # Report success only after the archive has been closed, i.e. once the
        # gzip stream has actually been flushed and finalised on disk.
        logger.success(f"Backup created: {backup_filename} ({watch_count} watches from disk, {tag_count} tags from disk)")
        return backup_path

    except Exception as e:
        logger.error(f"Failed to create backup tarball: {e}")
        # Try to clean up partial backup
        if os.path.exists(backup_path):
            try:
                os.unlink(backup_path)
            except OSError as cleanup_error:
                # Best effort only; OSError is what unlink raises, so interrupts
                # and interpreter exits are no longer swallowed here.
                logger.warning(f"Could not remove partial backup {backup_path}: {cleanup_error}")
        return None


class DatastoreUpdatesMixin:
    """
    Mixin class containing all schema update methods.

    This class is inherited by ChangeDetectionStore to provide schema migration functionality.
    Each update_N method upgrades the schema from version N-1 to version N.
    """

    def get_updates_available(self):
        """
        List the version numbers of every update method found on this object.

        A bound method counts as an update when its name ends in
        ``update_<digits>``; the digits are the version number.

        Returns:
            list: Version numbers as ints in ascending order (e.g., [1, 2, 3, ..., 26])
        """
        import inspect

        name_pattern = re.compile(r'update_(\d+)$')
        bound_methods = inspect.getmembers(self, predicate=inspect.ismethod)
        hits = (name_pattern.search(method_name) for method_name, _ in bound_methods)
        return sorted(int(hit.group(1)) for hit in hits if hit)

    def run_updates(self, current_schema_version=None):
        """
        Run all pending schema updates sequentially.

        Args:
            current_schema_version: Optional current schema version. If provided, only run updates
                                   greater than this version. If None, uses the schema version from
                                   the datastore. If no schema version exists in datastore and there
                                   are no watches, it is treated as a fresh install: the schema version
                                   is set to the latest update number and nothing is run.

        IMPORTANT: Each update could be run even when they have a new install and the schema is correct.
        Therefore - each `update_n` should be very careful about checking if it needs to actually run.

        Process:
        1. Get list of available updates
        2. Refuse to run (exit code 1) if watches/tags are not model objects
        3. For each update > current schema version:
           - Create backup of datastore (a failed backup only logs a warning)
           - Run update method
           - Record the new schema version and call self.commit()
        4. If any update raises, log it and exit the process with code 1
        """
        import sys

        updates_available = self.get_updates_available()

        # The update methods operate on model objects, so a raw (non re-hydrated)
        # structure must never be migrated. Only the first entry is sampled.
        if self.data.get('watching'):
            test_watch = self.data['watching'].get(next(iter(self.data.get('watching', {}))))
            from ..model.Watch import model

            if not isinstance(test_watch, model):
                logger.critical("Cannot run updates! Watch structure must be re-hydrated back to a Watch model object!")
                sys.exit(1)

        tags = self.data['settings']['application'].get('tags', {})
        if tags:
            test_tag = tags.get(next(iter(tags)))
            from ..model.Tag import model as tag_model

            if not isinstance(test_tag, tag_model):
                logger.critical("Cannot run updates! Watch tag/group structure must be re-hydrated back to a Tag model object!")
                sys.exit(1)

        # Determine current schema version
        if current_schema_version is None:
            # Check if schema_version exists in datastore
            current_schema_version = self.data['settings']['application'].get('schema_version')

            if current_schema_version is None:
                # No schema version found - could be a fresh install or very old datastore
                # If this is a fresh/new config with no watches, assume it's up-to-date
                # and set to latest update number (no updates needed)
                if len(self.data['watching']) == 0:
                    # Get the highest update number from available update methods
                    latest_update = updates_available[-1] if updates_available else 0
                    logger.info(f"No schema version found and no watches exist - assuming fresh install, setting schema_version to {latest_update}")
                    self.data['settings']['application']['schema_version'] = latest_update
                    self.commit()
                    return  # No updates needed for fresh install
                else:
                    # Has watches but no schema version - likely old datastore, run all updates
                    logger.warning("No schema version found but watches exist - running all updates from version 0")
                    current_schema_version = 0

        logger.info(f"Current schema version: {current_schema_version}")

        for update_n in updates_available:
            if update_n <= current_schema_version:
                continue

            logger.critical(f"Applying update_{update_n}")

            # Create tarball backup of entire datastore structure
            # This includes all watch.json files, settings, and preserves directory structure
            backup_path = create_backup_tarball(self.datastore_path, update_n)
            if backup_path:
                logger.info(f"Backup created at: {backup_path}")
            else:
                logger.warning("Backup creation failed, but continuing with update")

            try:
                getattr(self, f"update_{update_n}")()
            except Exception as e:
                logger.critical(f"Error while trying update_{update_n}")
                logger.exception(e)
                sys.exit(1)
            else:
                # Bump the version
                self.data['settings']['application']['schema_version'] = update_n
                self.commit()

                logger.success(f"Update {update_n} completed")

    # ============================================================================
    # Individual Update Methods
    # ============================================================================

    def update_1(self):
        """
        Carry the legacy 'minutes_between_check' value over to 'time_between_check'.

        Applies to the global request settings and to every watch; falsy
        legacy values (unset, 0, None) are ignored.
        """
        request_settings = self.data['settings']['requests']
        global_minutes = request_settings.get('minutes_between_check')
        if global_minutes:
            request_settings['time_between_check']['minutes'] = global_minutes
            # Remove the default 'hours' that is set from the model
            request_settings['time_between_check']['hours'] = None

        for uuid, watch in self.data['watching'].items():
            if 'minutes_between_check' not in watch:
                continue
            # Only upgrade individual watch time if it was set
            watch_minutes = watch.get('minutes_between_check', False)
            if watch_minutes:
                self.data['watching'][uuid]['time_between_check']['minutes'] = watch_minutes

    def update_2(self):
        """
        Write each watch's in-memory history out to a flat `history.txt` index.

        Better than SQLite because this list is only appended to, and works across NAS / NFS type setups.
        Each line is "<timestamp>,<path>"; the file goes into the watch's own
        directory, and is skipped (with a warning) when that directory is missing.
        """
        # @todo test running this on a newly updated one (when this already ran)
        for uuid, watch in self.data['watching'].items():
            legacy_history = watch.get('history', False)
            if not legacy_history:
                continue

            # Timestamps used to be keyed as str, int() normalises them now too
            rows = [
                "{},{}\n".format(int(stamp), snapshot)
                for stamp, snapshot in legacy_history.items()
            ]

            if rows:
                watch_dir = os.path.join(self.datastore_path, uuid)
                if not os.path.exists(watch_dir):
                    logger.warning(f"Datastore history directory {watch_dir} does not exist, skipping history import.")
                else:
                    with open(os.path.join(watch_dir, "history.txt"), "w") as f:
                        f.writelines(rows)

            # No longer needed, dynamically pulled from the disk when needed.
            # Reset to an empty dict (rather than deleting) so nothing breaks if this schema runs on an earlier version.
            # In the distant future we can remove this entirely
            self.data['watching'][uuid]['history'] = {}

    def update_3(self):
        """
        Placeholder for the `last_changed` clean-up; deliberately does nothing.

        `last_changed` was incorrectly stored when there was no change, which confused the output list table.
        """
        # Background: https://github.com/dgtlmoon/changedetection.io/pull/835
        return None

    def update_4(self):
        """
        Drop `last_changed` from every watch.

        `last_changed` is not needed, that information is pulled from the history.txt index.
        Watches that do not carry the key are left untouched.
        """
        for uuid, watch in self.data['watching'].items():
            try:
                # Remove it from the struct
                del watch['last_changed']
            except KeyError:
                # Already absent - nothing to do. Only KeyError is expected here;
                # a bare except would also hide KeyboardInterrupt/SystemExit.
                continue

    def update_5(self):
        """
        If the watch notification body, title look the same as the global one, unset it, so the watch defaults back to using the main settings.
        In other words - the watch notification_title and notification_body are not needed if they are the same as the default one.

        Values are compared after applying TRANSLATE_WHITESPACE_TABLE to both
        sides. Watches that raise while being inspected are skipped.
        """
        app_settings = self.data['settings']['application']
        current_system_body = app_settings['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
        # Titles must be compared against the system *title*. This used to read
        # 'notification_body' by mistake, so a watch title matching the system
        # title was never unset. `.get(...) or ''` keeps a missing/None title
        # from raising, since the key was never required here before.
        current_system_title = (app_settings.get('notification_title') or '').translate(TRANSLATE_WHITESPACE_TABLE)

        for uuid, watch in self.data['watching'].items():
            try:
                watch_body = watch.get('notification_body', '')
                if watch_body and watch_body.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_body:
                    # Looks the same as the default one, so unset it
                    watch['notification_body'] = None

                watch_title = watch.get('notification_title', '')
                if watch_title and watch_title.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_title:
                    # Looks the same as the default one, so unset it
                    watch['notification_title'] = None
            except Exception:
                # Best effort: one odd watch must not abort the whole migration
                continue

    def update_7(self):
        """
        Strip the early hard-coded request headers from the global settings.

        We incorrectly used common header overrides that should only apply to Requests.
        These are now handled in content_fetcher::html_requests and shouldnt be passed to Playwright/Selenium.
        """
        global_headers = self.data['settings']['headers']
        # These were hard-coded in early versions
        legacy_names = ('User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language')
        for header_name in legacy_names:
            # Only entries holding a truthy value are removed
            if not global_headers.get(header_name):
                continue
            del global_headers[header_name]

    def update_8(self):
        """
        Convert filters to a list of filters css_filter -> include_filters.

        A watch with a non-empty `css_filter` gets `include_filters` set to a
        one-element list holding that filter; other watches are left alone.
        """
        for uuid, watch in self.data['watching'].items():
            try:
                existing_filter = watch.get('css_filter', '')
                if existing_filter:
                    watch['include_filters'] = [existing_filter]
            except Exception:
                # Best effort per watch. `Exception` (not a bare except) so that
                # KeyboardInterrupt/SystemExit still stop the migration.
                continue

    def update_9(self):
        """Convert old static notification tokens to jinja2 tokens."""
        # Each watch
        # only { } not {{ or }}
        r = r'(? '{key_exists_as_value}'")
                self.data['settings']['application']['notification_format'] = key_exists_as_value

            for uuid, watch in self.data['watching'].items():
                n_format = self.data['watching'][uuid].get('notification_format')
                key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None)
                if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:  # key of "Plain text"
                    logger.success(f"['watching'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
                    self.data['watching'][uuid]['notification_format'] = key_exists_as_value  # should be 'text' or whatever

            for uuid, tag in self.data['settings']['application']['tags'].items():
                n_format = self.data['settings']['application']['tags'][uuid].get('notification_format')
                key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None)
                if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:  # key of "Plain text"
                    logger.success(
                        f"['settings']['application']['tags'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
                    self.data['settings']['application']['tags'][uuid][
                        'notification_format'] = key_exists_as_value  # should be 'text' or whatever

        from ..notification import valid_notification_formats
        formats = deepcopy(valid_notification_formats)
        re_run(formats)
        # And in previous versions, it was "text" instead of Plain text, Markdown instead of "Markdown to HTML"
        formats['text'] = 'Text'
        formats['markdown'] = 'Markdown'
        re_run(formats)

    def update_24(self):
        """
        Normalise the stored RSS content format name.

        RSS types should be inline with the same names as notification types:
        a value mentioning 'html' (and not 'text') becomes 'htmlcolor', anything
        else - unset, text-like or unknown - becomes RSS_CONTENT_FORMAT_DEFAULT.
        """
        app_settings = self.data['settings']['application']
        stored_format = app_settings.get('rss_content_format')

        # 'text' wins over 'html' (might have been 'plaintext, 'plain text' or something)
        is_html_like = bool(stored_format) and 'text' not in stored_format and 'html' in stored_format

        # Everything that is not html-like safely falls back to the default
        app_settings['rss_content_format'] = 'htmlcolor' if is_html_like else RSS_CONTENT_FORMAT_DEFAULT

    def update_25(self):
        """
        Different processors now hold their own history.txt.

        For every watch whose processor is not 'text_json_diff', rename
        `history.txt` to `history-<processor>.txt` - but only when the source
        exists and the target does not, so an existing index is never overwritten.
        """
        # NOTE(review): both paths are built from the datastore root, not from the
        # watch's own {uuid} directory - confirm that this is really intended.
        old_history_txt = os.path.join(self.datastore_path, "history.txt")

        for uuid, watch in self.data['watching'].items():
            processor = watch.get('processor')
            if processor == 'text_json_diff':
                continue

            new_history_txt = os.path.join(self.datastore_path, f"history-{processor}.txt")
            # Check the real destination path. The bare file name used to be
            # tested instead, which resolves against the current working
            # directory and therefore did not protect an existing target.
            if os.path.isfile(old_history_txt) and not os.path.isfile(new_history_txt):
                logger.debug(f"Renaming history index {old_history_txt} to {new_history_txt}...")
                shutil.move(old_history_txt, new_history_txt)

    def migrate_legacy_db_format(self):
        """
        Migration: Individual watch persistence (COPY-based, safe rollback).

        Takes the watches already held in self.data['watching'] (loaded from the
        legacy url-watches.json format) and migrates to:
        - {uuid}/watch.json (per watch, written via watch.commit())
        - changedetection.json (settings only, written via self._save_settings())

        IMPORTANT:
        - A tarball backup (before-update-26-timestamp.tar.gz) is created before migration
        - url-watches.json is LEFT INTACT for rollback safety
        - Users can roll back by simply downgrading to the previous version
        - Or restore from tarball: tar -xzf before-update-26-*.tar.gz

        This is a dedicated migration release - users upgrade at their own pace.

        Raises:
            Exception: when a watch cannot be saved, a watch.json file is missing
                afterwards, or changedetection.json cannot be written. The
                underlying error is chained as the cause.
            SystemExit: (exit code 1) when changedetection.json does not exist
                after it was saved.
        """
        logger.critical("=" * 80)
        logger.critical("Running migration: Individual watch persistence (update_26)")
        logger.critical("COPY-based migration: url-watches.json will remain intact for rollback")
        logger.critical("=" * 80)

        # Populate settings from legacy data
        logger.info("Populating settings from legacy data...")
        watch_count = len(self.data['watching'])
        logger.success(f"Loaded {watch_count} watches from legacy format")

        # Phase 1: Save all watches to individual files
        logger.critical(f"Phase 1/4: Saving {watch_count} watches to individual watch.json files...")

        saved_count = 0
        for uuid, watch in self.data['watching'].items():
            try:
                watch.commit()
                saved_count += 1

                if saved_count % 100 == 0:
                    logger.info(f"  Progress: {saved_count}/{watch_count} watches migrated...")

            except Exception as e:
                logger.error(f"Failed to save watch {uuid}: {e}")
                # Chain the original error so its traceback is not lost
                raise Exception(
                    f"Migration failed: Could not save watch {uuid}. "
                    f"url-watches.json remains intact, safe to retry. Error: {e}"
                ) from e

        logger.critical(f"Phase 1 complete: Saved {saved_count} watches")

        # Phase 2: Verify all files exist
        logger.critical("Phase 2/4: Verifying all watch.json files were created...")

        missing = [
            uuid for uuid in self.data['watching']
            if not os.path.isfile(os.path.join(self.datastore_path, uuid, "watch.json"))
        ]

        if missing:
            raise Exception(
                f"Migration failed: {len(missing)} watch files missing: {missing[:5]}... "
                f"url-watches.json remains intact, safe to retry."
            )

        logger.critical(f"Phase 2 complete: Verified {watch_count} watch files")

        # Phase 3: Create new settings file
        logger.critical("Phase 3/4: Creating changedetection.json...")

        try:
            self._save_settings()
        except Exception as e:
            logger.error(f"Failed to create changedetection.json: {e}")
            raise Exception(
                f"Migration failed: Could not create changedetection.json. "
                f"url-watches.json remains intact, safe to retry. Error: {e}"
            ) from e

        # Phase 4: Verify settings file exists
        logger.critical("Phase 4/4: Verifying changedetection.json exists...")
        changedetection_json_new_schema = os.path.join(self.datastore_path, "changedetection.json")
        if not os.path.isfile(changedetection_json_new_schema):
            import sys
            logger.critical("Migration failed, changedetection.json not found after update ran!")
            sys.exit(1)

        logger.critical("Phase 4 complete: Verified changedetection.json exists")

        # Success! Now reload from new format
        logger.critical("Reloading datastore from new format...")
        # write it to disk, it will be saved without ['watching'] in the JSON db because we find it from disk glob
        self._save_settings()
        logger.success("Datastore reloaded from new format successfully")
        logger.critical("=" * 80)
        logger.critical("MIGRATION COMPLETED SUCCESSFULLY!")
        logger.critical("=" * 80)
        logger.info("")
        logger.info("New format:")
        logger.info(f"  - {watch_count} individual watch.json files created")
        logger.info("  - changedetection.json created (settings only)")
        logger.info("")
        logger.info("Rollback safety:")
        logger.info("  - url-watches.json preserved for rollback")
        logger.info("  - To rollback: downgrade to previous version and restart")
        logger.info("  - No manual file operations needed")
        logger.info("")
        logger.info("Optional cleanup (after testing new version):")
        logger.info(f"  - rm {os.path.join(self.datastore_path, 'url-watches.json')}")
        logger.info("")

    def update_26(self):
        """Schema update 26: delegate to migrate_legacy_db_format()."""
        self.migrate_legacy_db_format()

    # Re-run tag to JSON migration
    def update_29(self):
        """
        Migrate tags to individual tag.json files (re-run of the tag to JSON migration).

        Tags are currently saved only in changedetection.json (settings).
        This migration ALSO saves them to individual {uuid}/tag.json files,
        similar to how watches are stored (dual storage).

        Benefits:
        - Allows atomic tag updates without rewriting entire settings
        - Enables independent tag versioning/backup
        - Maintains backwards compatibility (tags stay in settings too)

        Tags whose {uuid}/tag.json already exists on disk are skipped (and counted
        as such in the summary), so running this step again is harmless. A tag that
        fails to save is logged and counted but does not abort the migration.
        """
        logger.critical("=" * 80)
        logger.critical("Running migration: Individual tag persistence (update_29)")
        logger.critical("Creating individual tag.json files")
        logger.critical("=" * 80)

        tags = self.data['settings']['application'].get('tags', {})
        tag_count = len(tags)

        if tag_count == 0:
            logger.info("No tags found, skipping migration")
            return

        logger.info(f"Migrating {tag_count} tags to individual tag.json files...")

        saved_count = 0
        skipped_count = 0
        failed_count = 0

        for uuid, tag_data in tags.items():
            # Already written by an earlier run of the tag migration - leave it alone
            if os.path.isfile(os.path.join(self.datastore_path, uuid, "tag.json")):
                logger.debug(f"Tag {uuid} tag.json exists, skipping")
                skipped_count += 1
                continue

            # Best effort per tag: one broken tag must not stop the others being saved
            try:
                tag_data.commit()
            except Exception as e:
                logger.error(f"Failed to save tag {uuid} ({tag_data.get('title', 'unknown')}): {e}")
                failed_count += 1
                continue

            saved_count += 1
            if saved_count % 10 == 0:
                logger.info(f"  Progress: {saved_count}/{tag_count} tags migrated...")

        if failed_count > 0:
            logger.warning(
                f"Migration complete: {saved_count} tags saved, "
                f"{skipped_count} already present, {failed_count} tags FAILED"
            )
        else:
            logger.success(
                f"Migration complete: {saved_count} tags saved to individual tag.json files "
                f"({skipped_count} already present)"
            )

        # Tags remain in settings for backwards compatibility AND easy access
        # On next load, _load_tags() will read from tag.json files and merge with settings
        logger.info("Tags saved to both settings AND individual tag.json files")
        logger.info("Future tag edits will update both locations (dual storage)")
        logger.critical("=" * 80)

        # write it to disk, it will be saved without ['tags'] in the JSON db because we find it from disk glob
        # (left this out by accident in previous update, added tags={} in the changedetection.json save_to_disk)
        self._save_settings()

    def update_30(self):
        """Migrate restock_settings out of watch.json into restock_diff.json processor config file.

        Previously, restock_diff processor settings (in_stock_processing, follow_price_changes, etc.)
        were stored directly in the watch dict (watch.json). They now belong in a separate per-watch
        processor config file (restock_diff.json) consistent with the processor_config_* API system.

        For tags: restock_settings key is renamed to processor_config_restock_diff in the tag dict,
        matching what the API writes when updating a tag.

        Safe to re-run: skips watches that already have a restock_diff.json, skips tags that already
        have processor_config_restock_diff set.

        restock_diff.json is written to a temporary file and then moved into place, so an
        interrupted run cannot leave behind a truncated file that a later run would treat as
        "already migrated" before dropping the original settings.
        """
        import json

        # --- Watches ---
        for uuid, watch in self.data['watching'].items():
            if watch.get('processor') != 'restock_diff':
                continue
            restock_settings = watch.get('restock_settings')
            if not restock_settings:
                continue

            data_dir = watch.data_dir
            if data_dir:
                watch.ensure_data_dir_exists()
                filepath = os.path.join(data_dir, 'restock_diff.json')
                if not os.path.isfile(filepath):
                    # Write-then-rename: the final filename only ever appears with complete content
                    tmp_filepath = f"{filepath}.tmp"
                    with open(tmp_filepath, 'w', encoding='utf-8') as f:
                        json.dump({'restock_diff': restock_settings}, f, indent=2)
                    os.replace(tmp_filepath, filepath)
                    logger.info(f"update_30: migrated restock_settings → {filepath}")

            # The settings now live in restock_diff.json; remove them from the watch and persist that
            del self.data['watching'][uuid]['restock_settings']
            watch.commit()

        # --- Tags ---
        # .get() so a datastore without any 'tags' key is treated as "no tags" (same as update_29)
        tags = self.data['settings']['application'].get('tags', {})
        for tag_uuid, tag in tags.items():
            restock_settings = tag.get('restock_settings')
            # Nothing to move, or the new key is already populated - never overwrite it
            if not restock_settings or tag.get('processor_config_restock_diff'):
                continue
            tag['processor_config_restock_diff'] = restock_settings
            del tag['restock_settings']
            tag.commit()
            logger.info(f"update_30: migrated tag {tag_uuid} restock_settings → processor_config_restock_diff")



================================================
FILE: changedetectionio/strtobool.py
================================================
# Because strtobool was removed in python 3.12 distutils

# Accepted (lower-cased) spellings and the boolean each one stands for
_MAP = {
    'y': True,
    'yes': True,
    't': True,
    'true': True,
    'on': True,
    '1': True,
    'n': False,
    'no': False,
    'f': False,
    'false': False,
    'off': False,
    '0': False
}


def strtobool(value):
    """Convert a string representation of truth to a bool.

    Falsy input (``None``, ``""``, ``0``, ``False``) returns ``False``.
    Anything else is converted with ``str()``, stripped of surrounding
    whitespace, lower-cased and looked up in ``_MAP``.

    :param value: The value to interpret, typically a string such as
        ``"yes"``, ``"off"`` or ``"1"``.
    :return: ``True`` or ``False``.
    :raises ValueError: If the value is not one of the accepted spellings.
    """
    if not value:
        return False
    try:
        # strip() so values like "true\n" or " 1 " (common in env/config files) are accepted
        return _MAP[str(value).strip().lower()]
    except KeyError:
        # 'from None': the KeyError is an implementation detail, keep it out of the traceback
        raise ValueError('"{}" is not a valid bool value'.format(value)) from None


================================================
FILE: changedetectionio/templates/IMPORTANT.md
================================================
# Important notes about templates

Template names should always end in ".html", ".htm", ".xml", ".xhtml", ".svg", even the `import`'ed templates.

Flask's `select_jinja_autoescape(self, filename: str) -> bool` checks the template's filename extension and only enables Jinja2 autoescaping when the name ends in one of the extensions listed above.



================================================
FILE: changedetectionio/templates/_common_fields.html
================================================

{% from '_helpers.html' import render_field %}

{% macro show_token_placeholders(extra_notification_token_placeholder_info, suffix="") %}


    
{{ _('Body for all notifications — You can use') }} Jinja2 {{ _('templating in the notification title, body and URL, and tokens from below.') }}
{{ _('Show token/placeholders') }}