[
  {
    "path": ".githooks/pre-commit",
    "content": "#!/bin/bash\n# Pre-commit hook: run cargo fmt, clippy, and tests before committing.\nset -e\n\necho \"=== Pre-commit: cargo fmt --check ===\"\ncargo fmt -- --check\n\necho \"=== Pre-commit: cargo clippy ===\"\ncargo clippy -- -D warnings\n\necho \"=== Pre-commit: cargo test ===\"\ncargo test\n\necho \"=== Pre-commit checks passed ===\"\n"
  },
  {
    "path": ".github/workflows/ci.yml",
    "content": "name: CI\n\non:\n  push:\n    branches: [main]\n  pull_request:\n    branches: [main]\n\nenv:\n  CARGO_TERM_COLOR: always\n\njobs:\n  check:\n    name: Check & Lint\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v6\n\n      - uses: dtolnay/rust-toolchain@stable\n        with:\n          components: rustfmt, clippy\n\n      - uses: actions/cache@v5\n        with:\n          path: |\n            ~/.cargo/registry\n            ~/.cargo/git\n            target\n          key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}\n          restore-keys: |\n            ${{ runner.os }}-cargo-\n\n      - name: Check formatting\n        run: cargo fmt -- --check\n\n      - name: Clippy\n        run: cargo clippy -- -D warnings\n\n      - name: Build\n        run: cargo build\n\n      - name: Run tests\n        run: cargo test\n"
  },
  {
    "path": ".github/workflows/publish.yml",
    "content": "name: Publish to package managers\n\n# Triggers when a draft release is published (manually via GitHub UI)\non:\n  release:\n    types: [published]\n  workflow_dispatch:\n    inputs:\n      tag:\n        description: 'Release tag (e.g. v2.0.1)'\n        required: true\n\npermissions:\n  contents: write\n\njobs:\n  # ─────────────────────────────────────────────────────────────────\n  # Publish to crates.io\n  # ─────────────────────────────────────────────────────────────────\n  publish-crates:\n    name: Publish to crates.io\n    runs-on: ubuntu-latest\n    if: vars.PUBLISH_CRATES == 'true'\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v6\n        with:\n          ref: ${{ github.event.release.tag_name || inputs.tag }}\n\n      - name: Determine version\n        id: version\n        run: |\n          TAG=\"${{ github.event.release.tag_name || inputs.tag }}\"\n          echo \"version=${TAG#v}\" >> \"$GITHUB_OUTPUT\"\n\n      - name: Ensure Cargo.toml has correct version\n        env:\n          VERSION: ${{ steps.version.outputs.version }}\n        run: sed -i \"s/^version = .*/version = \\\"${VERSION}\\\"/\" Cargo.toml\n\n      - name: Install Rust toolchain\n        uses: dtolnay/rust-toolchain@stable\n\n      - name: Publish\n        env:\n          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}\n        run: cargo publish --no-verify --allow-dirty || echo \"Already published (skipping)\"\n\n  # ─────────────────────────────────────────────────────────────────\n  # Update Homebrew tap\n  # ─────────────────────────────────────────────────────────────────\n  publish-homebrew:\n    name: Update Homebrew formula\n    runs-on: ubuntu-latest\n    if: vars.PUBLISH_HOMEBREW == 'true'\n    steps:\n      - name: Determine version\n        id: version\n        run: |\n          TAG=\"${{ github.event.release.tag_name || inputs.tag }}\"\n          echo \"version=${TAG#v}\" >> \"$GITHUB_OUTPUT\"\n\n      - name: Download 
release archives and compute SHA256\n        env:\n          VERSION: ${{ steps.version.outputs.version }}\n        run: |\n          BASE_URL=\"https://github.com/${{ github.repository }}/releases/download/v${VERSION}\"\n          for SUFFIX in linux-x64 linux-arm64 macos-x64 macos-arm64; do\n            FILE=\"siteone-crawler-v${VERSION}-${SUFFIX}.tar.gz\"\n            curl -sfL \"${BASE_URL}/${FILE}\" -o \"${FILE}\"\n            SHA=$(sha256sum \"${FILE}\" | cut -d' ' -f1)\n            VAR_NAME=\"SHA_$(echo \"${SUFFIX}\" | tr '[:lower:]-' '[:upper:]_')\"\n            echo \"${VAR_NAME}=${SHA}\" >> \"$GITHUB_ENV\"\n            echo \"${VAR_NAME}=${SHA}\"\n          done\n\n      - name: Clone Homebrew tap\n        env:\n          TAP_TOKEN: ${{ secrets.HOMEBREW_TAP_TOKEN }}\n        run: |\n          git clone \"https://x-access-token:${TAP_TOKEN}@github.com/janreges/homebrew-tap.git\" tap\n\n      - name: Update formula\n        env:\n          VERSION: ${{ steps.version.outputs.version }}\n        run: |\n          cat > tap/Formula/siteone-crawler.rb <<'FORMULA'\n          class SiteoneCrawler < Formula\n            desc \"Website crawler and QA toolkit in Rust for security, performance, SEO, and accessibility audits, offline cloning, markdown export, sitemap generation, cache warming, and CI/CD gating — one dependency-free binary for all major platforms, 10 tools in one.\"\n            homepage \"https://crawler.siteone.io/\"\n            version \"VERSION_PLACEHOLDER\"\n            license \"MIT\"\n\n            on_macos do\n              if Hardware::CPU.arm?\n                url \"https://github.com/janreges/siteone-crawler/releases/download/v#{version}/siteone-crawler-v#{version}-macos-arm64.tar.gz\"\n                sha256 \"SHA_MACOS_ARM64_PLACEHOLDER\"\n              else\n                url \"https://github.com/janreges/siteone-crawler/releases/download/v#{version}/siteone-crawler-v#{version}-macos-x64.tar.gz\"\n                sha256 
\"SHA_MACOS_X64_PLACEHOLDER\"\n              end\n            end\n\n            on_linux do\n              if Hardware::CPU.arm?\n                url \"https://github.com/janreges/siteone-crawler/releases/download/v#{version}/siteone-crawler-v#{version}-linux-arm64.tar.gz\"\n                sha256 \"SHA_LINUX_ARM64_PLACEHOLDER\"\n              else\n                url \"https://github.com/janreges/siteone-crawler/releases/download/v#{version}/siteone-crawler-v#{version}-linux-x64.tar.gz\"\n                sha256 \"SHA_LINUX_X64_PLACEHOLDER\"\n              end\n            end\n\n            def install\n              bin.install \"siteone-crawler\"\n            end\n\n            test do\n              assert_match \"SiteOne Crawler\", shell_output(\"#{bin}/siteone-crawler --version\")\n            end\n          end\n          FORMULA\n          sed -i \"s/VERSION_PLACEHOLDER/${VERSION}/g\" tap/Formula/siteone-crawler.rb\n          sed -i \"s/SHA_MACOS_ARM64_PLACEHOLDER/${SHA_MACOS_ARM64}/g\" tap/Formula/siteone-crawler.rb\n          sed -i \"s/SHA_MACOS_X64_PLACEHOLDER/${SHA_MACOS_X64}/g\" tap/Formula/siteone-crawler.rb\n          sed -i \"s/SHA_LINUX_ARM64_PLACEHOLDER/${SHA_LINUX_ARM64}/g\" tap/Formula/siteone-crawler.rb\n          sed -i \"s/SHA_LINUX_X64_PLACEHOLDER/${SHA_LINUX_X64}/g\" tap/Formula/siteone-crawler.rb\n\n      - name: Push updated formula\n        run: |\n          cd tap\n          git config user.name \"github-actions[bot]\"\n          git config user.email \"github-actions[bot]@users.noreply.github.com\"\n          git add Formula/siteone-crawler.rb\n          git diff --cached --quiet && echo \"Formula already up to date\" && exit 0\n          git commit -m \"chore: update siteone-crawler to v${{ steps.version.outputs.version }}\"\n          git push\n\n  # ─────────────────────────────────────────────────────────────────\n  # Update Scoop bucket\n  # ─────────────────────────────────────────────────────────────────\n  publish-scoop:\n   
 name: Update Scoop manifest\n    runs-on: ubuntu-latest\n    if: vars.PUBLISH_SCOOP == 'true'\n    steps:\n      - name: Determine version\n        id: version\n        run: |\n          TAG=\"${{ github.event.release.tag_name || inputs.tag }}\"\n          echo \"version=${TAG#v}\" >> \"$GITHUB_OUTPUT\"\n\n      - name: Download Windows archives and compute SHA256\n        env:\n          VERSION: ${{ steps.version.outputs.version }}\n        run: |\n          BASE_URL=\"https://github.com/${{ github.repository }}/releases/download/v${VERSION}\"\n          for SUFFIX in win-x64 win-arm64; do\n            FILE=\"siteone-crawler-v${VERSION}-${SUFFIX}.zip\"\n            curl -sfL \"${BASE_URL}/${FILE}\" -o \"${FILE}\"\n            SHA=$(sha256sum \"${FILE}\" | cut -d' ' -f1)\n            VAR_NAME=\"SHA_$(echo \"${SUFFIX}\" | tr '[:lower:]-' '[:upper:]_')\"\n            echo \"${VAR_NAME}=${SHA}\" >> \"$GITHUB_ENV\"\n          done\n\n      - name: Clone Scoop bucket\n        env:\n          BUCKET_TOKEN: ${{ secrets.SCOOP_BUCKET_TOKEN }}\n        run: |\n          git clone \"https://x-access-token:${BUCKET_TOKEN}@github.com/janreges/scoop-siteone.git\" bucket\n\n      - name: Update manifest\n        env:\n          VERSION: ${{ steps.version.outputs.version }}\n        run: |\n          mkdir -p bucket/bucket\n          cat > bucket/bucket/siteone-crawler.json << 'TEMPLATE'\n          {\n              \"version\": \"VERSION_PLACEHOLDER\",\n              \"description\": \"Website crawler and QA toolkit in Rust for security, performance, SEO, and accessibility audits, offline cloning, markdown export, sitemap generation, cache warming, and CI/CD gating — one dependency-free binary for all major platforms, 10 tools in one.\",\n              \"homepage\": \"https://crawler.siteone.io/\",\n              \"license\": \"MIT\",\n              \"architecture\": {\n                  \"64bit\": {\n                      \"url\": 
\"https://github.com/janreges/siteone-crawler/releases/download/vVERSION_PLACEHOLDER/siteone-crawler-vVERSION_PLACEHOLDER-win-x64.zip\",\n                      \"hash\": \"HASH_X64_PLACEHOLDER\"\n                  },\n                  \"arm64\": {\n                      \"url\": \"https://github.com/janreges/siteone-crawler/releases/download/vVERSION_PLACEHOLDER/siteone-crawler-vVERSION_PLACEHOLDER-win-arm64.zip\",\n                      \"hash\": \"HASH_ARM64_PLACEHOLDER\"\n                  }\n              },\n              \"extract_dir\": \"siteone-crawler\",\n              \"bin\": \"siteone-crawler.exe\",\n              \"checkver\": \"github\",\n              \"autoupdate\": {\n                  \"architecture\": {\n                      \"64bit\": {\n                          \"url\": \"https://github.com/janreges/siteone-crawler/releases/download/v$version/siteone-crawler-v$version-win-x64.zip\"\n                      },\n                      \"arm64\": {\n                          \"url\": \"https://github.com/janreges/siteone-crawler/releases/download/v$version/siteone-crawler-v$version-win-arm64.zip\"\n                      }\n                  }\n              }\n          }\n          TEMPLATE\n          sed -i \"s/VERSION_PLACEHOLDER/${VERSION}/g\" bucket/bucket/siteone-crawler.json\n          sed -i \"s/HASH_X64_PLACEHOLDER/${SHA_WIN_X64}/g\" bucket/bucket/siteone-crawler.json\n          sed -i \"s/HASH_ARM64_PLACEHOLDER/${SHA_WIN_ARM64}/g\" bucket/bucket/siteone-crawler.json\n\n      - name: Push updated manifest\n        run: |\n          cd bucket\n          git config user.name \"github-actions[bot]\"\n          git config user.email \"github-actions[bot]@users.noreply.github.com\"\n          git add bucket/siteone-crawler.json\n          git diff --cached --quiet && echo \"Manifest already up to date\" && exit 0\n          git commit -m \"chore: update siteone-crawler to v${{ steps.version.outputs.version }}\"\n          git push\n\n  # ─────────────────────────────────────────────────────────────────\n  # Submit to WinGet\n  
# ─────────────────────────────────────────────────────────────────\n  publish-winget:\n    name: Submit to WinGet\n    runs-on: windows-latest\n    # Requires initial manual submission to microsoft/winget-pkgs first.\n    # Once JanReges.SiteOneCrawler exists in winget-pkgs, set PUBLISH_WINGET=true.\n    if: vars.PUBLISH_WINGET == 'true'\n    steps:\n      - name: Determine version\n        id: version\n        shell: bash\n        run: |\n          TAG=\"${{ github.event.release.tag_name || inputs.tag }}\"\n          echo \"version=${TAG#v}\" >> \"$GITHUB_OUTPUT\"\n\n      - name: Install wingetcreate\n        run: winget install Microsoft.WingetCreate --accept-source-agreements --accept-package-agreements\n\n      - name: Update WinGet manifest\n        env:\n          VERSION: ${{ steps.version.outputs.version }}\n          WINGET_TOKEN: ${{ secrets.WINGET_TOKEN }}\n        run: |\n          $url_x64 = \"https://github.com/janreges/siteone-crawler/releases/download/v$env:VERSION/siteone-crawler-v$env:VERSION-win-x64.zip\"\n          $url_arm64 = \"https://github.com/janreges/siteone-crawler/releases/download/v$env:VERSION/siteone-crawler-v$env:VERSION-win-arm64.zip\"\n          wingetcreate update JanReges.SiteOneCrawler `\n            --version $env:VERSION `\n            --urls $url_x64 $url_arm64 `\n            --token $env:WINGET_TOKEN `\n            --submit\n\n  # ─────────────────────────────────────────────────────────────────\n  # Update AUR package\n  # ─────────────────────────────────────────────────────────────────\n  publish-aur:\n    name: Update AUR package\n    runs-on: ubuntu-latest\n    if: vars.PUBLISH_AUR == 'true'\n    steps:\n      - name: Determine version\n        id: version\n        run: |\n          TAG=\"${{ github.event.release.tag_name || inputs.tag }}\"\n          echo \"version=${TAG#v}\" >> \"$GITHUB_OUTPUT\"\n\n      - name: Compute SHA256 for Linux archives\n        env:\n          VERSION: ${{ steps.version.outputs.version 
}}\n        run: |\n          BASE_URL=\"https://github.com/${{ github.repository }}/releases/download/v${VERSION}\"\n          for SUFFIX in linux-x64 linux-arm64; do\n            FILE=\"siteone-crawler-v${VERSION}-${SUFFIX}.tar.gz\"\n            curl -sfL \"${BASE_URL}/${FILE}\" -o \"${FILE}\"\n            SHA=$(sha256sum \"${FILE}\" | cut -d' ' -f1)\n            VAR_NAME=\"SHA_$(echo \"${SUFFIX}\" | tr '[:lower:]-' '[:upper:]_')\"\n            echo \"${VAR_NAME}=${SHA}\" >> \"$GITHUB_ENV\"\n          done\n\n      - name: Setup SSH for AUR\n        env:\n          AUR_SSH_KEY: ${{ secrets.AUR_SSH_KEY }}\n        run: |\n          mkdir -p ~/.ssh\n          echo \"$AUR_SSH_KEY\" > ~/.ssh/aur\n          chmod 600 ~/.ssh/aur\n          echo \"Host aur.archlinux.org\" >> ~/.ssh/config\n          echo \"  IdentityFile ~/.ssh/aur\" >> ~/.ssh/config\n          echo \"  User aur\" >> ~/.ssh/config\n          ssh-keyscan aur.archlinux.org >> ~/.ssh/known_hosts\n\n      - name: Clone AUR repo and update PKGBUILD\n        env:\n          VERSION: ${{ steps.version.outputs.version }}\n        run: |\n          git clone ssh://aur@aur.archlinux.org/siteone-crawler-bin.git aur\n          cd aur\n\n          cat > PKGBUILD << PKGBUILD\n          # Maintainer: Jan Reges <jan.reges@siteone.cz>\n          pkgname=siteone-crawler-bin\n          pkgver=${VERSION}\n          pkgrel=1\n          pkgdesc=\"Website crawler and QA toolkit in Rust for security, performance, SEO, and accessibility audits, offline cloning, markdown export, sitemap generation, cache warming, and CI/CD gating — one dependency-free binary for all major platforms, 10 tools in one.\"\n          arch=('x86_64' 'aarch64')\n          url=\"https://crawler.siteone.io/\"\n          license=('MIT')\n          provides=('siteone-crawler')\n          conflicts=('siteone-crawler')\n\n          
source_x86_64=(\"https://github.com/janreges/siteone-crawler/releases/download/v\\${pkgver}/siteone-crawler-v\\${pkgver}-linux-x64.tar.gz\")\n          source_aarch64=(\"https://github.com/janreges/siteone-crawler/releases/download/v\\${pkgver}/siteone-crawler-v\\${pkgver}-linux-arm64.tar.gz\")\n          sha256sums_x86_64=('${SHA_LINUX_X64}')\n          sha256sums_aarch64=('${SHA_LINUX_ARM64}')\n\n          package() {\n              install -Dm755 \"\\${srcdir}/siteone-crawler/siteone-crawler\" \"\\${pkgdir}/usr/bin/siteone-crawler\"\n              install -Dm644 \"\\${srcdir}/siteone-crawler/LICENSE\" \"\\${pkgdir}/usr/share/licenses/\\${pkgname}/LICENSE\"\n          }\n          PKGBUILD\n\n          cat > .SRCINFO << SRCINFO\n          pkgbase = siteone-crawler-bin\n          \tpkgdesc = Website crawler and QA toolkit in Rust for security, performance, SEO, and accessibility audits, offline cloning, markdown export, sitemap generation, cache warming, and CI/CD gating — one dependency-free binary for all major platforms, 10 tools in one.\n          \tpkgver = ${VERSION}\n          \tpkgrel = 1\n          \turl = https://crawler.siteone.io/\n          \tarch = x86_64\n          \tarch = aarch64\n          \tlicense = MIT\n          \tprovides = siteone-crawler\n          \tconflicts = siteone-crawler\n          \tsource_x86_64 = https://github.com/janreges/siteone-crawler/releases/download/v${VERSION}/siteone-crawler-v${VERSION}-linux-x64.tar.gz\n          \tsha256sums_x86_64 = ${SHA_LINUX_X64}\n          \tsource_aarch64 = https://github.com/janreges/siteone-crawler/releases/download/v${VERSION}/siteone-crawler-v${VERSION}-linux-arm64.tar.gz\n          \tsha256sums_aarch64 = ${SHA_LINUX_ARM64}\n\n          pkgname = siteone-crawler-bin\n          SRCINFO\n\n          git config user.name \"Jan Reges\"\n          git config user.email \"jan.reges@siteone.cz\"\n          git add PKGBUILD .SRCINFO\n          git commit -m \"chore: update siteone-crawler to 
v${VERSION}\"\n          git push\n\n  # ─────────────────────────────────────────────────────────────────\n  # Publish .deb and .rpm to Cloudsmith (APT + DNF repository)\n  # ─────────────────────────────────────────────────────────────────\n  publish-cloudsmith:\n    name: Publish to Cloudsmith\n    runs-on: ubuntu-latest\n    if: vars.PUBLISH_CLOUDSMITH == 'true'\n    steps:\n      - name: Determine version\n        id: version\n        run: |\n          TAG=\"${{ github.event.release.tag_name || inputs.tag }}\"\n          echo \"version=${TAG#v}\" >> \"$GITHUB_OUTPUT\"\n\n      - name: Download .deb, .rpm and .apk from release\n        env:\n          GH_TOKEN: ${{ github.token }}\n          VERSION: ${{ steps.version.outputs.version }}\n        run: |\n          mkdir -p packages\n          BASE_URL=\"https://github.com/${{ github.repository }}/releases/download/v${VERSION}\"\n          # Download all .deb, .rpm and .apk assets from the release\n          for file in $(gh release view \"v${VERSION}\" --repo \"${{ github.repository }}\" --json assets -q '.assets[].name' | grep -E '\\.(deb|rpm|apk)$'); do\n            echo \"Downloading ${file} ...\"\n            curl -sfL \"${BASE_URL}/${file}\" -o \"packages/${file}\"\n          done\n\n      - name: List packages\n        run: ls -lhR packages/\n\n      - name: Install Cloudsmith CLI\n        run: pip install cloudsmith-cli\n\n      - name: Upload .deb packages\n        env:\n          CLOUDSMITH_API_KEY: ${{ secrets.CLOUDSMITH_API_KEY }}\n        run: |\n          for deb in packages/*.deb; do\n            [ -f \"$deb\" ] || continue\n            echo \"Uploading $deb ...\"\n            cloudsmith push deb janreges/siteone-crawler/any-distro/any-version \"$deb\" --republish\n          done\n\n      - name: Upload .rpm packages\n        env:\n          CLOUDSMITH_API_KEY: ${{ secrets.CLOUDSMITH_API_KEY }}\n        run: |\n          for rpm in packages/*.rpm; do\n            [ -f \"$rpm\" ] || continue\n       
     echo \"Uploading $rpm ...\"\n            cloudsmith push rpm janreges/siteone-crawler/any-distro/any-version \"$rpm\" --republish\n          done\n\n      - name: Upload .apk packages\n        env:\n          CLOUDSMITH_API_KEY: ${{ secrets.CLOUDSMITH_API_KEY }}\n        run: |\n          for apk in packages/*.apk; do\n            [ -f \"$apk\" ] || continue\n            echo \"Uploading $apk ...\"\n            cloudsmith push alpine janreges/siteone-crawler/alpine/any-version \"$apk\" --republish\n          done\n"
  },
  {
    "path": ".github/workflows/release.yml",
    "content": "name: Release\n\n# Trigger: push a tag like v1.0.10\non:\n  push:\n    tags:\n      - 'v*'\n  # Manual trigger for building artifacts only (no release created)\n  workflow_dispatch:\n    inputs:\n      version:\n        description: 'Version number (e.g. 1.0.10)'\n        required: true\n\npermissions:\n  contents: write\n\nenv:\n  CARGO_TERM_COLOR: always\n\njobs:\n  build:\n    name: Build ${{ matrix.artifact_suffix }}\n    runs-on: ${{ matrix.os }}\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          - target: x86_64-unknown-linux-gnu\n            os: ubuntu-latest\n            artifact_suffix: linux-x64\n            archive: tar.gz\n\n          - target: aarch64-unknown-linux-gnu\n            os: ubuntu-latest\n            artifact_suffix: linux-arm64\n            archive: tar.gz\n            cross: true\n\n          - target: x86_64-apple-darwin\n            os: macos-latest\n            artifact_suffix: macos-x64\n            archive: tar.gz\n\n          - target: aarch64-apple-darwin\n            os: macos-latest\n            artifact_suffix: macos-arm64\n            archive: tar.gz\n\n          - target: x86_64-pc-windows-msvc\n            os: windows-latest\n            artifact_suffix: win-x64\n            archive: zip\n\n          - target: aarch64-pc-windows-msvc\n            os: windows-latest\n            artifact_suffix: win-arm64\n            archive: zip\n\n          - target: x86_64-unknown-linux-musl\n            os: ubuntu-latest\n            artifact_suffix: linux-musl-x64\n            archive: tar.gz\n            musl: true\n\n          - target: aarch64-unknown-linux-musl\n            os: ubuntu-latest\n            artifact_suffix: linux-musl-arm64\n            archive: tar.gz\n            cross: true\n            musl: true\n\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v6\n\n      - name: Determine version\n        id: version\n        shell: bash\n        run: |\n         
 if [[ \"${{ github.event_name }}\" == \"workflow_dispatch\" ]]; then\n            VERSION=\"${{ github.event.inputs.version }}\"\n          else\n            # Extract from tag: v1.0.10 -> 1.0.10\n            VERSION=\"${GITHUB_REF_NAME#v}\"\n          fi\n          echo \"version=${VERSION}\" >> \"$GITHUB_OUTPUT\"\n          echo \"Version: ${VERSION}\"\n\n      - name: Install Rust toolchain\n        uses: dtolnay/rust-toolchain@stable\n        with:\n          targets: ${{ matrix.target }}\n\n      - name: Install cross (for cross-compilation)\n        if: matrix.cross\n        run: cargo install cross --git https://github.com/cross-rs/cross\n\n      - name: Install musl tools\n        if: matrix.musl && !matrix.cross\n        run: sudo apt-get install -y musl-tools\n\n      - name: Update version in source\n        shell: bash\n        run: |\n          VERSION=\"${{ steps.version.outputs.version }}\"\n          DATE_SUFFIX=\"$(date +%Y%m%d)\"\n          VERSION_CODE=\"${VERSION}.${DATE_SUFFIX}\"\n\n          # Update Cargo.toml\n          sed -i.bak \"s/^version = .*/version = \\\"${VERSION}\\\"/\" Cargo.toml\n\n          # Update version.rs\n          sed -i.bak \"s/^pub const CODE: .*/pub const CODE: \\&str = \\\"${VERSION_CODE}\\\";/\" src/version.rs\n\n          echo \"Cargo.toml version: ${VERSION}\"\n          echo \"version.rs CODE: ${VERSION_CODE}\"\n\n      - name: Build\n        shell: bash\n        run: |\n          if [[ \"${{ matrix.cross }}\" == \"true\" ]]; then\n            cross build --release --target ${{ matrix.target }}\n          else\n            cargo build --release --target ${{ matrix.target }}\n          fi\n\n      # ── macOS Code Signing & Notarization ──────────────────────────\n      - name: Import Apple certificate\n        if: runner.os == 'macOS'\n        env:\n          CERTIFICATE_BASE64: ${{ secrets.APPLE_CERTIFICATE_BASE64 }}\n          CERTIFICATE_PASSWORD: ${{ secrets.APPLE_CERTIFICATE_PASSWORD }}\n        run: |\n      
    CERTIFICATE_PATH=\"$RUNNER_TEMP/certificate.p12\"\n          KEYCHAIN_PATH=\"$RUNNER_TEMP/signing.keychain-db\"\n          KEYCHAIN_PASSWORD=\"$(openssl rand -hex 16)\"\n\n          echo -n \"$CERTIFICATE_BASE64\" | base64 --decode -o \"$CERTIFICATE_PATH\"\n\n          security create-keychain -p \"$KEYCHAIN_PASSWORD\" \"$KEYCHAIN_PATH\"\n          security set-keychain-settings -lut 21600 \"$KEYCHAIN_PATH\"\n          security unlock-keychain -p \"$KEYCHAIN_PASSWORD\" \"$KEYCHAIN_PATH\"\n\n          security import \"$CERTIFICATE_PATH\" \\\n            -P \"$CERTIFICATE_PASSWORD\" \\\n            -A -t cert -f pkcs12 \\\n            -k \"$KEYCHAIN_PATH\"\n\n          security set-key-partition-list \\\n            -S apple-tool:,apple: \\\n            -k \"$KEYCHAIN_PASSWORD\" \\\n            \"$KEYCHAIN_PATH\"\n\n          security list-keychain -d user -s \"$KEYCHAIN_PATH\"\n\n      - name: Sign macOS binary\n        if: runner.os == 'macOS'\n        env:\n          SIGNING_IDENTITY: ${{ secrets.APPLE_SIGNING_IDENTITY }}\n        run: |\n          BINARY=\"target/${{ matrix.target }}/release/siteone-crawler\"\n\n          codesign --force --options runtime \\\n            --sign \"$SIGNING_IDENTITY\" \\\n            \"$BINARY\"\n\n          echo \"Verifying signature...\"\n          codesign --verify --verbose \"$BINARY\"\n          echo \"Signature OK\"\n\n      - name: Notarize macOS binary\n        if: runner.os == 'macOS'\n        env:\n          APPLE_ID: ${{ secrets.APPLE_ID }}\n          APPLE_ID_PASSWORD: ${{ secrets.APPLE_ID_PASSWORD }}\n          APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}\n        run: |\n          BINARY=\"target/${{ matrix.target }}/release/siteone-crawler\"\n          NOTARIZE_ZIP=\"$RUNNER_TEMP/notarize.zip\"\n\n          # ditto is required — Apple's notary service rejects zip-created archives\n          ditto -c -k --keepParent \"$BINARY\" \"$NOTARIZE_ZIP\"\n\n          echo \"Submitting for notarization...\"\n          
xcrun notarytool submit \"$NOTARIZE_ZIP\" \\\n            --apple-id \"$APPLE_ID\" \\\n            --password \"$APPLE_ID_PASSWORD\" \\\n            --team-id \"$APPLE_TEAM_ID\" \\\n            --wait\n\n          echo \"Notarization complete\"\n\n      - name: Clean up keychain\n        if: runner.os == 'macOS' && always()\n        run: |\n          KEYCHAIN_PATH=\"$RUNNER_TEMP/signing.keychain-db\"\n          if [ -f \"$KEYCHAIN_PATH\" ]; then\n            security delete-keychain \"$KEYCHAIN_PATH\"\n          fi\n      # ────────────────────────────────────────────────────────────────\n\n      - name: Package (Unix)\n        if: matrix.archive == 'tar.gz'\n        shell: bash\n        run: |\n          VERSION=\"${{ steps.version.outputs.version }}\"\n          ARTIFACT=\"siteone-crawler-v${VERSION}-${{ matrix.artifact_suffix }}\"\n          mkdir -p \"staging/siteone-crawler\"\n          cp \"target/${{ matrix.target }}/release/siteone-crawler\" \"staging/siteone-crawler/\"\n          cp README.md \"staging/siteone-crawler/\" 2>/dev/null || true\n          cp LICENSE \"staging/siteone-crawler/\" 2>/dev/null || true\n          chmod +x \"staging/siteone-crawler/siteone-crawler\"\n          (cd staging && tar czf \"../${ARTIFACT}.tar.gz\" siteone-crawler/)\n          echo \"ARTIFACT_PATH=${ARTIFACT}.tar.gz\" >> \"$GITHUB_ENV\"\n\n      - name: Package (Windows)\n        if: matrix.archive == 'zip'\n        shell: bash\n        run: |\n          VERSION=\"${{ steps.version.outputs.version }}\"\n          ARTIFACT=\"siteone-crawler-v${VERSION}-${{ matrix.artifact_suffix }}\"\n          mkdir -p \"staging/siteone-crawler\"\n          cp \"target/${{ matrix.target }}/release/siteone-crawler.exe\" \"staging/siteone-crawler/\"\n          cp README.md \"staging/siteone-crawler/\" 2>/dev/null || true\n          cp LICENSE \"staging/siteone-crawler/\" 2>/dev/null || true\n          (cd staging && 7z a -r \"../${ARTIFACT}.zip\" siteone-crawler/)\n          echo 
\"ARTIFACT_PATH=${ARTIFACT}.zip\" >> \"$GITHUB_ENV\"\n\n      # ── Build .deb and .rpm packages (Linux only) ──────────────\n      - name: Install cross-compilation tools (arm64)\n        if: runner.os == 'Linux' && matrix.cross\n        run: sudo apt-get install -y binutils-aarch64-linux-gnu\n\n      - name: Strip binary (Linux)\n        if: runner.os == 'Linux'\n        shell: bash\n        run: |\n          BINARY=\"target/${{ matrix.target }}/release/siteone-crawler\"\n          if [[ \"${{ matrix.target }}\" == \"aarch64\"* ]]; then\n            aarch64-linux-gnu-strip -s \"$BINARY\" || true\n          else\n            strip -s \"$BINARY\" || true\n          fi\n\n      - name: Build .deb package\n        if: runner.os == 'Linux'\n        shell: bash\n        run: |\n          cargo install cargo-deb\n          if [[ \"${{ matrix.musl }}\" == \"true\" ]]; then\n            cargo deb --no-build --no-strip --target ${{ matrix.target }} --variant static\n          else\n            cargo deb --no-build --no-strip --target ${{ matrix.target }}\n          fi\n          echo \"DEB_PATH=$(ls target/${{ matrix.target }}/debian/*.deb)\" >> \"$GITHUB_ENV\"\n\n      - name: Build .rpm package\n        if: runner.os == 'Linux'\n        shell: bash\n        run: |\n          cargo install cargo-generate-rpm\n          mkdir -p target/release\n          cp \"target/${{ matrix.target }}/release/siteone-crawler\" target/release/\n          if [[ \"${{ matrix.musl }}\" == \"true\" ]]; then\n            # Override package name for static/musl variant\n            sed -i 's/^name = \"siteone-crawler\"$/name = \"siteone-crawler-static\"/' Cargo.toml\n          fi\n          cargo generate-rpm --target ${{ matrix.target }}\n          echo \"RPM_PATH=$(find target -name '*.rpm' -path '*/generate-rpm/*' | head -1)\" >> \"$GITHUB_ENV\"\n\n      - name: Upload .deb artifact\n        if: runner.os == 'Linux'\n        uses: actions/upload-artifact@v7\n        with:\n          name: 
siteone-crawler-${{ matrix.artifact_suffix }}-deb\n          path: ${{ env.DEB_PATH }}\n\n      - name: Upload .rpm artifact\n        if: runner.os == 'Linux'\n        uses: actions/upload-artifact@v7\n        with:\n          name: siteone-crawler-${{ matrix.artifact_suffix }}-rpm\n          path: ${{ env.RPM_PATH }}\n      # ────────────────────────────────────────────────────────────────\n\n      - name: Upload artifact\n        uses: actions/upload-artifact@v7\n        with:\n          name: siteone-crawler-${{ matrix.artifact_suffix }}\n          path: ${{ env.ARTIFACT_PATH }}\n\n  # ─────────────────────────────────────────────────────────────────\n  # Build Alpine .apk packages from musl binaries\n  # ─────────────────────────────────────────────────────────────────\n  package-alpine:\n    name: Build Alpine .apk (${{ matrix.arch }})\n    needs: build\n    runs-on: ubuntu-latest\n    strategy:\n      fail-fast: false\n      matrix:\n        include:\n          - arch: x86_64\n            artifact_suffix: linux-musl-x64\n          - arch: aarch64\n            artifact_suffix: linux-musl-arm64\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v6\n\n      - name: Determine version\n        id: version\n        shell: bash\n        run: |\n          if [[ \"${{ github.event_name }}\" == \"workflow_dispatch\" ]]; then\n            VERSION=\"${{ github.event.inputs.version }}\"\n          else\n            VERSION=\"${GITHUB_REF_NAME#v}\"\n          fi\n          echo \"version=${VERSION}\" >> \"$GITHUB_OUTPUT\"\n\n      - name: Download musl binary\n        uses: actions/download-artifact@v8\n        with:\n          name: siteone-crawler-${{ matrix.artifact_suffix }}\n          path: dist\n\n      - name: Extract binary\n        run: |\n          VERSION=\"${{ steps.version.outputs.version }}\"\n          tar xzf \"dist/siteone-crawler-v${VERSION}-${{ matrix.artifact_suffix }}.tar.gz\" -C dist\n\n      - name: Setup Alpine\n        uses: 
jirutka/setup-alpine@v1\n        with:\n          arch: ${{ matrix.arch }}\n          packages: abuild\n\n      - name: Prepare signing key\n        shell: alpine.sh --root {0}\n        env:\n          ALPINE_RSA_KEY: ${{ secrets.ALPINE_RSA_PRIVATE_KEY }}\n          ALPINE_RSA_PUB: ${{ secrets.ALPINE_RSA_PUBLIC_KEY }}\n        run: |\n          BUILDER=runner\n\n          # Install signing key\n          mkdir -p /etc/apk/keys\n          printf '%s\\n' \"$ALPINE_RSA_PUB\" > /etc/apk/keys/siteone.rsa.pub\n\n          # Setup abuild config for builder\n          mkdir -p \"/home/$BUILDER/.abuild\"\n          printf '%s\\n' \"$ALPINE_RSA_KEY\" > \"/home/$BUILDER/.abuild/siteone.rsa\"\n          printf '%s\\n' \"$ALPINE_RSA_PUB\" > \"/home/$BUILDER/.abuild/siteone.rsa.pub\"\n          chmod 600 \"/home/$BUILDER/.abuild/siteone.rsa\"\n          cat > \"/home/$BUILDER/.abuild/abuild.conf\" << 'EOF'\n          PACKAGER_PRIVKEY=\"$HOME/.abuild/siteone.rsa\"\n          EOF\n          chown -R \"$BUILDER\" \"/home/$BUILDER/.abuild\"\n\n          # Add user to abuild group\n          addgroup \"$BUILDER\" abuild\n\n      - name: Build .apk\n        shell: alpine.sh {0}\n        env:\n          VERSION: ${{ steps.version.outputs.version }}\n        run: |\n          ARCH=$(uname -m)\n\n          # Prepare build directory\n          mkdir -p ~/build\n          cp \"$GITHUB_WORKSPACE/dist/siteone-crawler/siteone-crawler\" ~/build/\n          cp \"$GITHUB_WORKSPACE/LICENSE\" ~/build/ 2>/dev/null || true\n\n          # Create APKBUILD\n          cat > ~/build/APKBUILD << EOF\n          # Maintainer: Jan Reges <jan.reges@siteone.cz>\n          pkgname=siteone-crawler\n          pkgver=${VERSION}\n          pkgrel=1\n          pkgdesc=\"Website crawler and QA toolkit in Rust\"\n          url=\"https://crawler.siteone.io/\"\n          arch=\"${ARCH}\"\n          license=\"MIT\"\n          source=\"\"\n          options=\"!check !strip\"\n\n          package() {\n              install 
-Dm755 \"\\$startdir/siteone-crawler\" \"\\$pkgdir/usr/bin/siteone-crawler\"\n              install -Dm644 \"\\$startdir/LICENSE\" \"\\$pkgdir/usr/share/licenses/\\$pkgname/LICENSE\" 2>/dev/null || true\n          }\n          EOF\n\n          # Build the package\n          cd ~/build\n          abuild -d -P ~/packages\n\n          # Copy and rename to include arch (both arches produce the same filename)\n          mkdir -p \"$GITHUB_WORKSPACE/apk-out\"\n          for f in $(find ~/packages -name '*.apk'); do\n            BASENAME=$(basename \"$f\" .apk)\n            cp \"$f\" \"$GITHUB_WORKSPACE/apk-out/${BASENAME}-${ARCH}.apk\"\n          done\n\n      - name: Upload .apk artifact\n        uses: actions/upload-artifact@v7\n        with:\n          name: siteone-crawler-alpine-${{ matrix.arch }}\n          path: apk-out/*.apk\n\n  release:\n    name: Create GitHub Release\n    needs: [build, package-alpine]\n    runs-on: ubuntu-latest\n    if: always() && startsWith(github.ref, 'refs/tags/v') && needs.build.result == 'success'\n    steps:\n      - name: Checkout\n        uses: actions/checkout@v6\n\n      - name: Download all artifacts\n        uses: actions/download-artifact@v8\n        with:\n          path: artifacts\n          merge-multiple: true\n\n      - name: Determine version\n        id: version\n        run: echo \"version=${GITHUB_REF_NAME#v}\" >> \"$GITHUB_OUTPUT\"\n\n      - name: List artifacts\n        run: ls -lhR artifacts/\n\n      - name: Create Release\n        uses: softprops/action-gh-release@v2\n        with:\n          name: \"v${{ steps.version.outputs.version }}\"\n          body: |\n            ### Downloads\n\n            | Platform | Architecture | File |\n            |----------|-------------|------|\n            | Linux | x64 | `siteone-crawler-v${{ steps.version.outputs.version }}-linux-x64.tar.gz` |\n            | Linux | arm64 | `siteone-crawler-v${{ steps.version.outputs.version }}-linux-arm64.tar.gz` |\n            | Linux | 
x64 (musl/static) | `siteone-crawler-v${{ steps.version.outputs.version }}-linux-musl-x64.tar.gz` |\n            | Linux | arm64 (musl/static) | `siteone-crawler-v${{ steps.version.outputs.version }}-linux-musl-arm64.tar.gz` |\n            | macOS | arm64 (Apple Silicon) | `siteone-crawler-v${{ steps.version.outputs.version }}-macos-arm64.tar.gz` |\n            | macOS | x64 (Intel) | `siteone-crawler-v${{ steps.version.outputs.version }}-macos-x64.tar.gz` |\n            | Windows | x64 | `siteone-crawler-v${{ steps.version.outputs.version }}-win-x64.zip` |\n            | Windows | arm64 | `siteone-crawler-v${{ steps.version.outputs.version }}-win-arm64.zip` |\n\n            ### Linux packages (glibc — best performance, requires glibc 2.39+)\n\n            | Format | Architecture | File |\n            |--------|-------------|------|\n            | Debian/Ubuntu (.deb) | x64 | `siteone-crawler_${{ steps.version.outputs.version }}-1_amd64.deb` |\n            | Debian/Ubuntu (.deb) | arm64 | `siteone-crawler_${{ steps.version.outputs.version }}-1_arm64.deb` |\n            | Fedora/RHEL (.rpm) | x64 | `siteone-crawler-${{ steps.version.outputs.version }}-1.x86_64.rpm` |\n            | Fedora/RHEL (.rpm) | arm64 | `siteone-crawler-${{ steps.version.outputs.version }}-1.aarch64.rpm` |\n\n            ### Linux packages (musl/static — any Linux, ~50–80% slower)\n\n            | Format | Architecture | File |\n            |--------|-------------|------|\n            | Debian/Ubuntu (.deb) | x64 | `siteone-crawler-static_${{ steps.version.outputs.version }}-1_amd64.deb` |\n            | Debian/Ubuntu (.deb) | arm64 | `siteone-crawler-static_${{ steps.version.outputs.version }}-1_arm64.deb` |\n            | Fedora/RHEL (.rpm) | x64 | `siteone-crawler-static-${{ steps.version.outputs.version }}-1.x86_64.rpm` |\n            | Fedora/RHEL (.rpm) | arm64 | `siteone-crawler-static-${{ steps.version.outputs.version }}-1.aarch64.rpm` |\n            | Alpine (.apk) | x64 | 
`siteone-crawler-${{ steps.version.outputs.version }}-r1-x86_64.apk` |\n            | Alpine (.apk) | arm64 | `siteone-crawler-${{ steps.version.outputs.version }}-r1-aarch64.apk` |\n\n            ### Quick start\n\n            ```bash\n            # Extract and run\n            tar xzf siteone-crawler-v${{ steps.version.outputs.version }}-linux-x64.tar.gz\n            cd siteone-crawler\n            ./siteone-crawler --url=https://example.com\n            ```\n\n            ### Install via package manager\n\n            ```bash\n            # Debian/Ubuntu (glibc — Ubuntu 24.04+, Debian 13+)\n            sudo dpkg -i siteone-crawler_${{ steps.version.outputs.version }}-1_amd64.deb\n\n            # Debian/Ubuntu (static/musl — older distributions)\n            sudo dpkg -i siteone-crawler-static_${{ steps.version.outputs.version }}-1_amd64.deb\n\n            # Fedora/RHEL\n            sudo dnf install ./siteone-crawler-${{ steps.version.outputs.version }}-1.x86_64.rpm\n            ```\n          files: artifacts/*\n          generate_release_notes: true\n          draft: true\n          prerelease: false\n"
  },
  {
    "path": ".gitignore",
    "content": "/target\r\n/tmp/\r\n/dist/\r\n*.swp\r\n*.swo\r\n*~\r\n.idea/\r\n.vscode/\r\n*.cache\r\n"
  },
  {
    "path": "CHANGELOG.md",
    "content": "### Changelog\n\nAll notable changes to this project will be documented in this file. Dates are displayed in UTC.\n\n#### [v1.0.9](https://github.com/janreges/siteone-crawler/compare/v1.0.8...v1.0.9)\n\n- typos: non exhaustive typo and spelling corrections [`#8`](https://github.com/janreges/siteone-crawler/pull/8)\n- offline exporter: new option --ignore-store-file-error for the OfflineWebsiteExporter [`#16`](https://github.com/janreges/siteone-crawler/pull/16)\n- url handling: added option --transform-url to force requests for some URL to be internally transformed and a different URL/domain (e.g. local) to be queried, fixes #58 [`#58`](https://github.com/janreges/siteone-crawler/issues/58)\n- html report: added option to list which sections to include in the HTML report via --html-report-options (see README.md), fixes #63 [`#63`](https://github.com/janreges/siteone-crawler/issues/63)\n- offline export: fix behavior regarding URLs containing various valid UTF-8 characters (German, Chinese, etc.), fixes #65 [`#65`](https://github.com/janreges/siteone-crawler/issues/65)\n- seo analysis: fix for an issue that occurs when encoding UTF-8 due to some special characters in the content, fixes #51 [`#51`](https://github.com/janreges/siteone-crawler/issues/51)\n- offline website exporter: added option --offline-export-no-auto-redirect-html, which disables the generation of automatic sub-folder.html with meta redirects to sub-folder/index.html, fixes #54 [`#54`](https://github.com/janreges/siteone-crawler/issues/54)\n- offline website exporter: fix replacing reference  where it is followed by  and not an immediate number, fixes #52 [`#52`](https://github.com/janreges/siteone-crawler/issues/52)\n- slowest analyzer: fixed typo slowest-&gt;slower, fixes #42 [`#42`](https://github.com/janreges/siteone-crawler/issues/42)\n- url & sitemaps: as --url it is now possible to specify a URL to sitemap xml, or sitemap index xml, from which to find a list of all URLs, fixes 
#25 [`#25`](https://github.com/janreges/siteone-crawler/issues/25)\n- github: remove all unnecessary files from the release package [`e54029c`](https://github.com/janreges/siteone-crawler/commit/e54029cbef015a259d92e93933e81af2e851a145)\n- github: fix release workflow [`c9d5361`](https://github.com/janreges/siteone-crawler/commit/c9d5361acd646b24e47cb6e60e7d07be12cd96c9)\n- github: workflow for automatic creation of release archives for all 5 supported platforms/architectures [`0a461ac`](https://github.com/janreges/siteone-crawler/commit/0a461aca0b145982005b6a460d3e852a0767426a)\n- webp analysis: if there are avif images on the website (they are more optimized than webp), we will not report the absence of webp [`e067653`](https://github.com/janreges/siteone-crawler/commit/e06765332fa743f9bb22f5eb589cb71a01dc90db)\n- term: if TERM is not set or we're not in a TTY, use default width 138 [`eb839e4`](https://github.com/janreges/siteone-crawler/commit/eb839e423abf4df7020822986dc9e2ae43d44971)\n- options: handling of the situation of calling only 'crawler' without a parameters - complete documentation and a red message about the need to pass at least the --url parameter will be displayed [`fc390ae`](https://github.com/janreges/siteone-crawler/commit/fc390ae693ba201b060effc67a90ad893772558f)\n- phpstan: fix errors found by phpstan and increasing the memory limit for phpstan [`650d46a`](https://github.com/janreges/siteone-crawler/commit/650d46abb867ff04df24be01c3c6daebd42b0911)\n- tests: fix the tests after removing the underscore for the external domain [`b31a872`](https://github.com/janreges/siteone-crawler/commit/b31a872fdc439a321c364665d18634906ce8ad30)\n- Revert \"url parser: fix url parsing in some cases when href starts with './'\" [`240430b`](https://github.com/janreges/siteone-crawler/commit/240430bc90039063b9e810980360f798afa46f74)\n- url parser: fix url parsing in some cases when href starts with './' 
[`2443532`](https://github.com/janreges/siteone-crawler/commit/244353202c80c152b6a3b63ef83f6046338404e9)\n- url parser: fix url parsing in some cases when href starts with './' [`fe33e7b`](https://github.com/janreges/siteone-crawler/commit/fe33e7b6c404a62cf636db20b6193196c4bf6e25)\n- website to markdown: added --markdown-remove-links-and-images-from-single-file - useful when used within an AI tool to obtain context from a website (typically with documentation of a solution/framework) [`631e544`](https://github.com/janreges/siteone-crawler/commit/631e544b9eb836a01f68f055e80b8b35b16687dc)\n- website to markdown: fixed the problem with incorrect sorting of the root index.md (homepage should be at the beginning) [`c2ffff3`](https://github.com/janreges/siteone-crawler/commit/c2ffff32a48e84c872e132417ef1623015755e7e)\n- website to markdown: fine tuning of the resulting markdown files, correct detection of table headers, removal of excess whitespaces [`ee40b29`](https://github.com/janreges/siteone-crawler/commit/ee40b2915611824c676c5d4761a266edba6be0d2)\n- website to markdown: added --markdown-export-single-file for the ability to save all website content into one combined markdown file (smart detection and removal of shared headers and footers is also implemented) [`af01376`](https://github.com/janreges/siteone-crawler/commit/af013766830991f473d26dc25dc5804cc88b7c76)\n- readme: changed partnership to powered by JetBrains [`e77f755`](https://github.com/janreges/siteone-crawler/commit/e77f755527319e99d66cec6b2b2864dee4d560e4)\n- readme: added partnership with JetBrains [`0104646`](https://github.com/janreges/siteone-crawler/commit/0104646b6f209eae1c530bb68160a5fa238f7dda)\n- website to markdown: added implicit excluded selectors for typical 'hidden' classes [`b3c57d6`](https://github.com/janreges/siteone-crawler/commit/b3c57d69f9ef52592cab308b493b328b81c29705)\n- website to markdown: consecutive links fixes (ignore links without visible text or defined href) 
[`6d9a310`](https://github.com/janreges/siteone-crawler/commit/6d9a31053a56bb532961395a2e1821f2028e36ac)\n- website to markdown: list fixes and prepared auto-removal of duplicates (e.g. desktop & mobile version of menus) [`338b0c6`](https://github.com/janreges/siteone-crawler/commit/338b0c692a434a4f3a2a20160c9f45004526c04a)\n- website to markdown: removed unwanted escaping from links/images [`35c6f57`](https://github.com/janreges/siteone-crawler/commit/35c6f579a62080316210ec00734af8069ea32f27)\n- website to markdown: refactoring the way ul/ol lists are composed (there were problems with nested lists and whitespaces) [`15ea68c`](https://github.com/janreges/siteone-crawler/commit/15ea68ce7e3e3d962c5813ad971571eec42fe933)\n- README: improved introduction and added icons [`737b8c6`](https://github.com/janreges/siteone-crawler/commit/737b8c63bce618bdd613090205866d03bde1d67b)\n- docs: added Table of Contents to JSON-OUTPUT.md and TEXT-OUTPUT.md [`2aa2856`](https://github.com/janreges/siteone-crawler/commit/2aa28569de9fa0315219e68d084cc675ead57303)\n- docs: added detailed documentation and real sample JSON and TXT output from the crawler for a better idea of ​​its functionality [`09495d1`](https://github.com/janreges/siteone-crawler/commit/09495d187e3f80d4f4c29176c12540e300c5cb6f)\n- docs: added detailed documentation and real sample JSON and TXT output from the crawler for a better idea of ​​its functionality [`cb7606b`](https://github.com/janreges/siteone-crawler/commit/cb7606b2c8fe1547cfdf787d0b7050693228ff2e)\n- json output docs: first version [`73e8d45`](https://github.com/janreges/siteone-crawler/commit/73e8d45ad93e1cc88a778533d0955e22cec9d6c7)\n- output options: added option --timezone (e.g. 
Europe/Prague, default is UTC) to set the time zone in which dates and times in HTML reports and exported folder/file names should be, refs #57 [`e3d3213`](https://github.com/janreges/siteone-crawler/commit/e3d321315b6c9f0290b5795345a90e78af32a358)\n- website to markdown: use link URL as text when link text is empty [`873ffae`](https://github.com/janreges/siteone-crawler/commit/873ffae76a8d96c8a2a4e4670ad09f4ed8527d4a)\n- website to markdown: if the link contains nested div/span tags, display the link in markdown as a list-item so that it is on its own line [`c48f346`](https://github.com/janreges/siteone-crawler/commit/c48f34614a057b79e5f9e5d5fbb9877cd7c2d25f)\n- website to markdown: removed the use of html2markdown (problematic integration on windows due to cygwin) and replaced with a custom HtmlToMarkdownConverter [`4e1db09`](https://github.com/janreges/siteone-crawler/commit/4e1db090f7b9276663c8fda587e8673d67783340)\n- content processor: added justification for skipping URLs due to exceeding --max-depth [`a6bc08a`](https://github.com/janreges/siteone-crawler/commit/a6bc08ac2367b3fb008b51e1278b3b78ae5bfe28)\n- README: converting arguments to a table view and adding missing links to the outline [`c23a686`](https://github.com/janreges/siteone-crawler/commit/c23a6860f06918062a159747039bd38e868cd7f8)\n- README: added all missing options (--max-reqs-per-sec, --max-heading-level, --websocket-server, --console-width and a few others less important) [`82c48bc`](https://github.com/janreges/siteone-crawler/commit/82c48bccf597a7c3811c16ef6c8b29fc37d7c46c)\n- extra columns: added option to extract data using XPath and RegEx to --extra-columns [`cd6d55a`](https://github.com/janreges/siteone-crawler/commit/cd6d55af254f4f38b25399293aa6d122c578f4c7)\n- http response: ensuring that the repeated response header is merged into a concatenated string, instead of an array, refs #48 
[`c0f3b21`](https://github.com/janreges/siteone-crawler/commit/c0f3b210e3ddca203eb9363f038bcf4e30a3f30c)\n- css processor: fix for a situation where some processors could cause CSS content to be NULL [`c8f2ffc`](https://github.com/janreges/siteone-crawler/commit/c8f2ffc45628a2f0f1e477dc7e2ea436c9ebafbe)\n- website to markdown: better removal of nested images in situations like [![logo by @foobar](data:image/gif;base64,fooo= \"logo by @foobar\")](index.html) [`9ecba5e`](https://github.com/janreges/siteone-crawler/commit/9ecba5e91608fbbcd625e3ff42621869a7e31f00)\n- website to markdown: first version of the converter of entire web pages to markdown [`b944edb`](https://github.com/janreges/siteone-crawler/commit/b944edbcc33381c97ba220e1920994574c676225)\n- security check: handle case of multiple headers with the same name [`706977e`](https://github.com/janreges/siteone-crawler/commit/706977e545c428ab82714e95a75294841dac5e46)\n- html processor: do not remove the schema and host for URLs defined in --ignore-regex [`8be42af`](https://github.com/janreges/siteone-crawler/commit/8be42afea5af076aee097842fa3c4996e66c47ef)\n- offline export: added --offline-export-remove-unwanted-code=&lt;1/0&gt; (default is 1) to remove unwanted code for offline mode - typically, JS of the analytics, social networks, cookie consent, cross origins, etc .. refs #37 [`17a11fa`](https://github.com/janreges/siteone-crawler/commit/17a11fa3fe7a2d9c012e0f70c2392e833e02193c)\n- loop protection: added --max-non200-responses-per-basename as configurable protection against looping with dynamic non-200 URLs. 
If a basename (the last part of the URL after the last slash) has more non-200 responses than this limit, other URLs with same basename will be ignored/skipped [`063bddf`](https://github.com/janreges/siteone-crawler/commit/063bddf47a9fe82dc2b08297acd16fd154001feb)\n- bin/swoole-cli: upgrade to latest Swoole 6.0.0 (this version already supports Swoole\\Threads - in the future there will be a refactoring that will relieve us of the necessity to use Swoole\\Table, which requires memory preallocation for a predefined number of rows + my ticket https://github.com/swoole/swoole-src/issues/5460 has been processed regarding the support of getting the values of repeated header) [`b6e7c23`](https://github.com/janreges/siteone-crawler/commit/b6e7c23c055032a1003605ef2679f1ca59b64a08)\n- css processor: fix query string and anchor processing for paths in url() + don't replace url(data:*) with complex information e.g. about svg including brackets, refs #31 [`36eece8`](https://github.com/janreges/siteone-crawler/commit/36eece89c0719602145dbf51673d80355a80bfd2)\n- skipped urls: width defined fixed at 60 - better for most situations than the previous dynamic calculation [`8ef462f`](https://github.com/janreges/siteone-crawler/commit/8ef462f2fb52b65213136705536ba575dd2a9511)\n- manager: refactored mb_convert_encoding() -&gt; htmlentities() as part of the migration to PHP 8.4.1 [`5c7c903`](https://github.com/janreges/siteone-crawler/commit/5c7c903d7d4d178b691c870e4a71fc862685c21d)\n- http cache analysis: added analysis of http cache of all pages and assets - divided by content type, domains, and their combination [`b09cfbd`](https://github.com/janreges/siteone-crawler/commit/b09cfbdf3fe033feef3b64b0fcbbda15dc0308ab)\n- css processing: added search for urls in @import url(*.css) [`c964fea`](https://github.com/janreges/siteone-crawler/commit/c964fea1382fec71b990ac2cd89683590694d5b3)\n- analysis/report: if there is no URL with code &gt;= 200, there is no point to perform analysis, print 
empty output of all analyzers and generate full report [`c1bb448`](https://github.com/janreges/siteone-crawler/commit/c1bb448922cb47d0fe7fa28d2c5f540d6961ea94)\n- options: fix passing booleans to correctUrl() in case of empty '-u' or '--url' parameters (recognized as boolean flags) [`a297fec`](https://github.com/janreges/siteone-crawler/commit/a297feccb34002604705c180855d8f12cd0e41a2)\n- skipped-urls: added overview of skipped URLs including summary across domains - not only from security point of view it is good to know where external links are pointing and from where js/css/fonts/images are loaded [`84ae146`](https://github.com/janreges/siteone-crawler/commit/84ae1467a6c02b194e9e5631351f00a52b5924e0)\n- user-agent: if a manually defined user-agent ends with the exclamation !, do not add the signature siteone-crawler/version and remove the exclamation [`cfda3b0`](https://github.com/janreges/siteone-crawler/commit/cfda3b072e208966f9e7078211257d1a027d2bfa)\n- options: better response and warning for unfilled required --url [`52e50db`](https://github.com/janreges/siteone-crawler/commit/52e50db58f15bd10ab64b70f4c3f3fbf299c0135)\n- dns resolving: added --resolve attribute, which behaves exactly the same as curl, and using the 'domain:port:ip' entry it is possible to provide a custom IP address for the domain:port pair [`4031181`](https://github.com/janreges/siteone-crawler/commit/403118132807f30c65ed89b1b2d8f924a22e3a90)\n- windows/cygwin: workarounds for cygwin environment to return as much DNS/SSL/TLS info as possible even if nslookup or dig cannot be called [`bfc4f55`](https://github.com/janreges/siteone-crawler/commit/bfc4f5508e85b4af2e7c17309181791a3a9d5fc1)\n- upload timeout: fix that --upload-timeout does not overwrite the primary timeout [`c429639`](https://github.com/janreges/siteone-crawler/commit/c429639e30d44420bc9af017536714df52868813)\n- readme: adding a sample report and clone of nextjs.org and a few other updates 
[`07ad5e1`](https://github.com/janreges/siteone-crawler/commit/07ad5e119b47455ff4a2e3ba6230a21203d40396)\n- readme: added description for --allowed-domain-for-external-files and --allowed-domain-for-crawling [`0c8b1b3`](https://github.com/janreges/siteone-crawler/commit/0c8b1b3fb791a5c0e8f540a34d62122682680c19)\n- filtering: added --single-foreign-page to ensure that only the linked page and its assets are loaded from the external domain (which second-level domain is not the same as the initialization URL), but not all other pages on the external domain are automatically crawled [`c4af4ec`](https://github.com/janreges/siteone-crawler/commit/c4af4ec5fb76456f4d47eaf6041ba4be4fbb48b8)\n- filtering: added --disable-all-assets as a shortcut for calling all --disable-* flags [`7e32c44`](https://github.com/janreges/siteone-crawler/commit/7e32c440fb0ee0260f7b1e2c6b2a01b753ffb149)\n- filtering: added --max-depth=&lt;int&gt; for maximum crawling depth (for pages, not assets) and --single-page moved to basic options [`2dbff75`](https://github.com/janreges/siteone-crawler/commit/2dbff756dec3735f8f8c9f293dcd846eb3b3fde6)\n- resource filtering: added --single-page for loading only one given URL and their assets [`7325a4b`](https://github.com/janreges/siteone-crawler/commit/7325a4bbf633f60015309e257af509a5f21384d5)\n- offline exporter: added the possibility to use --replace-query-string to replace the default behavior where the query string is replaced by a short hash constructed from the query string in filenames, see issue #30 [`1a3482c`](https://github.com/janreges/siteone-crawler/commit/1a3482c6dada06b8482f205ceb181d8b42a62607)\n- offline export: added --replace-content=&lt;val&gt; option to replace content in HTML/JS/CSS before saving to disk (with strict text & regexp support) [`81cddaa`](https://github.com/janreges/siteone-crawler/commit/81cddaaf57550ac253b3e1ab322c3f5498374e96)\n- revert caps 
[`76a7418`](https://github.com/janreges/siteone-crawler/commit/76a74184c871714871f537344a84e757069fff0c)\n- Revert \"Auxiliary commit to revert individual files from b3bb0eea10075aee124cce485379c24ece78df79\" [`5878be9`](https://github.com/janreges/siteone-crawler/commit/5878be97f663d8ac70eac9e56578e628faeabb9f)\n- robots.txt handling: process Disallow records only for user-agent 'SiteOne-Crawler' or '*' [`9c2c989`](https://github.com/janreges/siteone-crawler/commit/9c2c989c569fed518bb5139c1d496159cc486683)\n- new option for the OfflineWebsiteExporter [`2c4bbbc`](https://github.com/janreges/siteone-crawler/commit/2c4bbbc6f0e55a4f3af6a89be50450e15b65cdd2)\n- tables: added --rows-limit option (default 200) to hard limit the length of all tables with data from analyses (except Visited URLs) to prevent very long and slow reports .. tables are sorted by severity, so it should be ok [`9798252`](https://github.com/janreges/siteone-crawler/commit/9798252901dd25797d1d38fa26a19c6dbc409fa1)\n- video gallery: added display of all found videos with video player (including use of observer for lazy loading and smart option to preload first seconds of video + button to play 2 seconds of each video sequentially) [`411736a`](https://github.com/janreges/siteone-crawler/commit/411736ac3852d07464fe4a4a52c4c0bf171d716f)\n- license: change of licensing to MIT [`14b73e2`](https://github.com/janreges/siteone-crawler/commit/14b73e2e10cc924112966d2c5b16812dadf1fc48)\n- non exhaustive typo and spelling corrections [`b3bb0ee`](https://github.com/janreges/siteone-crawler/commit/b3bb0eea10075aee124cce485379c24ece78df79)\n\n#### [v1.0.8](https://github.com/janreges/siteone-crawler/compare/v1.0.7...v1.0.8)\n\n> 24 August 2024\n\n- reports: changed file name composition from report.mydomain.com.* to mydomain.com.report.* [`#9`](https://github.com/janreges/siteone-crawler/pull/9)\n- version: update to 1.0.8.20240824 
[`6c634e0`](https://github.com/janreges/siteone-crawler/commit/6c634e0f88cce49aa3f5fb9cd69ca55fa5191bd8)\n- version 1.0.8.20240824 + changelog [`a02cc7b`](https://github.com/janreges/siteone-crawler/commit/a02cc7bf4c0fc4703189341d9ea0be2345b95796)\n- crawler: solved edge-case, which very rarely occurred when the queue processing was already finished, but the last outstanding coroutine still found some new URL [`a85990d`](https://github.com/janreges/siteone-crawler/commit/a85990d662d74af281805cfdf10c0320fee0007a)\n- javascript processor: improvement of webpack JS processing in order to correctly replace paths from VueJS during offline export (as e.g. in case of docs.netlify.com) .. without this, HTML had the correct paths in the left menu, but JS immediately broke them because they started with an absolute path with a slash at the beginning [`9bea99b`](https://github.com/janreges/siteone-crawler/commit/9bea99b9684e6059b8abfad4b382fafdad31c9a9)\n- offline export: detect and process fonts.googleapis.com/css* as CSS even if there is no .css extension [`da33100`](https://github.com/janreges/siteone-crawler/commit/da33100975635be8305e07c2023a22c300b66216)\n- js processor: removed the forgotten var_dump [`5f2c36d`](https://github.com/janreges/siteone-crawler/commit/5f2c36de1666e6987d2c9d88a39e3b6d0a2e1f32)\n- offline export: improved search for external JS in the case of webpack (dynamic composition of URLs from an object with the definition of chunks) - it was debugged on docs.netlify.com [`a61e72e`](https://github.com/janreges/siteone-crawler/commit/a61e72e7f5b773a437b4151432db04a5afd7124a)\n- offline export: in case the URL ends with a dot and a number (so it looks like an extension), we must not recognize it as an extension in some cases [`c382d95`](https://github.com/janreges/siteone-crawler/commit/c382d959f7440ebfcd95566ec0050e771a2f3495)\n- offline url converter: better support for SVG in case the URL does not contain an extension at all, but has e.g. 
'icon' in the URL (it's not perfect) [`c9c01a6`](https://github.com/janreges/siteone-crawler/commit/c9c01a69905fefce82f4e8f85e707a0d1abb5e1e)\n- offline exporter: warning instead of exception for some edge-cases, e.g. not saving SVG without an extension does not cause the export to stop [`9d285f4`](https://github.com/janreges/siteone-crawler/commit/9d285f4d599ba8892dd8752e8d831cd3c86af178)\n- cors: do not set Origin request header for images (otherwise error 403 on cdn.sanity.io for svg, etc.) [`2f3b7eb`](https://github.com/janreges/siteone-crawler/commit/2f3b7eb51a03d42d3d2961c84aadcd118b546e05)\n- best practice analyzer: in checking for missing quotes ignore values ​​longer than 1000 characters (fixes, e.g., at skoda-auto.cz the error Compilation failed: regular expression is too large at offset 90936) [`8a009df`](https://github.com/janreges/siteone-crawler/commit/8a009df9734773275fd9805862dc9bfeeccb6079)\n- html report: added loading of extra headers to the visited URL list in the HTML report [`781cf17`](https://github.com/janreges/siteone-crawler/commit/781cf17c18088126db74ebc1ef00fee3d6784979)\n- Frontload the report names [`62d2aae`](https://github.com/janreges/siteone-crawler/commit/62d2aae57e31c7bfa53720446cc8dfbc59e482af)\n- robots.txt: added option --ignore-robots-txt (we often need to view internal or preview domains that are otherwise prohibited from indexing by search engines) [`9017c45`](https://github.com/janreges/siteone-crawler/commit/9017c45a675dd327895b57f14095ad6bd52a02fc)\n- http client: adden an explicit 'Connection: close' header and explicitly calling $client-&gt;close(), even though Swoole was doing it automatically after exiting the coroutine [`86a7346`](https://github.com/janreges/siteone-crawler/commit/86a7346d059452d210b945ca4329e1cc17781dca)\n- javascript processor: parse url addresses to import the JS module only in JS files (otherwise imports from HTML documentation, e.g. 
on the websites svelte.dev or nextjs.org, were parsed by mistake) [`592b618`](https://github.com/janreges/siteone-crawler/commit/592b618c01e75509e16a812fafab7f21f3c7c64d)\n- html processor: added obtaining urls from HTML attributes that are not wrapped in quotes (but I am aware that current regexps can cause problems in the cases when are used spaces, which are not properly escaped) [`f00abab`](https://github.com/janreges/siteone-crawler/commit/f00ababfa459eca27dce7657fe91c70831f86089)\n- offline url converter: swapping woff2/woff order for regex because in this case their priority is important and because of that woff2 didn't work properly [`3f318d1`](https://github.com/janreges/siteone-crawler/commit/3f318d19fa0a3757546493ac7f47cca21922b1f5)\n- non-200 url basename detection: we no longer consider e.g. image generators that have the same basename and the url to the image in the query parameters as the same basename [`bc15ef1`](https://github.com/janreges/siteone-crawler/commit/bc15ef198bb13fe845fef8cd4946b2cab5c2ea6d)\n- supertable: activation of automatic creation of active links also for homepage '/' [`c2e228e`](https://github.com/janreges/siteone-crawler/commit/c2e228e0d475351431cf9b060487e86ce6d33e52)\n- analysis and robots.txt: improving the display of url addresses for SEO analysis in the case of a multi-domain website, so that it cannot happen that the same url, e.g. 
'/', is in the overview multiple times without recognizing the domain or scheme + improving the work with robots.txt in SEO detection and displaying urls banned for indexing [`47c7602`](https://github.com/janreges/siteone-crawler/commit/47c7602217e40a4f6d4f3af5c71d6dff72952aab)\n- offline website exporter: we add the suffix '_' to the folder name only in the case of a typical extension of a static file - we don't want this to happen with domain names as well [`d16722a`](https://github.com/janreges/siteone-crawler/commit/d16722a5ad6271270fb0fff11e66a7f02f3b6e9a)\n- javascript processor: extract JS urls also from imports like import {xy} from \"./path/foo.js\" [`aec6cab`](https://github.com/janreges/siteone-crawler/commit/aec6cab051a46df9d89866f5cfd7e66312dafb92)\n- visited url: added 'txt' extension to looksLikeStaticFileByUrl() [`460c645`](https://github.com/janreges/siteone-crawler/commit/460c6453d91e85c2889ebaa2b2542fd88c5ffa6a)\n- html processor: extract JS urls also from &lt;link href=\"*.js\"&gt;, typically with rel=\"modulepreload\" [`c4a92be`](https://github.com/janreges/siteone-crawler/commit/c4a92bee00d96c530431134370a3ba0d2216a1c1)\n- html processor: extracting repeated calls to getFullUrl() into a variable [`a5e1306`](https://github.com/janreges/siteone-crawler/commit/a5e1306530717d9edd4f95a7989539a172a38f4a)\n- analysis: do not include urls that failed to load (timeout, skipping, etc.) 
in the analysis of content-types and source-domains - prevention of displaying content type 'unknown' [`b21ecfb`](https://github.com/janreges/siteone-crawler/commit/b21ecfb85f58d07c0a82b93826ad2977ab2cd523)\n- cli options: improved method of removing quotes even for options that can be arrays - also fixes --extra-columns='Title' [`97f2761`](https://github.com/janreges/siteone-crawler/commit/97f27611acf2fc4ed24b1e5574be84711ea3fa12)\n- url skipping: if there are a lot of URLs with the same basename (ending after the last slash), we will allow a maximum of 5 requests for URLs with the same basename - the purpose is to prevent a lot of 404 from being triggered when there is an incorrect relative link to relative/my-img.jpg on all pages (e.g. on 404 page on v2.svelte.dev) [`4fbb917`](https://github.com/janreges/siteone-crawler/commit/4fbb91791f9111cc6f9d98b60732fcca7fad2f1f)\n- analysis: perform most of the analysis only on URLs from domains for which we have crawling enabled [`313adde`](https://github.com/janreges/siteone-crawler/commit/313addede29ac847273b6ab6ed3a8ab878a6fb4a)\n- audio & video: added audio/video file search in &lt;audio&gt; and &lt;video&gt; tags, if file crawling is not disabled [`d72a5a5`](https://github.com/janreges/siteone-crawler/commit/d72a5a51bd6863425a3d8bcffc7a9b5eb831f979)\n- base practices: retexting stupid warning like '&lt;h2&gt; after &lt;h0&gt;' to '&lt;h2&gt; without previous heading [`041b383`](https://github.com/janreges/siteone-crawler/commit/041b3836a8a585158ae1a1a6fb0057b367f3a4f6)\n- initial url redirect: in the case thats is entered url that redirects to another url/domain within the same 2nd-level domain (typically http-&gt;https or mydomain.tld -&gt; www.mydomain.tld redirects), we continue crawling with new url/domain and declare a new url as initial url [`166e617`](https://github.com/janreges/siteone-crawler/commit/166e617fbc893798dc7b340f43de75df2d4cf335)\n\n#### 
[v1.0.7](https://github.com/janreges/siteone-crawler/compare/v1.0.6...v1.0.7)\n\n> 22 December 2023\n\n- version 1.0.7.20231222 + changelog [`9d2be52`](https://github.com/janreges/siteone-crawler/commit/9d2be52776c081989322953c7a31debfd4947420)\n- html report template: updated logo link to crawler.siteone.io [`9892cfe`](https://github.com/janreges/siteone-crawler/commit/9892cfe5708a3da2f5fc355246dd50b2a0c5cb4f)\n- http headers analysis: renamed 'Headers' to 'HTTP headers' [`436e6ea`](https://github.com/janreges/siteone-crawler/commit/436e6ea5a9914c8615bb03b444ac0aad15e31c49)\n- sitemap generator: added info about crawler to generated sitemap.xml [`7cb7005`](https://github.com/janreges/siteone-crawler/commit/7cb7005bf50b8f93b421c94c57ff51eb99b45912)\n- html report: refactor of all inline on* event listeners to data attributes and event listeners added from static JS inside &lt;script&gt;, so that we can disable all inline JS in the online HTML report and allow only our JS signed with hashes by Content-Security-Policy [`b576eef`](https://github.com/janreges/siteone-crawler/commit/b576eef55a5678a67928970fc51aaaefd7abd1a8)\n- readme: removed HTTP auth from roadmap (it's already done), improved guide how to implement own upload endpoint and message about SMTP moved under mailer options [`e1567ae`](https://github.com/janreges/siteone-crawler/commit/e1567aee52f9d09c1cef1ad35babaf9eea388175)\n- utils: hide passwords/authentication specified in cli parameters as *auth=xyz (e.g. 
--http-auth=abc:xyz)\" in html report [`c8bb88f`](https://github.com/janreges/siteone-crawler/commit/c8bb88fc1a65ecdfd53db23fc5d972b841830837)\n- readme: fixed formatting of the upload and expert options [`2d14bd5`](https://github.com/janreges/siteone-crawler/commit/2d14bd5972496989624f91617de2689601e1c027)\n- readme: added Upload Options [`d8352c5`](https://github.com/janreges/siteone-crawler/commit/d8352c5acfddbeef1c1ae6498556dc296d944e0b)\n- upload exporter: added possibility via --upload to upload HTML report to offline URL, by default crawler.siteone.io/html/* [`2a027c3`](https://github.com/janreges/siteone-crawler/commit/2a027c38bfdb8e6e416b9a79ebe81e809c9326d9)\n- parsed-url: fixed warning in the case of url without host [`284e844`](https://github.com/janreges/siteone-crawler/commit/284e844f3f94cdb02032ddb76e51caa9a584c120)\n- seo and opengraph: fixed false positives 'DENY (robots.txt)' in some cases [`658b649`](https://github.com/janreges/siteone-crawler/commit/658b6494130fa282505ec38f12aa058acf7709b9)\n- best practices and inline-svgs: detection and display of the entire icon set in the HTML report in the case of &lt;svg&gt; with more &lt;symbol&gt; or &lt;g&gt; [`3b2772c`](https://github.com/janreges/siteone-crawler/commit/3b2772c59f822b7b4a6f91e15b616815b5ff92c4)\n- sitemap generator: sort urls primary by number of dashes and secondary alphabetically (thanks to this, urls of the main levels will be at the beginning) [`bbc47e6`](https://github.com/janreges/siteone-crawler/commit/bbc47e6239f9693c621016a50e624698dc3d242d)\n- sitemap generator: only include URLs from the same domain as the initial URL [`9969254`](https://github.com/janreges/siteone-crawler/commit/9969254e35cd8c134f85a7817de8722091f0377c)\n- changelog: updated by 'composer changelog' [`0c67fd4`](https://github.com/janreges/siteone-crawler/commit/0c67fd4f8d308d8d51d5b912d9b82cc96fb6e4fb)\n- package.json: used by auto-changelog generator 
[`6ad8789`](https://github.com/janreges/siteone-crawler/commit/6ad87895e5a8ab8bbce3d9cbf92ee5e8b8218cc0)\n\n#### [v1.0.6](https://github.com/janreges/siteone-crawler/compare/v1.0.5...v1.0.6)\n\n> 8 December 2023\n\n- readme: removed bold links from the intro (it didn't look as good on github as it did in the IDE) [`b675873`](https://github.com/janreges/siteone-crawler/commit/b6758733cde67f11322a2f82573b19ec1a0edc9d)\n- readme: improved intro and gif animation with the real output [`fd9e2d6`](https://github.com/janreges/siteone-crawler/commit/fd9e2d69c8f940cfaa81ad7bab86f1a74f01b0da)\n- http auth: for security reasons, we only send auth data to the same 2nd level domain (and possibly subdomains). With HTTP basic auth, the name and password are only base64 encoded and we would send them to foreign domains (which are referred to from the crawled website) [`4bc8a7f`](https://github.com/janreges/siteone-crawler/commit/4bc8a7f9871064aa1c88c374aa299904409d2817)\n- html report: increased specificity of the .header class for the header, because this class were also used by the generic class at &lt;td class='header'&gt; in security tab [`9d270e8`](https://github.com/janreges/siteone-crawler/commit/9d270e884545d6459f20348db71404e513ae8928)\n- html report: improved readability of badge colors in light mode [`76c5680`](https://github.com/janreges/siteone-crawler/commit/76c5680397446b84f3b13800590d914b7a9b0533)\n- crawler: moving the decrement of active workers after parsing URLs from the content, where further filling of the queue could occur (for this reason, queue processing could sometimes get stuck in the final stages) [`f8f82ab`](https://github.com/janreges/siteone-crawler/commit/f8f82ab61c1969952bb70f1b598ed3d97938a84e)\n- analysis: do not parse/check empty HTML (it produced unnecessary warning) - it is valid to have content-type: text/html but with connect-lengt: 0 (for example case for 'gtm.js?id=') 
[`436d81b`](https://github.com/janreges/siteone-crawler/commit/436d81b81f905178fb972f8b5cd0236bac244bc4)\n\n#### [v1.0.5](https://github.com/janreges/siteone-crawler/compare/v1.0.4...v1.0.5)\n\n> 3 December 2023\n\n- changelog: updated changelog after 3 added commits to still untagged draft release 1.0.5 [`f42fe18`](https://github.com/janreges/siteone-crawler/commit/f42fe18de89676dc0dea4dc033207c934282d04b)\n- utils tests: fixed tests of methods getAbsolutePath() and getOutputFormattedPath() [`d4f4576`](https://github.com/janreges/siteone-crawler/commit/d4f4576ff566eb48495c9fb55a898b0989ef42c3)\n- crawler.php: replaced preg_match to str_contains [`5b28952`](https://github.com/janreges/siteone-crawler/commit/5b289521cdbb90b6571a29cb9c880e065b852129)\n- version: 1.0.5.20231204 + changelog [`7f2e974`](https://github.com/janreges/siteone-crawler/commit/7f2e9741fab25e9369151bc2d79a38b8827e2463)\n- option: replace placeholders like a '%domain' also in validateValue() method because there is also check if path is writable with attempt to mkdir [`329143f`](https://github.com/janreges/siteone-crawler/commit/329143fa23925ea523504735b3f724c026fe5ac6)\n- swoole in cygwin: improved getBaseDir() to work better even with the version of Swoole that does not have SCRIPT_DIR [`94cc5af`](https://github.com/janreges/siteone-crawler/commit/94cc5af4411a8c7427ee136a937ac629b8637668)\n- html processor: it must also process the page with the redirect, because is needed to replace the URL in the meta redirect tag [`9ce0eee`](https://github.com/janreges/siteone-crawler/commit/9ce0eeeebe1e524b9d46d91dd4cecb2e796db8c3)\n- sitemap: use formatted output path (primary for better output in Cygwin environment with needed C:/foo &lt;-&gt; /cygwin/c/foo conversion) [`6297a7f`](https://github.com/janreges/siteone-crawler/commit/6297a7f4069f9e09c013268e0df896db2fa91dec)\n- file exporter: use formatted output path (primary for better output in Cygwin environment with needed C:/foo &lt;-&gt; 
/cygwin/c/foo conversion) [`426cfb2`](https://github.com/janreges/siteone-crawler/commit/426cfb2b32f854d65abfce841e4e4f4badf04fef)\n- options: in the case of dir/file validation, we want to work with absolute paths for more precise error messages [`6df228b`](https://github.com/janreges/siteone-crawler/commit/6df228bdfc87a2c9fb6eee611fdc87d976b7f721)\n- crawler.php: improved baseDir detection - we want to work with absolute path in all scenarios [`9d1b2ce`](https://github.com/janreges/siteone-crawler/commit/9d1b2ce9bedb15ede90bcee9641e1cfc62b9c3cc)\n- utils: improved getAbsolutePath() for cygwin and added getOutputFormattedPath() with reverse logic for cygwin (C:/foo/bar &lt;-&gt; /cygdrive/c/foo/bar) [`161cfc5`](https://github.com/janreges/siteone-crawler/commit/161cfc5c4fd3fa3675cade409d7d5e11db2da0c6)\n- offline export: renamed --offline-export-directory to --offline-export-dir for consistency with --http-cache-dir or --result-storage-dir [`26ef45d`](https://github.com/janreges/siteone-crawler/commit/26ef45d145a1a02a5313067e6298571e26d9618b)\n\n#### [v1.0.4](https://github.com/janreges/siteone-crawler/compare/v1.0.3...v1.0.4)\n\n> 30 November 2023\n\n- dom parsing: handling warnings in case of impossibility to parse some DOM elements correctly, fixes #3 [`#3`](https://github.com/janreges/siteone-crawler/issues/3)\n- version: 1.0.4.20231201 + changelog [`8e15781`](https://github.com/janreges/siteone-crawler/commit/8e15781265cdd9cce10d9dcde57d46b57b50e1cf)\n- options: ignore empty values in the case of directives with the possibility of repeated definition [`5e30c2f`](https://github.com/janreges/siteone-crawler/commit/5e30c2f8ad6cf00ad819ba1d7d6ec4e6c95a7113)\n- http-cache: now the http cache is turned off using the 'off' value (it's more understandable) [`9508409`](https://github.com/janreges/siteone-crawler/commit/9508409fbba2d96dc92cd73bed5abe462d5cea15)\n- core options: added --console-width to enforce the definition of the console width and disable automatic 
detection via 'tput cols' on macOS/Linux or 'mode con' on Windows (used by Electron GUI) [`8cf44b0`](https://github.com/janreges/siteone-crawler/commit/8cf44b06616e15301c486146a7c6b1003ce5137f)\n- gui support: added base-dir detection for Windows where the GUI crawler runs in Cygwin [`5ce893a`](https://github.com/janreges/siteone-crawler/commit/5ce893a66c7f1e21af025603b66223e04246e029)\n- renaming: renamed 'siteone-website-crawler' to 'siteone-crawler' and 'SiteOne Website Crawler' to 'SiteOne Crawler' [`64ddde4`](https://github.com/janreges/siteone-crawler/commit/64ddde4b53f16679a8c4671c98b3f9c619d94b42)\n- utils: fixed color-support detection [`62dbac0`](https://github.com/janreges/siteone-crawler/commit/62dbac07d15ecfa0ff677c277e2a3381a47025bf)\n- core options: added --force-color options to bypass tty detection (used by Electron GUI) [`607b4ad`](https://github.com/janreges/siteone-crawler/commit/607b4ad8583845adea209f75edfa27870ac23f9d)\n- best practice analysis: in the case of checking an image (e.g. 
for the existence of WebP/AVIF), we also want to check external images, because very often websites have images linked from external domains or services for image modification or optimization [`6100187`](https://github.com/janreges/siteone-crawler/commit/6100187347e0bbba6270335e2d9b2faf37475333)\n- html report: set scaleDown as default object-fit for image gallery [`91cd300`](https://github.com/janreges/siteone-crawler/commit/91cd300dcd7455c2b9be548fb2746cea7fd7c904)\n- offline exporter: added short -oed as alias to --offline-export-directory [`22368d9`](https://github.com/janreges/siteone-crawler/commit/22368d9a892aab8011aa4a0884bf01a8560f6167)\n- image gallery: list of all images on the website (except those from the srcset, where there would be duplicates only in other sizes or formats), including SVG with rich filtering options (through image format, size and source tag/attribute) and the option of choosing small/medium/view and scale-down/contains/cover for object-fit css property [`43de0af`](https://github.com/janreges/siteone-crawler/commit/43de0af1c60d398f91b373c192d1a35ac2df2fd1)\n- core options: added a shortened version of the command name consisting of only one hyphen and the first letters of the words of the full command (e.g. 
--memory-limit has short version -ml), added getInitialScheme() [`eb9a3cc`](https://github.com/janreges/siteone-crawler/commit/eb9a3cc62dffc58be2701c52bb21509d39a5dfad)\n- visited url: added 'sourceAttr' with information about where the given URL was found and useful helper methods [`6de4e39`](https://github.com/janreges/siteone-crawler/commit/6de4e39c5f8b9ba685e3865193274ccf0ee91a3d)\n- found urls: in the case of the occurrence of one URL in several places/attributes, we consider the first one to be the main one (typically the same URL in src and then also in srcset) [`660bb2b`](https://github.com/janreges/siteone-crawler/commit/660bb2b2bd2cb6949fe9c573e72b31e9fb97a9fe)\n- url parsing: added more recognition of which attributes the given URL address was parsed from (we need to recognize src and srcset for ImageGallery in particular) [`802c3c6`](https://github.com/janreges/siteone-crawler/commit/802c3c66a40087745e68f47392f0e6e8e9725171)\n- supertable and urls: in removing the redundant hostname for a more compact URL output, we also take into account the scheme http:// or https:// of initial URL (otherwise somewhere it lookedlike duplicate) + prevention of ansi-color definitions for bash in the HTML output [`915469e`](https://github.com/janreges/siteone-crawler/commit/915469e2a4a6d0fed337ca70efe9170758751ade)\n- title/description/keywords parsing: added html entities decoding because some website uses decoded entities with &#xED; &#x2013; etc [`920523d`](https://github.com/janreges/siteone-crawler/commit/920523d3c55baf6cd7b2602334d9776b3e40f4d7)\n- crawler: added 'sourceAttr' to the swoole table queue and already visited URLs (we will use it in the Image Gallery for filtering, so as not to display unnecessarily and a lot of duplicate images only in other resolutions from the srcsets) [`0345abc`](https://github.com/janreges/siteone-crawler/commit/0345abc6dab770e3196dd88ff0123a2050828644)\n- url parameter: it is already possible not to enter the scheme and https:// 
or http:// will be added automatically (http:// for e.g. for localhost) [`85e14e9`](https://github.com/janreges/siteone-crawler/commit/85e14e961b53b83c208ac936972a335cace61bf8)\n- disabled images: in the case of a request to remove the images, replace their body with a 1x1px transparent gif and place a semi-transparent hatch with the crawler logo and opacity as a background [`c1418c3`](https://github.com/janreges/siteone-crawler/commit/c1418c3154301fd3995dde421b066f16850203e7)\n- url regex filtering: added option , which will allow you to limit the list of crawled pages according to the declared regexps, but at the same time it will allow you to crawl and download assets (js, css, images, fonts, documents, etc.) from any URL (but with respect to allowed domains) [`21e67e5`](https://github.com/janreges/siteone-crawler/commit/21e67e5be74050cd5b7c9998654ed66f18db4d85)\n- img srcset parsing: because a valid URL can also contain a comma (and various dynamic parametric img generators use them) and in the srcset a comma+whitespace should be used to separate multiple values, this is also reflected in the srcset parsing [`0db578b`](https://github.com/janreges/siteone-crawler/commit/0db578bda37c024b2b111c814e35c2107e4751ad)\n- websocket server: added option to set --websocket-server, which starts a parallel process with the websocket server, through which the crawler sends various information about the progress of crawling (this will also be used by Electron UI applications) [`649132f`](https://github.com/janreges/siteone-crawler/commit/649132f8965421cd1bb3570fbb9f534e6caef313)\n- http client: handle scenario when content loaded from cache is not valid (is_bool) [`1ddd099`](https://github.com/janreges/siteone-crawler/commit/1ddd099ecdadc5752016237ec1f0acf80e907dc8)\n- HTML report: updated logo with final look [`2a3bb42`](https://github.com/janreges/siteone-crawler/commit/2a3bb428180067a649f2467419920b3d4f70a9fd)\n- mailer: shortening and simplifying email content 
[`e797107`](https://github.com/janreges/siteone-crawler/commit/e7971071f8c5e4cff1472464ce9ec4407c198a59)\n- robots.txt: added info about loaded robots.txt to summary (limited to 10 domains for case of huge multi domain crawling) [`00f9365`](https://github.com/janreges/siteone-crawler/commit/00f93659637705bc6389c5f073a29f09b743370f)\n- redirects analyzer: handled edge case with empty url [`e9be1e3`](https://github.com/janreges/siteone-crawler/commit/e9be1e350b1d114c54b7099b54277da23467b538)\n- text output: added fancy banner with crawler logo (thanks to great SiteOne designers!) and smooth effect [`e011c35`](https://github.com/janreges/siteone-crawler/commit/e011c35f3cbc87fceb9d7a9c56c726817c79b543)\n- content processors: added applyContentChangesBeforeUrlParsing() and better NextJS chunks handling [`e5c404f`](https://github.com/janreges/siteone-crawler/commit/e5c404f2d52a7c2ebdb80ae3c93760c7e881dc9a)\n- url searches: added ignoring data:, mailto:, tel:, file:// and other non-requestable resources also to FoundUrls [`5349be2`](https://github.com/janreges/siteone-crawler/commit/5349be242f99567b8f5f093537a696ef5fd319ac)\n- crawler: added declare(strict_types=1) and banner [`27134d2`](https://github.com/janreges/siteone-crawler/commit/27134d29d16e3e24c633f010f731f11deeeadcb7)\n- heading structure analysis: highlighting and calculating errors for duplicate &lt;h1&gt; + added help cursor with a hint [`f5c7db6`](https://github.com/janreges/siteone-crawler/commit/f5c7db6206ed06e0cbaf38a7ae2505be573da2e6)\n- core options: added --help and --version, colorized help [`6f1ada1`](https://github.com/janreges/siteone-crawler/commit/6f1ada112898580d2de028c02e32fdeb8ad2a845)\n- ./crawler binary - send output of cd - to /dev/null and hide unwanted printed script path [`16fe79d`](https://github.com/janreges/siteone-crawler/commit/16fe79d08e24c4a6fbd87d16417413725aaa24e8)\n- README: updated paths in the documentation - it is now possible to use the ERROR: Option --url () must be valid 
URL [`86abd99`](https://github.com/janreges/siteone-crawler/commit/86abd998da94971c2512b6018085f39e8dd5db7f)\n- options: --workers default for Cygwin runtime is now 1 (instead of 3), because Cygwin runtime is highly unstable when workers &gt; 1 [`f484960`](https://github.com/janreges/siteone-crawler/commit/f4849606fb382e1b759f547c4f1bfe2e5d8b4d02)\n\n#### [v1.0.3](https://github.com/janreges/siteone-crawler/compare/v1.0.2...v1.0.3)\n\n> 10 November 2023\n\n- version: 1.0.3.20231110 + changelog [`5b80965`](https://github.com/janreges/siteone-crawler/commit/5b8096550dcd489a998d34fae44e3d99375e33e3)\n- cache/storage: better race-condition handling in a situation where several coroutines could write the same folder at one time, then mkdir reported 'File exists' [`be543dc`](https://github.com/janreges/siteone-crawler/commit/be543dc195e675e49064b20ee091903f1977942a)\n\n#### [v1.0.2](https://github.com/janreges/siteone-crawler/compare/v1.0.1...v1.0.2)\n\n> 10 November 2023\n\n- version: 1.0.2.20231110 + changelog [`230b947`](https://github.com/janreges/siteone-crawler/commit/230b9478a36ee664dfe080447c09da9c4a9bc25c)\n- html report: added aria labels to active/important elements [`a329b9d`](https://github.com/janreges/siteone-crawler/commit/a329b9d4e0f040996c17cb3382cf3c07c61a4b35)\n- version: 1.0.1.20231109 - changelog [`50dc69c`](https://github.com/janreges/siteone-crawler/commit/50dc69c9ab956691bbf97860355d410a0bdba0c9)\n\n#### [v1.0.1](https://github.com/janreges/siteone-crawler/compare/v1.0.0...v1.0.1)\n\n> 9 November 2023\n\n- version: 1.0.1.20231109 [`e213cb3`](https://github.com/janreges/siteone-crawler/commit/e213cb326db78e2f69fd3e4f04b9728223550a3d)\n- offline exporter: fixed case when on https:// website is link to same path but with http:// protocol (it overrided proper *.html file just with meta redirect .. 
real case from nextjs.org) [`4a1be0b`](https://github.com/janreges/siteone-crawler/commit/4a1be0bdfb62167c498f6c3b4c91fe74532ff833)\n- html processor: force to remove all anchor listeners when NextJS is detected (it is very hard to achive a working NextJS with offline file:// protocol) [`2b1d935`](https://github.com/janreges/siteone-crawler/commit/2b1d935419bade80d8e6ab07b2ae04ded0df131e)\n- file exporters: now by default crawler generates a html/json/txt report to 'tmp/[report|output].%domain%.%datetime%.[html|json|txt]' .. i assume that most people will want to save/see them [`7831c6b`](https://github.com/janreges/siteone-crawler/commit/7831c6b87dd41444a0fca529bc450bf7934ef541)\n- security analysis: removed multi-line console output for recommendations .. it was ugly [`310af30`](https://github.com/janreges/siteone-crawler/commit/310af308859dbb2fd5895af468195e2339f2788d)\n- json output: added JSON_UNESCAPED_UNICODE for unescaped unicode chars (e.g. czech chars will be readable) [`cf1de9f`](https://github.com/janreges/siteone-crawler/commit/cf1de9f60820963ccb78a00b43ca3aec8b311a77)\n- mailer: do not send e-mails in case of interruption of the crawler using ctrl+c [`19c94aa`](https://github.com/janreges/siteone-crawler/commit/19c94aac8211b4550ba11497e1332d604f8cdbc7)\n- refactoring: manager stats logic extracted into ManagerStats and implemented also into manager of content processors + stats added into 'Crawler stats' tab in HTML report [`3754200`](https://github.com/janreges/siteone-crawler/commit/3754200652dc91ac05efe22812e64c0e4be84019)\n- refactoring: content related logic extracted to content processors based on ContentProcessor interface with methods findUrls():?FoundUrls, applyContentChangesForOfflineVersion():void and isContentTypeRelevant():bool + better division of web framework related logic (NextJS, Astro, Svelte, ...) 
+ better URL handling and maximized usage of ParsedUrl [`6d9f25c`](https://github.com/janreges/siteone-crawler/commit/6d9f25ce82f8a1cfbfbc6bc0b5a6a07262c427b1)\n- phpstan: ignore BASE_DIR warning [`6e0370a`](https://github.com/janreges/siteone-crawler/commit/6e0370aafe02d3bb2ca528ea8a9a37995f5ddce6)\n- offline website exporter: improved export of a website based on NextJS, but it's not perfect, because latest NextJS version do not have some JS/CSS path in code, but they are generated dynamicly from arrays/objects [`c4993ef`](https://github.com/janreges/siteone-crawler/commit/c4993efcb97f7058834713ed273f9c4274be5cad)\n- seo analyzer: fixed trim() warning when no &lt;h1&gt; found [`f0c526f`](https://github.com/janreges/siteone-crawler/commit/f0c526f5d2ff7d0155c1bfc7da7a6c0f2f7a1419)\n- offline export: a lot of improvements when generating the offline version of the website on NextJS - chunk detection from the manifest, replacing paths, etc. [`98c2e15`](https://github.com/janreges/siteone-crawler/commit/98c2e15acf4e22d25301d160968555c19ddd44cc)\n- seo and og: fixed division by zero when no og/twitter tags found [`19e4259`](https://github.com/janreges/siteone-crawler/commit/19e4259c519a3e41eb7aa8eabce80e6364e74639)\n- console output: lots of improvements for nice, consistent and minimal word-wrap output [`596a5dc`](https://github.com/janreges/siteone-crawler/commit/596a5dc17945359ffc0fef2ed8ed8ee8bfc1db00)\n- basic file/dir structure: created ./crawler (for Linux/macOS) and ./crawler.bat for Windows, init script moved to ./src, small related changes about file/dir path building [`5ce41ee`](https://github.com/janreges/siteone-crawler/commit/5ce41ee8e78425747bf40327152bd99499c64013)\n- header status: ignore too dynamic Content-Disposition header [`4e0c6fd`](https://github.com/janreges/siteone-crawler/commit/4e0c6fdf5c356f8c0eea78ccebe29641b90f96b4)\n- offline website exporter: added .html extensions to typical dynamic language extensions, because without it the browser 
will show them as source code [`7130b9e`](https://github.com/janreges/siteone-crawler/commit/7130b9eb666eca5b08c9dbeda91198bc85b31379)\n- html report: show tables with details, even if they are without data (it is good to know that the checks were carried out, but nothing was found) [`da019e4`](https://github.com/janreges/siteone-crawler/commit/da019e4591682c21e9f78de1ec26939088d92ccc)\n- tests: repaired tests after last changes of file/url building for offline website .. merlot is great! [`7c77c41`](https://github.com/janreges/siteone-crawler/commit/7c77c411ff67c01e07d16cb2acce0e926b264fcd)\n- utils: be more precise and do not replace attributes in SVG .. creative designers will not love you when looking at the broken SVG in HTML report [`3fc81bb`](https://github.com/janreges/siteone-crawler/commit/3fc81bb0c47eef2935da2e74721a809a9aff0959)\n- utils: be more precise in parsing phone numbers, otherwise people will 'love' you because of false positives .. wine is still great [`51fd574`](https://github.com/janreges/siteone-crawler/commit/51fd574c764d832d74cb5e67eed890bd9d349a5c)\n- html parser: better support for formatted html with tags/attributes on multiple lines [`89a36d2`](https://github.com/janreges/siteone-crawler/commit/89a36d2fcf3d96b61c4b3d2e20d5a46f4cb96cb8)\n- utils: don't be hungry in stripJavaScript() because you ate half of my html :) wine is already in my head... 
[`0e00957`](https://github.com/janreges/siteone-crawler/commit/0e0095727638b7940d2e555a6be231ad3dde19e4)\n- file result storage: changed cache directory structure for consistency with http client's cache, so it looks like my.domain.tld-443/04/046ec07c.cache [`26bf428`](https://github.com/janreges/siteone-crawler/commit/26bf428f95bc428485d7cf505e74c8a69c94d869)\n- http client cache: for better consistency with result storage cache, directory structure now contains also port, so it looks like my.domain.tld-443/b9/b989bdcf2b9389cf0c8e5edb435adc05.cache [`a0b2e09`](https://github.com/janreges/siteone-crawler/commit/a0b2e09d01e36aed56c0208a8001d616755de096)\n- http client cache: improved directory structure for large scale and better orientation for partial cache deleting.. current structure in tmp dir: my.domain.tld/b9/b989bdcf2b9389cf0c8e5edb435adc05.cache [`10e02c1`](https://github.com/janreges/siteone-crawler/commit/10e02c189297f28ea563ba6f3792462c2d6790ea)\n- offline website exporter: better srcset handling - urls can be defined with or without sizes [`473c1ad`](https://github.com/janreges/siteone-crawler/commit/473c1ad0d753df209aa160b0d90687c4bff21912)\n- html report: blue color for search term, looks better [`cb47df9`](https://github.com/janreges/siteone-crawler/commit/cb47df98e230c0375dbcb14c278250709bf3644a)\n- offline website exporter: handled situation of the same-name folder/file when both the folder /foo/next.js/ and the file /foo/next.js existed on the website (real case from vercel.com) [`7c27d2c`](https://github.com/janreges/siteone-crawler/commit/7c27d2c2277dd134615563ee4eaa706ec0ee7485)\n- exporters: added exec times to summary messages [`41c8873`](https://github.com/janreges/siteone-crawler/commit/41c8873dc33d7f08d91f77d71fcf1bf2fafa30ae)\n- crawler: use port from URL if defined or by scheme .. 
previous solution didn't work properly for localhost:port and parsed URLs to external websites [`324ba04`](https://github.com/janreges/siteone-crawler/commit/324ba04267b962a56817dd10e3ecba7777702aa2)\n- heading analysis: changed sorting to DESC by errors, renamed Headings structure -&gt; Heading structure [`dbc1a38`](https://github.com/janreges/siteone-crawler/commit/dbc1a38f33d4094aebe64020531518538e2b3baf)\n- security analysis: detection and ignoring of URLs that point to a non-existent static file but return 404 HTML, better description [`193fb7d`](https://github.com/janreges/siteone-crawler/commit/193fb7dcf1f994aba69b646576bf7c6f8701a975)\n- super table: added escapeOutputHtml property to column for better escape managing + updated related supertables [`bfb901c`](https://github.com/janreges/siteone-crawler/commit/bfb901cb82b9cda81198df0dc87885b5eceb5c93)\n- headings analysis: replace usage of DOMNode-&gt;textContent because when the headings contain other tags, including &lt;script&gt;, textContent also contains JS code, but without the &lt;script&gt; tag [`5c426c2`](https://github.com/janreges/siteone-crawler/commit/5c426c24969a063aa3366da02520025733cf16e7)\n- best practices: better missing quotes detection and minimizing false positives in special cases (HTML/JS in attributes, etc.) [`b03a534`](https://github.com/janreges/siteone-crawler/commit/b03a5345e7f71f880ee4d36fb9f51c230d8c772f)\n- best practices: better SVG detection and minimizing false positives (e.g. 
code snippets with SVG), improved look in HTML report and better descriptions [`c35f7e2`](https://github.com/janreges/siteone-crawler/commit/c35f7e226f6cd384e5c8cf4b9af3a1a0d3be4cfc)\n- headers analysis: added [ignored generic values] or [see values below] for specific headers [`a7b444d`](https://github.com/janreges/siteone-crawler/commit/a7b444dab0e1c3949abfa0e0746db18343b9b55d)\n- core options: changed --hide-scheme-and-host to --show-scheme-and-host (by default is hidden schema+host better) [`3c202e9`](https://github.com/janreges/siteone-crawler/commit/3c202e998a824f97b6f481575a24e2924c9dc663)\n- truncating: replaced '...' with '…' [`870cf8c`](https://github.com/janreges/siteone-crawler/commit/870cf8cd447fd14e389d76bcc8853b1e691f5349)\n- accessibility analyzer: better descriptions [`514b471`](https://github.com/janreges/siteone-crawler/commit/514b47124d101cd4f0bd67148f41ea5644febd62)\n- crawler & http client: if the response is loaded from the cache, we do not wait due to rate limiting - very useful for repeated executions [`61fbfab`](https://github.com/janreges/siteone-crawler/commit/61fbfab34ba07c1856099051b8f68dc76b1adf09)\n- header stats: added missing strval in values preview [`9e11030`](https://github.com/janreges/siteone-crawler/commit/9e1103064af0962ed4963cace61bf7ad201d19a2)\n- content type analyzer: increased column width for MIME type from 20 to 26 (enough for application/octet-stream) [`c806674`](https://github.com/janreges/siteone-crawler/commit/c806674ee82d0aba90a9d61e10ff2b5e2cf6c813)\n- SSL/TLS analyzer: fixed issues on Windows with Cygwin where nslookup does not work reliably [`714b9e1`](https://github.com/janreges/siteone-crawler/commit/714b9e12a2426574731b62d460c98f1fed95aa18)\n- text output: removed redundant whitespaces from banner after .YYYYMMDD was added to the version number [`8b76205`](https://github.com/janreges/siteone-crawler/commit/8b76205b41ca9cbf4dd32e7d908f4fe932c4a2a3)\n- readme: added link to #ready-to-use-releases to summary 
[`574b39e`](https://github.com/janreges/siteone-crawler/commit/574b39e836794c98e7be8ceaa81d1ab0c50ab149)\n- readme: added section Ready-to-use releases [`44d686b`](https://github.com/janreges/siteone-crawler/commit/44d686b910a36747d002ec2886b85c22be5c4864)\n- changelog: added changelog by https://github.com/cookpete/auto-changelog/tree/master + added 'composer changelog' [`d11af7e`](https://github.com/janreges/siteone-crawler/commit/d11af7e4d847362276e1dd4cec3c25cad38263fb)\n\n#### v1.0.0\n\n> 7 November 2023\n\n- proxy: added support for --proxy=&lt;host:port&gt;, closes #1 [`#1`](https://github.com/janreges/siteone-crawler/issues/1)\n- license: renamed to LICENSE.md [`c0f8ec2`](https://github.com/janreges/siteone-crawler/commit/c0f8ec22a68741b1740981dc98bdec13d8e5182a)\n- license: added license CC 4.0 BY [`bd5371b`](https://github.com/janreges/siteone-crawler/commit/bd5371b99363fbb5de29c33f0fcc572d154e467d)\n- version: set v1.0.0.20231107 [`bdbf2be`](https://github.com/janreges/siteone-crawler/commit/bdbf2be97e68cfa01fb992fb960c1c5313d5780f)\n- version: set v1.0.0 [`a98e61e`](https://github.com/janreges/siteone-crawler/commit/a98e61e161652861541743df6fe1d8c55be446f9)\n- SSL/TLS analyzer: uncolorize valid-to in summary item, phpstan fixes (non-funcional changes) [`88d1d9f`](https://github.com/janreges/siteone-crawler/commit/88d1d9fec8bc29cd26ab88c18d6c122939b59bba)\n- content type analyzer: added table with MIME types [`b744f13`](https://github.com/janreges/siteone-crawler/commit/b744f139e417b625bd22ea282f744b55406853b1)\n- seo analysis: added TOP10 non-unique titles and descriptions to tab SEO and OpenGraph + badges [`4ae14c1`](https://github.com/janreges/siteone-crawler/commit/4ae14c13be5163704c2c6a2d55d75bc83f41f801)\n- html report: increased sidebar width to prevent wrapping in the case of higher numbers in badges [`c5c8f4c`](https://github.com/janreges/siteone-crawler/commit/c5c8f4cae991bbdd6b6a8a7fab6cbaae1c199344)\n- dns analyzer: increased column size to 
prevent auto-truncation of dns/ip addresses [`b4d4127`](https://github.com/janreges/siteone-crawler/commit/b4d4127b2b67efd63fff53ae0ad27b6c9a987501)\n- html report: fixed badge with errors on DNS and SSL tab [`e290403`](https://github.com/janreges/siteone-crawler/commit/e29040349ac4966b22842e52ee4c102a67f9860c)\n- html report: ensure that no empty tabs will be in report (e.g. in case where all analyzers will be deactivated by --analyzer-filter-regex='/anything/') [`6dd5bcc`](https://github.com/janreges/siteone-crawler/commit/6dd5bcc67d215bca085ef75cb98398aa162ce5fa)\n- html report: improved replacement of non-badged cells to transparent badge for better alignment [`172a074`](https://github.com/janreges/siteone-crawler/commit/172a074c519a55c492d2b72250232e23749cd75b)\n- html report: increased visible part of long tables from 500px to 658px (based on typical sidebar height), updated title [`0be355f`](https://github.com/janreges/siteone-crawler/commit/0be355f5474ad6aff461ac3362127569d29eac22)\n- utils: selected better colors for ansi-&gt;html conversion [`6c2a8e3`](https://github.com/janreges/siteone-crawler/commit/6c2a8e364790e2cdb338f164c572aafd9e3db6c1)\n- SSL/TLS analyzer: evaluation and hints about unsafe or recommended protocols, from-to validation, colorized output [`5cea1fe`](https://github.com/janreges/siteone-crawler/commit/5cea1fe51d500db433c4d86fe5fa8660d2ef2a14)\n- SEO & OpenGraph analyzers: refactored class names, headings structure moved to own tab, other small improvements [`75a9724`](https://github.com/janreges/siteone-crawler/commit/75a97245af1e896ab3304891dd4459873ad3a26f)\n- security analyzer: better vulnerabilities explanation and better output formatting [`ee172cb`](https://github.com/janreges/siteone-crawler/commit/ee172cb25073e2e5452b38d5a6c52802e9585bcc)\n- summary: selected more suitable icons from the utf-8 set that work well in the console and HTML 
[`ef67483`](https://github.com/janreges/siteone-crawler/commit/ef67483827755895f0edf3149f4f106d28ba1942)\n- header stats: addValue() can accept both string and array [`a0d746b`](https://github.com/janreges/siteone-crawler/commit/a0d746ba9f956c03cb4ad1bddee14a26951ff86d)\n- headers & redirects - text improvements [`3ac9010`](https://github.com/janreges/siteone-crawler/commit/3ac9010c33e9048f1b3d24182232ae182ae681ca)\n- dns analyzer: colorized output and added info about CNAME chain into summary [`7dd1f8a`](https://github.com/janreges/siteone-crawler/commit/7dd1f8ac1eafcdcd92f651d397b561f6383fdcfc)\n- best practices analyzer: added SVG sanitization to prevent XSS, fine-tuning of missing quotes detection, typos [`4dc1eb5`](https://github.com/janreges/siteone-crawler/commit/4dc1eb592de3631f61ed67dfb87466a95462d5f3)\n- options: added extras option, e.g. for number range validation [`760a865`](https://github.com/janreges/siteone-crawler/commit/760a865082a7cd5f8e439f3fc9094fb7503a78be)\n- seo and socials: small type-hint and phpstan fixes [`bf695be`](https://github.com/janreges/siteone-crawler/commit/bf695be5fa859ca49bef67fb6511039e4301bb34)\n- best practice analyzer: added found depth to messages about too deep DOM depth [`220b43c`](https://github.com/janreges/siteone-crawler/commit/220b43c77a6d4747a29cf483e11a985dc07ac460)\n- analysis: added SSL/TLS analyzer with info about SSL certificate, its validity, supported protocols, issuer .. in the report SSL/TLS info are under tab 'DNS and TLS/SSL' [`3daf175`](https://github.com/janreges/siteone-crawler/commit/3daf1757e1eee765ea3d6b2dca1ed55ffb694d4a)\n- super table: show fulltext only for &gt;= 10 rows + visible height of the table in HTML shorten to 500px/20 rows and show 'Show entire table' link .. implemented only with HTML+CSS, so that it also works on devices without JS (e.g. 
e-mail browser on iOS) [`7fb9e52`](https://github.com/janreges/siteone-crawler/commit/7fb9e52de2514b0fc1a11032238de815f76acb37)\n- analysis: added seo & sharing analysis - meta info (title, h1, description, keywords), OG/Twitter data, heading structure details [`53e12e6`](https://github.com/janreges/siteone-crawler/commit/53e12e63102d70b0329194493599523808758716)\n- best practices: added checks for WebP and AVIF images [`0ccabc6`](https://github.com/janreges/siteone-crawler/commit/0ccabc633cdae4b7ef7b03aad22ab8cfab1a590f)\n- best practices: added brotli support reporting to tables [`7ff2c53`](https://github.com/janreges/siteone-crawler/commit/7ff2c53e56705c19de77d54db578338252007b99)\n- super table: added option to specify whether the table should be displayed on the output to the console, html or json [`6bb6217`](https://github.com/janreges/siteone-crawler/commit/6bb62177522a61bab1673b9d5f19e18f50bd54a3)\n- headers analysis: analysis of HTTP headers of all requests to the main domain, their detailed breakdown, values and statistics [`1fcc1db`](https://github.com/janreges/siteone-crawler/commit/1fcc1dba38a3ac41f0547a4f11a2aef9af1d876f)\n- analysis: fixed search of attributes with missing quotes [`3db31b9`](https://github.com/janreges/siteone-crawler/commit/3db31b9c01317d8c8ac6eba6b98679be79982c3e)\n- super table: added the number of found/displayed lines next to the full text [`6e7f3d4`](https://github.com/janreges/siteone-crawler/commit/6e7f3d4b4de0cfa378920c9389291a9902c0c486)\n- super table: removed setting column widths for HTML table - works best without forcing widths [`2a785e7`](https://github.com/janreges/siteone-crawler/commit/2a785e70b675ef681b005042a50b289b3b29d600)\n- html report: even wider content of the report is allowed, for better functioning for high-resolution displays [`363990c`](https://github.com/janreges/siteone-crawler/commit/363990c3566cb39d653ab2760df6bb4d2acd8149)\n- pages 404: truncate too long urls 
[`082bae6`](https://github.com/janreges/siteone-crawler/commit/082bae6f28d2ba8296591a0885548faa0b38a59a)\n- fixes: fixed various minor warnings related to specific content or parameters [`da1802d`](https://github.com/janreges/siteone-crawler/commit/da1802d82f8ccf2de3f4329bf3b952ebefeb3449)\n- options: ignore extra comma or empty value in list [`3f5cab6`](https://github.com/janreges/siteone-crawler/commit/3f5cab68bc4981faea7b7bed30b9f687ea773830)\n- super table: added useful fulltext search for all super tables [`50a4edf`](https://github.com/janreges/siteone-crawler/commit/50a4edf9caa69f67fdc21c3c32a92d201c211ccc)\n- colors: more light color for badge.neutral in light mode because previous was too contrasting [`0dbad09`](https://github.com/janreges/siteone-crawler/commit/0dbad0920f8f8a9f14186f9513e3ea6793fcf297)\n- colors: notice is now blue instead of yellow and severity order fix in some places (critical -&gt; warning -&gt; notice -&gt; ok -&gt; info) [`1b50b99`](https://github.com/janreges/siteone-crawler/commit/1b50b99ae079a4d1cdc350038e105d469dec524a)\n- colors: changed gray color to more platform-consistent color, otherwise gray was too dark on macOS [`173c9bd`](https://github.com/janreges/siteone-crawler/commit/173c9bd211bf066b69bb3adbde487ec3e99f6da1)\n- scripts: removed helper run.tests* scripts [`e9f0c8f`](https://github.com/janreges/siteone-crawler/commit/e9f0c8ff768042737bfab57b5d2270df995c611e)\n- analysis: added table with detailed list of security findings and URLs [`5b9e0fe`](https://github.com/janreges/siteone-crawler/commit/5b9e0fe1c3a514941abf2e277bf3f2bd4e017004)\n- analysis: added SecurityAnalyzer, which checks the existence and values of security headers and performs HTML analysis for common issues [`0cb7cb9`](https://github.com/janreges/siteone-crawler/commit/0cb7cb9daac5303227e31b72b0f6931218968bf7)\n- http auth: added support for basic HTTP authentication by --http-auth=username:password 
[`147e004`](https://github.com/janreges/siteone-crawler/commit/147e0040e97f6ad37da7897813063cbb73302e22)\n- error handling: improved behaviour in case of entering a non-existent domain or problems with DNS resolving [`5c08fb4`](https://github.com/janreges/siteone-crawler/commit/5c08fb4c82409863f73fcdcd66f9a0ba76206c5c)\n- html report: implemented completely redesigned html report with useful information, with light/dark mode and possibility to sort tables by clicking on the header .. design inspired by Zanrly from Shuffle.dev [`05da14f`](https://github.com/janreges/siteone-crawler/commit/05da14f50b108deec4827c5c0324bbd1b9775b37)\n- http client: fix of extension detection in the case of very non-standard or invalid URLs [`113faa5`](https://github.com/janreges/siteone-crawler/commit/113faa501016f14c017f5f1eaa586a6fae35efbf)\n- options: increased default memory limit from 512M to 2048M + fixed refactored 'file-system' -&gt; 'file' in docs for result storage [`1471b28`](https://github.com/janreges/siteone-crawler/commit/1471b2884bcbf1806a388e4ae85cc4f7e1bc11fe)\n- utils: fix that date formats are not detected as a phone number in parsePhoneNumbersFromHtml() [`e4e1009`](https://github.com/janreges/siteone-crawler/commit/e4e10097f7e74816dd716d2713516d5ff8eef39a)\n- strict types: added declare(strict_types=1) to all classes with related fixes and copyright [`92dd47c`](https://github.com/janreges/siteone-crawler/commit/92dd47c72e4f1aaa5a05187f60f2a9f0a5c285ee)\n- dns analyzer: added information about the DNS of the given domain - shows the entire cname/alias chain as well as the final resolved IPv4/IPv6 addresses + tests [`199421d`](https://github.com/janreges/siteone-crawler/commit/199421df3c96e2f2bec20f45230cbd812e9fc21c)\n- utils: helper function parsePhoneNumbersFromHtml() used in BestPracticeAnalyzer + tests [`09cc5fb`](https://github.com/janreges/siteone-crawler/commit/09cc5fbbbdf7f4a706ef912221e32d476fa397b4)\n- summary consistency: forced dots at the end of each 
item in the summary list [`4758e38`](https://github.com/janreges/siteone-crawler/commit/4758e38c3b2ab73476516662129e3b6abd78ff44)\n- crawler: support for more benevolent tags for title and meta attributes .. e.g. even the title can contain other HTML attributes [`770b339`](https://github.com/janreges/siteone-crawler/commit/770b339fb7b6ac86af56a864feb184977974d37d)\n- options: default timeout increased from 3 to 5 seconds .. after testing on a lot of websites, it makes better sense [`eb74207`](https://github.com/janreges/siteone-crawler/commit/eb7420736f5c4d353651ec39d8d030a8485e1486)\n- super table: added option to force non-breakable spaces in column cells [`3500818`](https://github.com/janreges/siteone-crawler/commit/35008185064331d33c380e0643606f2dbaeb2b64)\n- best practice analyzer: added measurement of individual steps + added checking of active links with phone numbers &lt;a href=\"tel: 123...\"&gt; [`1bb39e8`](https://github.com/janreges/siteone-crawler/commit/1bb39e87a440975e8956fbf1d66b81ef1b424574)\n- accessibility analyzer: added measurement of individual steps + removed DOMDocument parsing after refactoring [`2a7c49b`](https://github.com/janreges/siteone-crawler/commit/2a7c49b415dd2864cc37497d409cb083abb99df5)\n- analysis: added option to measure the duration and number of analysis steps + the analyzeVisitedUrl() method already accepts DOMDocument (if HTML) so the analyzers themselves do not have to do it twice [`d8b9a3d`](https://github.com/janreges/siteone-crawler/commit/d8b9a3d8e0016ec4cc6da908a1bd9db39370e9da)\n- super table: calculated auto-width can't be shorter than column name (label) [`b97484f`](https://github.com/janreges/siteone-crawler/commit/b97484f22d59bee04b935fa204d18c609ba8658c)\n- utils: removed ungreedy flag from all regular expressions, it caused problems under some circumstances [`03fc202`](https://github.com/janreges/siteone-crawler/commit/03fc202ed2f30fe4bd2001e8fcaecbea5ca45f7e)\n- phpstan: fixed all level 5 issues 
[`04c21aa`](https://github.com/janreges/siteone-crawler/commit/04c21aaeeed24117740fac22b5756363e3a4769d)\n- phpstan: fixed all level 4 issues [`91fee49`](https://github.com/janreges/siteone-crawler/commit/91fee49a0aefa603c4dba9bc1f19d658a7ab413e)\n- phpstan: fixed all level 3 issues [`2f7866a`](https://github.com/janreges/siteone-crawler/commit/2f7866a389b05e3c796e7f1f0bd7f6410a23cb05)\n- phpstan: fixed all level 2 issues [`e438996`](https://github.com/janreges/siteone-crawler/commit/e4389962be4a476bdcacc6acc18f36c7037b90ee)\n- phpstan: installed phpstan with level 2 for now [`b896e6c`](https://github.com/janreges/siteone-crawler/commit/b896e6c0552e4fd938088594a7d44d6af14fc809)\n- tests: allowed nextjs.org for crawling (incorrectly because of this, a couple of tests did not pass) [`cdc7f56`](https://github.com/janreges/siteone-crawler/commit/cdc7f5688f6aca0e822c3fa6daee6a3acd99eeeb)\n- refactor: moved /Crawler/ into /src/Crawler/ + added file attachment support to mailer [`2f0d26c`](https://github.com/janreges/siteone-crawler/commit/2f0d26c7d2f7cb65495b375dd4b11bf7849888e2)\n- sitemap exporter: renamed addErrorToSummary -&gt; addCriticalToSummary [`e46e192`](https://github.com/janreges/siteone-crawler/commit/e46e1926df52a3edfc4137ebd8ede9dee8a45bf1)\n- text output: added options --show-inline-criticals and --show-inline-warning which displays the found problems directly under the URL - the displayed table will be less clear, but the problems are clearly visible [`725b212`](https://github.com/janreges/siteone-crawler/commit/725b2124172710895d86503fd4a933e2ea91efaa)\n- composer.json: added require declarations for ext-dom, ext-libxml (used in analyzers) and ext-zlib (used in cache/storages) [`3542cf0`](https://github.com/janreges/siteone-crawler/commit/3542cf03829e9a3c745e58e0df1bc2f6284d25ba)\n- analysis: added accessibility and best practices analyzers with useful checks 
[`860316f`](https://github.com/janreges/siteone-crawler/commit/860316fa685509104462412aeb125417dceaee28)\n- analysis: added AnalysisManager for better analysis control with the possibility to filter required analyzers using --analyzer-filter-regex [`150569f`](https://github.com/janreges/siteone-crawler/commit/150569fd20c380781ed5971cefd47308762a730a)\n- result storage: options --result-storage, --result-storage-dir and --result-storage-compression for storage of response bodies and headers (by default is used memory storage but you can use file storage for extremely large websites) [`d2a8fab`](https://github.com/janreges/siteone-crawler/commit/d2a8fabcef72067500dfcb0065e87ebc4395dac3)\n- http cache: added --http-cache-dir and --http-cache-compression parameters (by default http cache is on and set to 'tmp/http-client-cache' and compression is disabled) [`2eb9ed8`](https://github.com/janreges/siteone-crawler/commit/2eb9ed86d9d53b4735a3de3cf6d06b652818dbc0)\n- super table: the currentOrderColumn is already optional - sometimes we want to leave the table sorted according to the input array [`4fba880`](https://github.com/janreges/siteone-crawler/commit/4fba880fcf137a6207df4c5177cf3ec80afaa3ae)\n- analysis: replaced severity ok/warning/error with ok/notice/warning/critical - it made more sense for analyzers [`18dbaa7`](https://github.com/janreges/siteone-crawler/commit/18dbaa7a4a760874ba39c75af28f7e808fb8eb2e)\n- analysis: added support for immediate analysis of visited URLs with the possibility to insert the analyzer's own columns into the main table [`004865f`](https://github.com/janreges/siteone-crawler/commit/004865f223c9ec688c4f522cd8f93d8022458130)\n- content types: fixed json/xml detection [`00fc180`](https://github.com/janreges/siteone-crawler/commit/00fc1808838c7a191cc9986e884ffda26f841281)\n- content type analyzer: decreased URLs column size from 6 to 5 - that's enough 
[`2eefbaf`](https://github.com/janreges/siteone-crawler/commit/2eefbafad24f68118a2efe8d6ddedc4d3d45b5cf)\n- formatting: unification of duration formatting across the entire application [`412ee7a`](https://github.com/janreges/siteone-crawler/commit/412ee7ab5c5eda19dfc5492a6cc9edbb7c5969c6)\n- super table: fixed sorting for array of arrays [`4829be8`](https://github.com/janreges/siteone-crawler/commit/4829be8f8e1d3f0d8201dedfa99d245453601422)\n- source domains analyzer: minor formatting improvements [`2d32ced`](https://github.com/janreges/siteone-crawler/commit/2d32cedb59aa13e4e27a1dbe58eff586e4407cd9)\n- offline website exporter: added info about successful export to summary [`92e7e46`](https://github.com/janreges/siteone-crawler/commit/92e7e46bdbc1f1cff329cf4aff5ee99dd70332e2)\n- help: added red message about invalid CLI parameters also to the end of help output, because help is already too long [`6942e8f`](https://github.com/janreges/siteone-crawler/commit/6942e8f4535d748763a124207634ea7548bbfa83)\n- super table: added column property 'formatterWillChangeValueLength' to handle situation with the colored text and broken padding [`7371a68`](https://github.com/janreges/siteone-crawler/commit/7371a68f11191b0b21307e6ca703e362f476b815)\n- analyzers: setting a more meaningful analyzers order [`5e8f747`](https://github.com/janreges/siteone-crawler/commit/5e8f747392f291abdfb0140038c42fe84801955c)\n- analyzers: added source domains analyzer with summary of domains and downloaded content types (number/size/duration) [`f478f17`](https://github.com/janreges/siteone-crawler/commit/f478f178fb2f79a81e5db89909951816ac6e1c9f)\n- super table: added auto-width column feature [`d2c04de`](https://github.com/janreges/siteone-crawler/commit/d2c04dec3312d72ed373236d73f7a4d3bbf8c20d)\n- renaming: '--max-workers' to '--workers' with possibility to use shortcut '-w=&lt;num&gt;' + adding possibility to use shortcut '-rps=&lt;num&gt;' for '--max-reqs-per-sec=&lt;num&gt;' 
[`218f8ff`](https://github.com/janreges/siteone-crawler/commit/218f8ffcca15550853bcb4ace44dedf260d1e735)\n- extra columns: added ability to force columns to the required length via \"!\" + refactoring using ExtraColumn [`def82ff`](https://github.com/janreges/siteone-crawler/commit/def82ff3f5f11efa2e4ef812e086a5c8379ac962)\n- readme: division of features into several groups and divided accordingly [`c03d231`](https://github.com/janreges/siteone-crawler/commit/c03d2311b618f8aad165ffad39ae51989f60f846)\n- offline exporter: export of the website to the offline form has already been fine-tuned (but not perfect yet), --disable-* options to disable JS/CSS/images/fonts/etc. and a lot of other related functionalities [`0d04a98`](https://github.com/janreges/siteone-crawler/commit/0d04a9805bdebea708eba44cc6680bd58995d559)\n- crawler: added possibility to set speed via --max-reqs-per-sec (default 10) [`d57cc4a`](https://github.com/janreges/siteone-crawler/commit/d57cc4a39e6ce1882ee3233b015200382d90f06f)\n- tests: dividing asserts for URL conversion testing into different detailed groups [`f6221cb`](https://github.com/janreges/siteone-crawler/commit/f6221cb5d3e5e844f146a95940479b20604c37cf)\n- html url parser: added support for loading fonts from &lt;link href='...'&gt; [`4c482d1`](https://github.com/janreges/siteone-crawler/commit/4c482d1078fb535e4a3be96f6c3e7ded2ea02d65)\n- manager: remove avif/webp support if OfflineWebsiteExporter is active - we want to use only long-supported jpg/png/gif on the local offline version [`3ec81d3`](https://github.com/janreges/siteone-crawler/commit/3ec81d338590ae16ee337cbbfa8a741e01b0522d)\n- http response: transformation of the redirect to html with redirection through the &lt;meta&gt; tag [`8f6ff16`](https://github.com/janreges/siteone-crawler/commit/8f6ff161066a82af9ae91a738aae66327fe407b6)\n- initiator: skip comments or empty arguments 
[`12f4c52`](https://github.com/janreges/siteone-crawler/commit/12f4c52b7fe0429926c2a6540e8842eae4882888)\n- http client: added crawler signature to User-Agent and X-Crawler-Info header + added possibility to set Origin request header (otherwise some servers block downloading the fonts) [`ae4eaf3`](https://github.com/janreges/siteone-crawler/commit/ae4eaf3298e0bc94c1d913d08393426e380ba4ad)\n- visited url: added isStaticFile() [`f1cd5e8`](https://github.com/janreges/siteone-crawler/commit/f1cd5e8e397b734dc3353db943c2928ff46cf520)\n- crawler: increased pcre.backtrack_limit and pcre.recursion_limit (100x) to support longer HTML/CSS/JS [`35a6e9a`](https://github.com/janreges/siteone-crawler/commit/35a6e9a4729fffa7ee0a77b0be50621c4077a7b9)\n- core options: renamed --headers-to-table to --extra-columns [`7c30988`](https://github.com/janreges/siteone-crawler/commit/7c30988fdecdaeb6aa89aed15a864a033c121d2f)\n- crawler: added type for audio and xml + static cache for getContentTypeIdByContentTypeHeader [`386599e`](https://github.com/janreges/siteone-crawler/commit/386599e881051ae8c14b7ec9688690e50c0dd7dc)\n- found urls: normalization of URL takes care of spaces + change of source type to int [`c3063a2`](https://github.com/janreges/siteone-crawler/commit/c3063a247f10bf00b8516eb2303bb85cab426c15)\n- debugging: possibility to enable debugging through ParsedUrl [`979dc0e`](https://github.com/janreges/siteone-crawler/commit/979dc0e89af063b5ffe04b49275ceb0fa9191db2)\n- offline url converter: class for solving the translation of URL addresses to offline/local + tests [`44118e6`](https://github.com/janreges/siteone-crawler/commit/44118e6bf96f6b25c7d8410084f76dfb3eb10188)\n- url converter: TargetDomainRelation enum with tests [`fd6cf21`](https://github.com/janreges/siteone-crawler/commit/fd6cf216d903785adf46923ed2a805937f724d15)\n- initiator: check only script basename in unknown args check 
[`888448f`](https://github.com/janreges/siteone-crawler/commit/888448fc9c598a7e8f750e746214b2834722b412)\n- offline website export: to run the exporter is necessary to set --offline-export-directory [`33e9f95`](https://github.com/janreges/siteone-crawler/commit/33e9f952814b52bdfc7634cf4b9521d393b87417)\n- offline website export: to run the exporter is necessary to set --offline-export-directory [`bcc007b`](https://github.com/janreges/siteone-crawler/commit/bcc007b6a3a9c0e9de23e76bd6f9150c7d2295c9)\n- log & tmp: added .gitkeep for versioning of these folders - they are used by some optional features [`065f8ef`](https://github.com/janreges/siteone-crawler/commit/065f8ef27fabe889e8a35b98fd75ce260263d268)\n- offline website export & tests: added the already well-functioning option to export the entire website to offline mode working from local static HTML files, including images, fonts, styles, scripts and other files (no documentation yet) + lot of related changes in Crawler + added first test testing some important functionalities about relative URL building [`4633211`](https://github.com/janreges/siteone-crawler/commit/463321199e6f9bac10b097e3f286da6a13f36906)\n- composer & phpunit: added composer, phpunit and license CC BY 4.0 [`4979143`](https://github.com/janreges/siteone-crawler/commit/4979143ac2aea9d7b3fe9fcfb9d57f1890c1f114)\n- visited-url: added info if is external and if is allowed to crawl it [`268a696`](https://github.com/janreges/siteone-crawler/commit/268a6960f8ff69046c8e6c73beae98d24b73ba1f)\n- text-output: added peak memory usage and average traffic bandwidth to total stats [`cb68340`](https://github.com/janreges/siteone-crawler/commit/cb683407e2cdcd62f5484da96baf9ef43e49a4b3)\n- crawler: added video support and fixed javascript detection by content-type [`3c3eb96`](https://github.com/janreges/siteone-crawler/commit/3c3eb9625f20657e971249c14cdff97a0a0b8687)\n- url parsers: extraction of url parsing from html/css into dedicated classes and FoundUrl with 
info about source tag/attribute [`d87597d`](https://github.com/janreges/siteone-crawler/commit/d87597d36507c7bd6029f87bf1801586eea9b420)\n- manager: ensure that done callback is executed only once [`d99cccd`](https://github.com/janreges/siteone-crawler/commit/d99cccd91b43680e0726f9c037fb568a9e8be1b4)\n- http-client: extraction of http client functionality into dedicated classes and implemented cache for HTTP responses (critical for efficient development) [`8439e37`](https://github.com/janreges/siteone-crawler/commit/8439e376c50a346e133a2d99e7406020bb89030a)\n- debugging: added debugging related expert options + Debugger class [`2c89682`](https://github.com/janreges/siteone-crawler/commit/2c89682feaf65a4f224da8ebaf05c48aa899eccc)\n- parsed-url: added query, it is already needed [`860df08`](https://github.com/janreges/siteone-crawler/commit/860df086ae8c8556420d92e249b3b459b8bf288f)\n- status: trim only HTML bodies because trim break some types of binary files, e.g. avif [`fca2156`](https://github.com/janreges/siteone-crawler/commit/fca2156a2f9607f705a32833a650ae70d5690772)\n- url parsers: unification of extension length in relevant regexes to {1,10} [`96a3548`](https://github.com/janreges/siteone-crawler/commit/96a35484ba5ab0eee7e43837c1eade1aba6f8a57)\n- basic-stats: fixed division by zero and nullable times [`8c38b96`](https://github.com/janreges/siteone-crawler/commit/8c38b9660752f132c09e3ceaab596e54176b46e9)\n- fastest-analyzer: show only URLs with status 200 on the TOP list [`0085dd1`](https://github.com/janreges/siteone-crawler/commit/0085dd1fcbd3b5657eca73345921fe3fc6f407bc)\n- content-type-analyzer: added stats for 42x statuses (429 Too many requests) [`4f49d12`](https://github.com/janreges/siteone-crawler/commit/4f49d124d1d9993abe3babd9a181c9768b5c2903)\n- file export: fixed HTML report error after last refactoring [`e77fa6c`](https://github.com/janreges/siteone-crawler/commit/e77fa6cf791da08b522e2124545c303ab5de67ed)\n- sitemap: publish only URLs with 
status 200 OK [`b2d4448`](https://github.com/janreges/siteone-crawler/commit/b2d44488a28aeca3421c36ca1e5ada0030de26d8)\n- summary: added missing &lt;/ul&gt; and renamed heading Stats to Summary in HTML report [`c645e16`](https://github.com/janreges/siteone-crawler/commit/c645e16016611a49f70c3d5de9e6ab4d58a45048)\n- status summary: added summary showing important analyzed metrics with OK/WARNING/CRITICAL icons, ordering by severity and INFO about the export execution + interrupting the script by CTRL+C will also run all analyzers, exporters and display all statistics for already processed URLs [`fd643d0`](https://github.com/janreges/siteone-crawler/commit/fd643d016036f4eed5418375f8b25cfe08549ed0)\n- output consistency: ensuring color and formatting consistency of different types of values (status codes, request durations) [`3ffe1d2`](https://github.com/janreges/siteone-crawler/commit/3ffe1d2a939d718a6fae9c1f927646cfbec808f4)\n- analyzers: added content-type analyzer with stats for total/avg times, total sizes and statuses 200x, 300x, 400x, 500x [`0475347`](https://github.com/janreges/siteone-crawler/commit/04753478bce1f81dfdab73cd19b0541e725317fe)\n- crawler: better content-type handling for statistics and added 'Type' column to URL lists + refactored info from array to class [`346caf4`](https://github.com/janreges/siteone-crawler/commit/346caf45f3a18e75a0cf4d0e65961fbee63c9632)\n- supertable: is now able to display from the array-of-arrays as well as from the array-of-objects + it can translate color declarations from bash to HTML colors when rendering to HTML [`80f0b1c`](https://github.com/janreges/siteone-crawler/commit/80f0b1ca3d50ee7dfae9a01eccbe15fcc06a72d5)\n- analyzers: TOP slowest/fastest pages analyzer now evaluates only HTML pages, otherwise static content skews the results + decreased minTime for slowest analysis from 0.1 to 0.01 sec (on a very fast and cached website, the results were empty, which is not ideal) 
[`1390bbc`](https://github.com/janreges/siteone-crawler/commit/1390bbc6daa5484fed8612731dc99f734c406042)\n- major refactoring: implementation of the Status class summarizing useful information for analyzers/exporters (replaces the JsonOutput over-use) + implementation of basic analyzers (404, redirects, slow/fast URLs) + SuperTable component that exports data to text and HTML + choice of memory-limit setting + change of some default values [`efb9a60`](https://github.com/janreges/siteone-crawler/commit/efb9a60aa0be5cb8af55b09723a236370fccb904)\n- url parsing: fixes for cases when query params are used with htm/html/php/asp etc. + mini readme fix [`af1acfa`](https://github.com/janreges/siteone-crawler/commit/af1acfa9efa536d2ef2e51b2f0a2404ef9d2417a)\n- minor refactoring: renaming about core options, small non-functional changes [`1dd258e`](https://github.com/janreges/siteone-crawler/commit/1dd258e81eb4d06658e5e41e62141d5be48ce622)\n- major refactoring: better modularity and auto loading in the area of the exporters, analyzers, their configurability and help auto-building + new mailer options --mail-from-name and --mail-subject-template [`0c57dbd`](https://github.com/janreges/siteone-crawler/commit/0c57dbdb30702cc6669a703788b530fbc4d04af6)\n- json output: automatic shortening of the URL according to the text width of the console, because if the long URL exceeds the width of the window, the rewriting of the line with the progressbar stops working properly [`106332b`](https://github.com/janreges/siteone-crawler/commit/106332b1d8421dbea5f8725536fa3efed6834564)\n- manual exit: captures CTRL+C and ends with the statistics for at least the current URLs [`7f4fc80`](https://github.com/janreges/siteone-crawler/commit/7f4fc80c5f9f0fe47da2d9bee2e139489c36a966)\n- error handling: show red error with help when queue or visited tables are full and info how to fix it [`4efbd73`](https://github.com/janreges/siteone-crawler/commit/4efbd734d775aaa2e6dd66d2d8ed7a007871a1dd)\n- DOM 
elements: implemented DOM elements counter and when you add 'DOM' to --headers-to-column you will see DOM elements count [`1837a9c`](https://github.com/janreges/siteone-crawler/commit/1837a9cb12f97a33aec6bcf03a54250bd48545a2)\n- sitemap and no-color: implemented xml/txt sitemap generator and --no-color option [`f9ade44`](https://github.com/janreges/siteone-crawler/commit/f9ade44d470d97bcc399039bc91a5ce74a6537c1)\n- readme: added table of contents and rewrote intro, features and installation chapters [`469fd1c`](https://github.com/janreges/siteone-crawler/commit/469fd1cf15af4d191c239b2523e0fd8614f7653f)\n- readme: removed deprecated and duplicate mailer docs [`c5effe8`](https://github.com/janreges/siteone-crawler/commit/c5effe84aece85f7a6aaa97228cd84a5eade4f8b)\n- readme and CLI help: dividing the parameters into clear groups and improving parameters description - in README.md is detailed form, in CLI instructions is a shorter version. [`19ff724`](https://github.com/janreges/siteone-crawler/commit/19ff724ec0d21f08c4d6cf09def06ba27b023598)\n- include/ignore regex: added option to limit crawled URLs with the common combination of --include-regex and --ignore-regex [`88e393d`](https://github.com/janreges/siteone-crawler/commit/88e393d33c07fab77173432fd0faf7fe631c2c2c)\n- html report: masking passwords, styling, added logo, better info ordering and other small changes [`4cdcdab`](https://github.com/janreges/siteone-crawler/commit/4cdcdabf145ffe6f02d84b3250b2a1fc46a5677a)\n- mailer & exports: implemented ability to send HTML report to e-mail via SMTP + exports to HTML/JSON/TXT file + better reporting of HTTP error conditions (timeout, etc.) 
+ requests for assets are sent only as HEAD without the need to download all binary data + updated documentation [`a97c29d`](https://github.com/janreges/siteone-crawler/commit/a97c29d78f07b4d854853c474fb9d0542b6f2796)\n- table output: option to set expected column length for better look by 'X-Cache(10)' [`e44f89d`](https://github.com/janreges/siteone-crawler/commit/e44f89d6c3114ccf02c70f38d5ffa5a0f081c1b2)\n- output: renamed print*() methods to more meaningful add*() relevant also for JSON output [`1069c4a`](https://github.com/janreges/siteone-crawler/commit/1069c4a346d13878c52a316b5953ffa997ec3700)\n- options: default timeout decreased from 10 to 3, --table-url-column-size renamed to --url-column-size and decreased its default value from 100 to 80, new option --hide-progress-bar, changed --truncate-url-to-column-size to --do-not-truncate-url [`e75038c`](https://github.com/janreges/siteone-crawler/commit/e75038c56afcf85ae591b1dbedf33a54fcd84754)\n- readme: improved documentation describing use on Windows, macOS or arm64 Linux [`baf2d05`](https://github.com/janreges/siteone-crawler/commit/baf2d0596a3e8367d51fe6ab75793d803e984330)\n- readme: added info about really tested crawler on Windows with Cygwin (Cygwin has some output limitations and it is not possible to achieve such nice behavior as on Linux) [`1f195c0`](https://github.com/janreges/siteone-crawler/commit/1f195c0c9c8565a37fcb5786070e69c6aa0b8e0e)\n- windows compatibility: ensuring compatibility with running through cygwin Swoole, which I recommend in the documentation for Windows users [`c22cc45`](https://github.com/janreges/siteone-crawler/commit/c22cc4559ed3de2ac5e4e6e2957b4d3233b4fda5)\n- json output: implemented nice continuous progress reporting, intentionally on STDERR so the output on STDOUT can be used to save JSON to file + improved README.md [`c095249`](https://github.com/janreges/siteone-crawler/commit/c095249d03c96a00da75553b10dadf7e025a5b0b)\n- limits: increased limit of max queue length from 1000 
to 2000 (this default will be more suitable even for medium-sized websites) [`c8c3312`](https://github.com/janreges/siteone-crawler/commit/c8c33121c371cc4d0f0791a250178254d9e3a88a)\n- major refactoring: splitting the code into classes, improving error handling and implementing other functions (JSON output, assets crawling) [`f6902fc`](https://github.com/janreges/siteone-crawler/commit/f6902fc025943ef96150739ae6834358097b235d)\n- readme: added information how to use crawler with Windows, macOS or arm64 architecture + a few other details [`721f4bb`](https://github.com/janreges/siteone-crawler/commit/721f4bb73e92f65ca3aab789219f046dea665931)\n- url parsing: handled situations when relative or dotted URLs are also used in HTML, e.g. href='sub/page', href='./sub/page' or href='../sub/page', href='../../sub/page' etc. + few minor optimizations [`c2bbf72`](https://github.com/janreges/siteone-crawler/commit/c2bbf72cf636340a43ebf8472c38008d0fc50f27)\n- memory allocation: added optional params --max-queue-length=&lt;n&gt; (default 1000), --max-visited-urls=&lt;n&gt; (default 5000) and --max-url-length=&lt;u&gt; (default 2000) [`947a43f`](https://github.com/janreges/siteone-crawler/commit/947a43f3bb826ad852ca51390ae2778fbff320e0)\n- Initial commit with first version 2023.10.1 [`7109788`](https://github.com/janreges/siteone-crawler/commit/71097884df3c1ade6fd7c02b4ac9ac8f5f161a12)\n
  },
  {
    "path": "CLAUDE.md",
    "content": "# CLAUDE.md\r\n\r\nThis file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.\r\n\r\n## Setup After Clone\r\n\r\n```bash\r\ngit config core.hooksPath .githooks               # enable pre-commit hook (fmt + clippy + tests)\r\n```\r\n\r\n## Build & Test Commands\r\n\r\n```bash\r\ncargo fmt                                         # auto-format code (always run before build)\r\ncargo build                                       # debug build\r\ncargo build --release                             # release build (~11s)\r\ncargo test                                        # unit tests + offline integration tests (~300 tests)\r\ncargo test --test integration_crawl -- --ignored --test-threads=1  # network integration tests (crawls crawler.siteone.io)\r\ncargo test scoring::ci_gate::tests::all_checks_pass  # run a single test by name\r\ncargo clippy -- -D warnings                       # lint (CI enforces zero warnings)\r\ncargo fmt -- --check                              # format check\r\n```\r\n\r\n## Quick Run\r\n\r\n```bash\r\n./target/release/siteone-crawler --url=https://example.com --single-page\r\n./target/release/siteone-crawler --url=https://example.com --output=json --http-cache-dir=  # no cache\r\n./target/release/siteone-crawler --html-to-markdown=page.html                               # convert local HTML to markdown (stdout)\r\n./target/release/siteone-crawler --html-to-markdown=page.html --html-to-markdown-output=page.md  # convert to file\r\n```\r\n\r\n## Architecture\r\n\r\n### Crawl Lifecycle (in order)\r\n\r\n1. **CLI Parsing** (`Initiator` → `CoreOptions::parse_argv()`): Parses 120+ CLI options, merges config file if present, validates. Exits with code 101 on error, code 2 on `--help`/`--version`. Non-crawl utility modes (`--serve-markdown`, `--serve-offline`, `--html-to-markdown`) exit early in `main.rs` before creating the Manager.\r\n\r\n2. 
**Analyzer Registration** (`Initiator::register_analyzers()`): Creates all 15 analyzer instances (Accessibility, BestPractice, Caching, ContentType, DNS, ExternalLinks, Fastest, Headers, Page404, Redirects, Security, SeoAndOpenGraph, SkippedUrls, Slowest, SourceDomains, SslTls) and registers them with `AnalysisManager`. Some analyzers receive config from CLI options (e.g. `fastest_top_limit`, `max_heading_level`).\r\n\r\n3. **Manager Setup** (`Manager::run()`): Creates `Status` (result storage), `Output` (text/json/multi), `HttpClient` (with optional proxy, auth, cache), `ContentProcessorManager` (HTML, CSS, JS, XML, Astro, Next.js, Svelte processors), and the `Crawler` instance.\r\n\r\n4. **Robots.txt Fetch** (`Crawler::fetch_robots_txt()`): Before crawling starts, fetches and parses `/robots.txt` from the initial domain. Respects `--ignore-robots-txt` option.\r\n\r\n5. **Crawl Loop** (`Crawler::run()`): Breadth-first concurrent URL processing:\r\n   - URL queue (`DashMap`) seeded with initial URL\r\n   - Tokio tasks limited by `Semaphore` (= `--workers` count) + rate limiting (`--max-reqs-per-sec`)\r\n   - Per-URL flow: check robots.txt → HTTP request → on error, store with negative status code → on success, run content processors → extract links from HTML → enqueue discovered URLs\r\n   - Content processors (`HtmlProcessor`, `CssProcessor`, etc.) transform response bodies during crawl — used by offline/markdown exporters for URL rewriting\r\n   - Each visited URL's response is stored in `Status` for post-crawl analysis\r\n   - Per-URL data collected: status code, headers, body, response time, content type, size, redirects\r\n\r\n6. 
**Post-Crawl Analysis** (`Manager::run_post_crawl()`): Sequential pipeline after crawling ends:\r\n   - Transfer skipped URLs from crawler to `Status`\r\n   - Run all registered analyzers (`AnalysisManager::run_analyzers()`): each analyzer gets read access to `Status` (all crawled data) and write access to `Output` (adds tables/findings)\r\n   - Add content processor stats table\r\n\r\n7. **Exporters** (`Manager::run_exporters()`): Generate output files based on CLI options:\r\n   - `SitemapExporter`: XML/TXT sitemap files\r\n   - `OfflineWebsiteExporter`: Static website copy with rewritten relative URLs\r\n   - `MarkdownExporter`: HTML→Markdown conversion with relative .md links\r\n   - `FileExporter`: Save text/JSON output to file\r\n   - `HtmlReport`: Self-contained HTML report (also used by Mailer and Upload)\r\n   - `MailerExporter`: Email HTML report via SMTP\r\n   - `UploadExporter`: Upload report to remote server\r\n\r\n8. **Scoring** (`scorer::calculate_scores()`): Computes quality scores (0–10) across 5 weighted categories (Performance 20%, SEO 20%, Security 25%, Accessibility 20%, Best Practices 15%). Deductions come from summary findings (criticals, warnings) and stats (404s, 5xx, slow responses).\r\n\r\n9. **CI/CD Gate** (`ci_gate::evaluate()`): When `--ci` is active, checks scores and stats against configurable thresholds (`--ci-min-score`, `--ci-max-404`, etc.). Returns exit code 10 on failure.\r\n\r\n10. **Summary & Output** (`Output::add_summary()`, `Output::end()`): Prints summary table with OK/Warning/Critical counts, finalizes output. Exit code: 0 = success, 3 = no pages crawled, 10 = CI gate failed.\r\n\r\n### How Analyzers Work\r\n\r\nEach analyzer implements the `Analyzer` trait (`analysis/analyzer.rs`). Analyzers are **post-crawl only** — they don't run during crawling. The `AnalysisManager` calls each analyzer's `analyze(&Status, &mut Output)` method after all URLs have been visited. 
Analyzers read crawled data from `Status` (visited URLs, response headers, bodies, skipped URLs) and produce `SuperTable` instances that get added to `Output`. Analyzers also add `Item` entries to the `Summary` (OK, Warning, Critical, Info findings) which feed into scoring.\r\n\r\n### How Content Processors Work\r\n\r\nContent processors implement `ContentProcessor` (`content_processor/content_processor.rs`) and run **during crawl** on each URL's response body. They serve two purposes: (1) transform content for offline/markdown export (rewrite URLs to relative paths), and (2) extract metadata (links, assets). Processors are type-specific: `HtmlProcessor` handles HTML, `CssProcessor` handles CSS `url()` references, etc. The `ContentProcessorManager` dispatches to the right processor based on content type.\r\n\r\n### Concurrency Model\r\n\r\nThe crawler uses tokio for async I/O with a semaphore-based worker pool (`options.workers`). Shared state uses:\r\n- `Arc<DashMap<...>>` for lock-free concurrent maps (URL queue, visited URLs, skipped URLs)\r\n- `Arc<Mutex<...>>` for sequential-access state (Status, Output, AnalysisManager)\r\n- `Arc<AtomicBool/AtomicUsize>` for simple flags and counters\r\n\r\n### Key Traits\r\n\r\n- **`Analyzer`** (`analysis/analyzer.rs`): Post-crawl analysis (SEO, security, headers, etc.). Each analyzer gets `&Status` and `&mut Output`.\r\n- **`Exporter`** (`export/exporter.rs`): Output generators (HTML report, offline website, markdown, sitemap, mailer, upload).\r\n- **`Output`** (`output/output.rs`): Formatting backend. Implementations: `TextOutput`, `JsonOutput`, `MultiOutput`.\r\n- **`ContentProcessor`** (`content_processor/content_processor.rs`): Per-URL content transformation during crawl (HTML, JS, CSS, XML processors).\r\n\r\n### Options System\r\n\r\nCLI options are defined in `options/core_options.rs` via `get_options()` which returns an `Options` struct with typed option groups. 
Parsing flow: `parse_argv()` → merge config file → parse flags → `CoreOptions::from_options()` → `apply_option_value()` for each option. New CLI options require: adding the field to `CoreOptions`, a case in `apply_option_value()`, and an entry in the appropriate option group.\r\n\r\n### Exit Codes\r\n\r\n| Code | Meaning |\r\n|------|---------|\r\n| 0 | Success (with `--ci`: all thresholds passed) |\r\n| 1 | Runtime error |\r\n| 2 | Help/version displayed |\r\n| 3 | No pages successfully crawled (DNS failure, timeout, etc.) |\r\n| 10 | CI/CD quality gate failed |\r\n| 101 | Configuration error |\r\n\r\n### HTTP Response Body\r\n\r\n`HttpResponse.body` is `Option<Vec<u8>>` (not String) to preserve binary data for images, fonts, etc. Use `body_text()` for string content. Failed HTTP requests return `Ok(HttpResponse)` with negative status codes (-1 connection error, -2 timeout, -4 send error), not `Err`.\r\n\r\n### Testing Structure\r\n\r\n- **Unit tests**: In-file `#[cfg(test)] mod tests` blocks (standard Rust convention)\r\n- **Integration tests**: `tests/integration_crawl.rs` with shared helpers in `tests/common/mod.rs`\r\n- Network-dependent integration tests are `#[ignore]` — run explicitly with `--ignored`\r\n\r\n### Testing Complex Scenarios with Sample Websites\r\n\r\nThe crawler has a built-in HTTP server (`--serve-offline=<dir>`) that can serve any local directory as a static website. This enables efficient local testing of edge cases without deploying a real site:\r\n\r\n1. Create a sample website directory, e.g. `./tmp/sample-website-xyz/`\r\n2. Add HTML files and assets simulating the desired scenario (spaces in filenames, special characters, redirect chains, broken links, specific heading structures, etc.)\r\n3. Start the built-in server: `./target/release/siteone-crawler --serve-offline=./tmp/sample-website-xyz/ --serve-port=8888`\r\n4. In another terminal, crawl the local site: `./target/release/siteone-crawler --url=http://127.0.0.1:8888/`\r\n5. 
Verify the crawler handles the scenario correctly (output, offline export, analysis results)\r\n\r\nThis approach is useful for reproducing bug reports, testing regex edge cases (e.g. URLs with spaces, HTML entities, unusual attribute quoting), validating offline/markdown export for specific HTML structures, and any scenario that would be hard to find on a live website.\r\n\r\n### Key Files\r\n\r\n- `src/engine/crawler.rs` (~1700 lines): Core crawl loop, URL queue management, HTML/content parsing\r\n- `src/options/core_options.rs` (~2500 lines): All 120+ CLI options, parsing, validation\r\n- `src/export/utils/offline_url_converter.rs` (~1400 lines): URL-to-file-path conversion for offline export\r\n- `src/export/html_report/report.rs`: HTML report generation with embedded template\r\n- `src/scoring/scorer.rs`: Quality score calculation from summary findings\r\n- `src/scoring/ci_gate.rs`: CI/CD threshold evaluation\r\n\r\n### Edition & Rust Version\r\n\r\nProject uses `edition = \"2024\"` (Rust 1.85+) with `rust-version = \"1.94\"`. Edition 2024 features used throughout: `unsafe extern` blocks, `if let` chaining (`if let ... && ...`), `unsafe { std::env::set_var() }`.\r\n\r\n### Commit Policy\r\n\r\n**Never commit automatically.** Commits are only allowed on explicit user request. Before every commit, always run `git status`, review the changes, and stage only the relevant files — never use `git add -A` or `git add .` blindly.\r\n\r\n### Commit Messages\r\n\r\nUse [Conventional Commits](https://www.conventionalcommits.org/): `feat:`, `fix:`, `refactor:`, `perf:`, `docs:`, `style:`, `ci:`, `chore:`, `test:`. Examples:\r\n- `feat: add built-in HTTP server for markdown/offline exports`\r\n- `fix: correct non-ASCII text corruption in heading ID generation`\r\n- `perf: eliminate heap allocation in content_type_for_extension`\r\n- `chore: bump version to 2.0.3`\r\n\r\n### Releasing a New Version\r\n\r\n1. Update version in `Cargo.toml` (`version = \"X.Y.Z\"`)\r\n2. 
Update version in `src/version.rs` (`pub const CODE: &str = \"X.Y.Z.YYYYMMDD\";`)\r\n3. Run `cargo check` so that `Cargo.lock` is updated with the new version\r\n4. Commit all three files (`Cargo.toml`, `src/version.rs`, `Cargo.lock`): `git commit -m \"chore: bump version to X.Y.Z\"`\r\n5. Tag and push: `git tag vX.Y.Z && git push && git push --tags`\r\n\r\n### Important Conventions\r\n\r\n- Tables, column order, and formatting must stay consistent across versions. The HTML parser uses the `scraper` crate.\r\n- HTTP cache lives in `tmp/http-client-cache/` by default. Delete it for fresh crawls or use `--http-cache-dir=` to disable.\r\n- `rustls` requires explicit `ring` CryptoProvider installation in `main.rs`.\r\n"
  },
  {
    "path": "Cargo.toml",
    "content": "[package]\nname = \"siteone-crawler\"\nversion = \"2.3.0\"\nedition = \"2024\"\nrust-version = \"1.94\"\nauthors = [\"Ján Regeš <jan.reges@siteone.cz>\"]\ndescription = \"Website crawler and QA toolkit in Rust for security, performance, SEO, and accessibility audits, offline cloning, markdown export, sitemap generation, cache warming, and CI/CD gating — one dependency-free binary for all major platforms, 10 tools in one.\"\nlicense = \"MIT\"\nrepository = \"https://github.com/janreges/siteone-crawler\"\nhomepage = \"https://crawler.siteone.io/\"\nkeywords = [\"crawler\", \"seo\", \"website-analysis\", \"accessibility\", \"security\"]\ncategories = [\"command-line-utilities\", \"web-programming\"]\nreadme = \"README.md\"\n\n[[bin]]\nname = \"siteone-crawler\"\npath = \"src/main.rs\"\n\n[dependencies]\ntokio = { version = \"1\", features = [\"full\"] }\nreqwest = { version = \"0.13\", features = [\"gzip\", \"brotli\", \"deflate\", \"rustls\", \"socks\", \"cookies\", \"stream\", \"blocking\", \"multipart\"] }\nscraper = \"0.25\"\nregex = \"1\"\nclap = { version = \"4\", features = [\"derive\"] }\nserde = { version = \"1\", features = [\"derive\"] }\nserde_json = \"1\"\ncolored = \"3\"\ndashmap = \"6\"\nhickory-resolver = \"0.25\"\nrustls = { version = \"0.23\", features = [\"ring\"] }\nx509-parser = \"0.18\"\nlettre = { version = \"0.11\", default-features = false, features = [\"tokio1-rustls-tls\", \"smtp-transport\", \"builder\"] }\nflate2 = \"1\"\nbrotli = \"8\"\nchrono = { version = \"0.4\", features = [\"serde\"] }\nchrono-tz = \"0.10\"\nterminal_size = \"0.4\"\nquick-xml = \"0.39\"\nthiserror = \"2\"\nanyhow = \"1\"\nmd-5 = \"0.10\"\nurl = \"2\"\npercent-encoding = \"2\"\nmime = \"0.3\"\nonce_cell = \"1\"\nindexmap = \"2\"\ngethostname = \"1.1\"\nrustls-native-certs = \"0.8\"\nego-tree = \"0.10\"\nbase64 = \"0.22\"\ndirs = \"6\"\npulldown-cmark = \"0.13.1\"\ninquire = { version = \"0.9\", default-features = false, features = [\"crossterm\"] 
}\ncrossterm = \"0.29\"\nfancy-regex = \"0.17\"\n\n[package.metadata.deb]\nmaintainer = \"Ján Regeš <jan.reges@siteone.cz>\"\ncopyright = \"2023-2026, Ján Regeš\"\ndepends = \"libc6\"\nsection = \"web\"\npriority = \"optional\"\nextended-description = \"\"\"\\\nSiteOne Crawler is an ultra-fast, open-source website crawler and QA toolkit \\\nwritten in Rust. It helps developers, DevOps teams, QA engineers, and technical \\\nSEO specialists crawl websites, audit quality, stress-test pages under load, \\\nclone sites for offline browsing and archiving, export content to markdown, \\\ngenerate sitemaps, warm caches, and enforce CI/CD quality gates — all from a \\\nsingle, dependency-free binary for Linux, macOS, and Windows.\\n\\\n\\n\\\nIt combines multiple website tooling workflows in one application: security, \\\nperformance, SEO, accessibility, and best-practices audits; whole-site quality \\\nscoring; UX checks that other tools miss (e.g. non-clickable phone numbers, \\\nmissing alt text, broken heading hierarchy); reporting of all external links \\\nwith their source pages, redirects, and 404s; stress/load testing with tunable \\\nconcurrency and rate limits; offline multi-domain cloning with URL rewriting; \\\nmarkdown export for documentation, archiving, or AI workflows; sitemap \\\ngeneration; post-deploy cache warming; and automated quality checks for CI/CD \\\npipelines.\\n\\\n\\n\\\nSiteOne Crawler can output results as interactive HTML reports (including an \\\nimage gallery of all pictures found on the site), structured JSON, or readable \\\nterminal text, making it suitable both for local development and for automation \\\nin CI/CD environments. 
It can also email HTML reports directly via \\\nthe user's own SMTP server and includes a built-in web server for browsing \\\ngenerated markdown exports, plus extensive CLI configurability for advanced \\\nuse cases.\\n\\\n\\n\\\nWhether you need a technical website audit, an offline mirror, a load-testing \\\nhelper, a markdown export for LLM/AI processing, or a reliable quality gate \\\nbefore deployment, SiteOne Crawler delivers 10 tools in one — as an ultra-fast, \\\nportable, open-source Rust binary with zero runtime dependencies.\"\"\"\nassets = [\n    [\"target/release/siteone-crawler\", \"usr/bin/\", \"755\"],\n    [\"README.md\", \"usr/share/doc/siteone-crawler/\", \"644\"],\n    [\"LICENSE\", \"usr/share/doc/siteone-crawler/\", \"644\"],\n]\n\n[package.metadata.deb.variants.static]\nname = \"siteone-crawler-static\"\ndepends = \"\"\nconflicts = \"siteone-crawler\"\nprovides = \"siteone-crawler\"\nextended-description = \"\"\"\\\nStatically linked (musl) variant of SiteOne Crawler for maximum Linux compatibility. \\\nThis version runs on any Linux distribution regardless of the installed glibc version. \\\nInstall this if the standard siteone-crawler package reports a 'GLIBC not found' error. \\\nNote: ~50–80% slower than the glibc variant for CPU-intensive operations (offline and \\\nmarkdown export) due to the musl memory allocator.\"\"\"\n\n[package.metadata.generate-rpm]\nassets = [\n    { source = \"target/release/siteone-crawler\", dest = \"/usr/bin/siteone-crawler\", mode = \"0755\" },\n    { source = \"README.md\", dest = \"/usr/share/doc/siteone-crawler/README.md\", mode = \"0644\" },\n    { source = \"LICENSE\", dest = \"/usr/share/doc/siteone-crawler/LICENSE\", mode = \"0644\" },\n]\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2023-2026 Ján Regeš\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE."
  },
  {
    "path": "README.md",
    "content": "# SiteOne Crawler\r\n\r\nSiteOne Crawler is a powerful and easy-to-use **website analyzer, cloner, and converter** designed for developers seeking security and performance insights, SEO specialists identifying optimization opportunities, and website owners needing reliable backups and offline versions.\r\n\r\n**Now rewritten in Rust** for maximum performance, minimal resource usage, and zero runtime dependencies. The transition from PHP+Swoole to Rust resulted in **25% faster execution** and **30% lower memory consumption** while producing identical output.\r\n\r\n**Discover the SiteOne Crawler advantage:**\r\n\r\n*   **Run Anywhere:** Single native binary for **🪟 Windows**, **🍎 macOS**, and **🐧 Linux** (x64 & arm64). No runtime dependencies.\r\n*   **Work Your Way:** Launch the binary without arguments for an **interactive wizard** 🧙 with 10 preset modes, use the extensive **command-line interface** 📟 ([releases](https://github.com/janreges/siteone-crawler/releases), [▶️ video](https://www.youtube.com/watch?v=25T_yx13naA&list=PL9mElgTe-s1Csfg0jXWmDS0MHFN7Cpjwp)) for automation and power, or enjoy the intuitive **desktop GUI application** 💻 ([GUI app](https://github.com/janreges/siteone-crawler-gui), [▶️ video](https://www.youtube.com/watch?v=rFW8LNEVNdw)) for visual control.\r\n*   **Rich Output Formats:** Interactive **HTML audit report** 📊 with sortable tables and quality scoring (0.0-10.0) (see [nextjs.org sample](https://crawler.siteone.io/html/2024-08-23/forever/cl8xw4r-fdag8wg-44dd.html)), detailed **JSON** for programmatic consumption, and human-readable **text** for terminal. Send HTML reports directly to your inbox via **built-in SMTP mailer** 📧.\r\n*   **CI/CD Integration:** Built-in **quality gate** (`--ci`) with configurable thresholds — exit code 10 on failure enables automated deployment blocking. 
Also useful for **cache warming** — crawling the entire site after deployment populates your reverse proxy/CDN cache.\r\n*   **Offline & Markdown Power:** Create complete **offline clones** 💾 for browsing without a server ([nextjs.org clone](https://crawler.siteone.io/examples-exports/nextjs.org/)) or convert entire websites into clean **Markdown** 📝 — perfect for backups, documentation, or feeding content to AI models ([examples](https://github.com/janreges/siteone-crawler-markdown-examples/)).\r\n*   **Deep Crawling & Analysis:** Thoroughly crawl every page and asset, identify errors (404s, redirects), generate **sitemaps** 🗺️, and even get **email summaries** 📧 (watch [▶️ video example](https://www.youtube.com/watch?v=PHIFSOmk0gk)).\r\n*   **Learn More:** Dive into the 🌐 [Project Website](https://crawler.siteone.io/), explore the detailed [Documentation](https://crawler.siteone.io/configuration/command-line-options/), or check the [JSON](docs/JSON-OUTPUT.md)/[Text](docs/TEXT-OUTPUT.md) output specs.\r\n\r\nGIF animation of the crawler in action (also available as a [▶️ video](https://www.youtube.com/watch?v=25T_yx13naA&list=PL9mElgTe-s1Csfg0jXWmDS0MHFN7Cpjwp)):\r\n\r\n![SiteOne Crawler](docs/siteone-crawler-command-line.gif)\r\n\r\n## Table of contents\r\n\r\n- [✨ Features](#-features)\r\n    * [🕷️ Crawler](#️-crawler)\r\n    * [🛠️ Dev/DevOps assistant](#️-devdevops-assistant)\r\n    * [📊 Analyzer](#-analyzer)\r\n    * [📧 Reporter](#-reporter)\r\n    * [💾 Offline website generator](#-offline-website-generator)\r\n    * [📝 Website to markdown converter](#-website-to-markdown-converter)\r\n    * [🗺️ Sitemap generator](#️-sitemap-generator)\r\n- [🚀 Installation](#-installation)\r\n    * [📦 Pre-built binaries](#-pre-built-binaries)\r\n    * [🍺 Homebrew (macOS / Linux)](#-homebrew-macos--linux)\r\n    * [🐧 Debian / Ubuntu (apt)](#-debian--ubuntu-apt)\r\n    * [🎩 Fedora / RHEL (dnf)](#-fedora--rhel-dnf)\r\n    * [🦎 openSUSE / SLES 
(zypper)](#-opensuse--sles-zypper)\r\n    * [🏔️ Alpine Linux (apk)](#️-alpine-linux-apk)\r\n    * [🔨 Build from source](#-build-from-source)\r\n- [▶️ Usage](#️-usage)\r\n    * [Interactive wizard](#interactive-wizard)\r\n    * [Basic example](#basic-example)\r\n    * [CI/CD example](#cicd-example)\r\n    * [Fully-featured example](#fully-featured-example)\r\n    * [⚙️ Arguments](#️-arguments)\r\n        + [Basic settings](#basic-settings)\r\n        + [Output settings](#output-settings)\r\n        + [Resource filtering](#resource-filtering)\r\n        + [Advanced crawler settings](#advanced-crawler-settings)\r\n        + [File export settings](#file-export-settings)\r\n        + [Mailer options](#mailer-options)\r\n        + [Upload options](#upload-options)\r\n        + [Offline exporter options](#offline-exporter-options)\r\n        + [Markdown exporter options](#markdown-exporter-options)\r\n        + [Sitemap options](#sitemap-options)\r\n        + [Expert options](#expert-options)\r\n        + [Fastest URL analyzer](#fastest-url-analyzer)\r\n        + [SEO and OpenGraph analyzer](#seo-and-opengraph-analyzer)\r\n        + [Slowest URL analyzer](#slowest-url-analyzer)\r\n        + [Built-in HTTP server](#built-in-http-server)\r\n        + [HTML-to-Markdown conversion](#html-to-markdown-conversion)\r\n        + [CI/CD settings](#cicd-settings)\r\n- [🏆 Quality Scoring](#-quality-scoring)\r\n- [🔄 CI/CD Integration](#-cicd-integration)\r\n- [📄 Output Examples](#-output-examples)\r\n- [🧪 Testing](#-testing)\r\n- [⚠️ Disclaimer](#️-disclaimer)\r\n- [📜 License](#-license)\r\n\r\n## ✨ Features\r\n\r\nIn short, the main benefits can be summarized in these points:\r\n\r\n- **🕷️ Crawler** - very powerful crawler of the entire website reporting useful information about each URL (status code,\r\n  response time, size, custom headers, titles, etc.)\r\n- **🛠️ Dev/DevOps assistant** - offers stress/load testing with configurable concurrent workers (`--workers`) and request\r\n  
rate (`--max-reqs-per-sec`), cache warming, localhost testing, and rich URL/content-type filtering\r\n- **📊 Analyzer** - analyzes all webpages and reports strange or error behaviour and useful statistics (404, redirects, bad\r\n  practices, SEO and security issues, heading structures, etc.)\r\n- **📧 Reporter** - interactive **HTML audit report**, structured **JSON**, and colored **text** output; built-in\r\n  **SMTP mailer** sends HTML reports directly to your inbox\r\n- **💾 Offline website generator** - clone entire websites to browsable local HTML files (no server needed) including all\r\n  assets. Supports **multi-domain clones** — include subdomains or external domains with intelligent cross-linking.\r\n- **📝 Website to markdown converter** - export the entire website to browsable text markdown (viewable on GitHub or any\r\n  text editor), or generate a **single-file markdown** with smart header/footer deduplication — ideal for **feeding to AI\r\n  tools**. Includes a **built-in web server** that renders markdown exports as styled HTML pages.\r\n  Also supports **standalone HTML-to-Markdown conversion** of local files (`--html-to-markdown`).\r\n  See [markdown examples](https://github.com/janreges/siteone-crawler-markdown-examples/).\r\n- **🗺️ Sitemap generator** - allows you to generate `sitemap.xml` and `sitemap.txt` files with a list of all pages on your\r\n  website\r\n- **🏆 Quality scoring** - automatic quality scoring (0.0-10.0) across 5 categories: Performance, SEO, Security, Accessibility, Best Practices\r\n- **🔄 CI/CD quality gate** - configurable thresholds with exit code 10 on failure for automated pipelines; also\r\n  useful as a **post-deployment cache warmer** for reverse proxies and CDNs\r\n\r\nThe following features are summarized in greater detail:\r\n\r\n### 🕷️ Crawler\r\n\r\n- **all major platforms** supported without dependencies (🐧 Linux, 🪟 Windows, 🍎 macOS, arm64) — single native binary\r\n- has incredible **🚀 native Rust performance** 
with async I/O and multi-threaded crawling\r\n- provides simulation of **different device types** (desktop/mobile/tablet) thanks to predefined User-Agents\r\n- will crawl **all files**, styles, scripts, fonts, images, documents, etc. on your website\r\n- will respect the `robots.txt` file and will not crawl the pages that are not allowed\r\n- has a **beautiful interactive** and **🎨 colourful output**\r\n- it will **clearly warn you** ⚠️ of any wrong use of the tool (e.g. input parameters validation or wrong permissions)\r\n- as `--url` parameter, you can specify also a `sitemap.xml` file (or [sitemap index](https://www.sitemaps.org/protocol.html#index)),\r\n  which will be processed as a list of URLs. In sitemap-only mode, the crawler follows only URLs from\r\n  the sitemap — it does not discover additional links from HTML pages. Gzip-compressed sitemaps (`*.xml.gz`)\r\n  are fully supported, both as direct URLs and when referenced from sitemap index files.\r\n- respects the HTML `<base href>` tag when resolving relative URLs on pages that use it.\r\n\r\n### 🛠️ Dev/DevOps assistant\r\n\r\n- allows testing **public** and **local projects on specific ports** (e.g. 
`http://localhost:3000/`)\r\n- works as a **stress/load tester** — configure the number of **concurrent workers** (`--workers`) and the **maximum\r\n  requests per second** (`--max-reqs-per-sec`) to simulate various traffic levels and test your infrastructure's\r\n  resilience against high load or DoS scenarios\r\n- combine with **rich filtering options** — include/ignore URLs by regex (`--include-regex`, `--ignore-regex`), disable\r\n  specific asset types (`--disable-javascript`, `--disable-images`, etc.), or limit crawl depth (`--max-depth`) to focus\r\n  the load on specific parts of your website\r\n- will help you **warm up the application cache** or the **cache on the reverse proxy** of the entire website\r\n\r\n### 📊 Analyzer\r\n\r\n- will **find the weak points** or **strange behavior** of your website\r\n- built-in analyzers cover SEO, security headers, accessibility, best practices, performance, SSL/TLS, caching, and more\r\n\r\n### 📧 Reporter\r\n\r\nThree output formats:\r\n\r\n- **Interactive HTML report** — a self-contained `.html` file with sortable tables, quality scores, color-coded\r\n  findings, and sections for SEO, security, accessibility, performance, headers, redirects, 404s, and more. Open it\r\n  in any browser — no server needed.\r\n- **JSON output** — structured data with all crawled URLs, response details, analysis findings, scores, and CI/CD gate\r\n  results. Ideal for programmatic consumption, dashboards, and integrations.\r\n- **Text output** — human-readable colored terminal output with tables, progress bars, and summaries.\r\n\r\nAdditional reporting features:\r\n\r\n- **Built-in SMTP mailer** — send the HTML audit report directly to one or more email addresses via your own SMTP\r\n  server. 
Configure sender, recipients, subject template, and SMTP credentials via CLI options.\r\n- will provide you with data for **SEO analysis**, just add the `Title`, `Keywords` and `Description` extra columns\r\n- will provide useful **summaries and statistics** at the end of the processing\r\n\r\n### 💾 Offline website generator\r\n\r\n- will help you **export the entire website** to offline form, where it is possible to browse the site through local\r\n  HTML files (without HTTP server) including all documents, images, styles, scripts, fonts, etc.\r\n- supports **multi-domain clones** — include subdomains (`*.mysite.tld`) or entirely different domains in a single\r\n  offline export. All URLs across included domains are **intelligently rewritten to relative paths**, so the resulting\r\n  offline version cross-links pages between domains seamlessly — you get one unified browsable clone.\r\n- you can **limit what assets** you want to download and export (see `--disable-*` directives) .. for some types of\r\n  websites the best result is with the `--disable-javascript` option.\r\n- you can specify by `--allowed-domain-for-external-files` (short `-adf`) from which **external domains** it is possible\r\n  to **download** assets (JS, CSS, fonts, images, documents) including `*` option for all domains.\r\n- you can specify by `--allowed-domain-for-crawling` (short `-adc`) which **other domains** should be included in the\r\n  **crawling** if there are any links pointing to them. You can enable e.g. 
`mysite.*` to export all language mutations\r\n  that have a different TLD or `*.mysite.tld` to export all subdomains.\r\n- you can use `--single-page` to **export only one page** to which the URL is given (and its assets), but do not follow\r\n  other pages.\r\n- you can use `--single-foreign-page` to **export only one page** from another domain (if allowed by `--allowed-domain-for-crawling`),\r\n  but do not follow other pages.\r\n- you can use `--replace-content` to **replace content** in HTML/JS/CSS with `foo -> bar` or regexp in PCRE format, e.g.\r\n  `/card[0-9]/i -> card`. Can be specified multiple times.\r\n- you can use `--replace-query-string` to **replace chars in query string** in the filename.\r\n- you can use `--max-depth` to set the **maximum crawling depth** (for pages, not assets). `1` means `/about` or `/about/`,\r\n  `2` means `/about/contacts` etc.\r\n- you can use it to **export your website to a static form** and host it on GitHub Pages, Netlify, Vercel, etc. as a\r\n  static backup and part of your **disaster recovery plan** or **archival/legal needs**\r\n- works great with **older conventional websites** but also **modern ones**, built on frameworks like Next.js, Nuxt.js,\r\n  SvelteKit, Astro, Gatsby, etc. When a JS framework is detected, the export also performs some framework-specific code\r\n  modifications for optimal results.\r\n- **try it** for your website, and you will be very pleasantly surprised :-)\r\n\r\n### 📝 Website to markdown converter\r\n\r\nTwo export modes:\r\n\r\n- **Multi-file markdown** — exports the entire website with all subpages to a directory of **browsable `.md` files**.\r\n  The markdown renders nicely when uploaded to GitHub, viewed in VS Code, or any text editor. Links between pages are\r\n  converted to relative `.md` links so you can navigate between files. 
Optionally includes images and other files\r\n  (PDF, etc.).\r\n- **Single-file markdown** — combines all pages into **one large markdown file** with smart removal of duplicate website\r\n  headers and footers across pages. Ideal for **feeding entire website content to AI tools** (ChatGPT, Claude, etc.)\r\n  that process markdown more effectively than raw HTML.\r\n\r\nSmart conversion features:\r\n\r\n- **collapsible accordions** — large link lists (menus, navigation, footer links with 8+ items) are automatically\r\n  collapsed into `<details>` accordions with contextual labels (\"Menu\", \"Links\") for better readability\r\n- content before the main heading (typically h1) — such as the site header and navigation — is moved to the end of the\r\n  page below a `---` separator, so the actual page content comes first\r\n- you can set multiple selectors (CSS-like) to **remove unwanted elements** from the exported markdown\r\n- **code block detection** and **syntax highlighting** for popular programming languages\r\n- HTML tables are converted to proper **markdown tables**\r\n\r\nBuilt-in web server:\r\n\r\n- use `--serve-markdown=<dir>` to start a **built-in HTTP server** that renders your markdown export as styled HTML\r\n  pages with tables, dark/light mode, breadcrumb navigation, and accordion support — perfect for browsing and sharing\r\n  the export locally or on a network\r\n\r\nStandalone HTML-to-Markdown conversion:\r\n\r\n- use `--html-to-markdown=<file>` to convert a **local HTML file** directly to Markdown without crawling any website\r\n- outputs clean Markdown to **stdout** (pipe-friendly) or to a file with `--html-to-markdown-output=<file>`\r\n- uses the same conversion pipeline as `--markdown-export-dir` — including all cleanup, accordion collapsing, code language detection, and implicit exclusions (cookie banners, `aria-hidden` elements, `role=\"menu\"` dropdowns)\r\n- respects `--markdown-disable-images`, `--markdown-disable-files`, 
`--markdown-exclude-selector`, and `--markdown-move-content-before-h1-to-end`\r\n- does **not** rewrite links (`.html` → `.md`) since the file is standalone with no site context\r\n\r\n💡 Tip: you can push the exported markdown folder to your GitHub repository, where it will be automatically rendered as browsable\r\ndocumentation. You can look at the [examples](https://github.com/janreges/siteone-crawler-markdown-examples/) of websites converted to markdown.\r\n\r\nSee all available [markdown exporter options](#markdown-exporter-options) and [HTML-to-Markdown conversion options](#html-to-markdown-conversion).\r\n\r\n### 🗺️ Sitemap generator\r\n\r\n- will help you create a `sitemap.xml` and `sitemap.txt` for your website\r\n- you can set the priority of individual pages based on the number of slashes in the URL\r\n\r\nDon't hesitate and try it. You will love it as we do! ❤️\r\n\r\n## 🚀 Installation\r\n\r\n### 📦 Pre-built binaries\r\n\r\nDownload pre-built binaries from [🐙 GitHub releases](https://github.com/janreges/siteone-crawler/releases) for all major platforms (🐧 Linux, 🪟 Windows, 🍎 macOS, x64 & arm64).\r\n\r\nThe binary is self-contained — no runtime dependencies required.\r\n\r\n```bash\r\n# Linux / macOS — download, extract, run\r\n./siteone-crawler --url=https://my.domain.tld\r\n```\r\n\r\n**🐧 Linux binary variants:**\r\n\r\nFor Linux, two binary variants are provided:\r\n\r\n| Variant | Compatibility | Performance |\r\n|---------|--------------|-------------|\r\n| **glibc** (primary) | Requires glibc 2.39+ (Ubuntu 24.04+, Debian 13+, Fedora 40+) | Full native performance |\r\n| **musl** (compatible) | Any Linux distribution (statically linked, no dependencies) | ~50–80% slower due to musl memory allocator |\r\n\r\nThe **glibc** variant is recommended for current distributions — it offers the best performance. If you are running an older distribution (e.g. 
Ubuntu 22.04, Debian 12) and encounter a `GLIBC_2.xx not found` error, use the **musl** variant instead. The musl binary is fully statically linked and runs on any Linux system regardless of the installed glibc version. The performance difference is mainly noticeable during CPU-intensive operations like offline and markdown exports.\r\n\r\n**Note for macOS users**: If your Mac refuses to start the crawler from your Downloads folder, move the entire folder with the Crawler **via the terminal** to another location, for example to the home folder `~`.\r\n\r\n### 🍺 Homebrew (macOS / Linux)\r\n\r\n```bash\r\nbrew install janreges/tap/siteone-crawler\r\nsiteone-crawler --url=https://my.domain.tld\r\n```\r\n\r\n### 🐧 Debian / Ubuntu (apt)\r\n\r\n```bash\r\ncurl -1sLf 'https://dl.cloudsmith.io/public/janreges/siteone-crawler/setup.deb.sh' | sudo -E bash\r\nsudo apt-get install siteone-crawler\r\n```\r\n\r\n> **Older distributions (Ubuntu 22.04, Debian 11/12, etc.):** If you get a `GLIBC_X.XX not found` error, install the statically linked variant instead:\r\n> ```bash\r\n> sudo apt-get install siteone-crawler-static\r\n> ```\r\n> See [Linux binary variants](#-pre-built-binaries) for details on the performance difference.\r\n\r\n### 🎩 Fedora / RHEL (dnf)\r\n\r\n```bash\r\ncurl -1sLf 'https://dl.cloudsmith.io/public/janreges/siteone-crawler/setup.rpm.sh' | sudo -E bash\r\nsudo dnf install siteone-crawler\r\n```\r\n\r\n> **Older distributions:** If you get a `GLIBC_X.XX not found` error, use `sudo dnf install siteone-crawler-static` instead.\r\n> See [Linux binary variants](#-pre-built-binaries) for details.\r\n\r\n### 🦎 openSUSE / SLES (zypper)\r\n\r\n```bash\r\ncurl -1sLf 'https://dl.cloudsmith.io/public/janreges/siteone-crawler/setup.rpm.sh' | sudo -E bash\r\nsudo zypper install siteone-crawler\r\n```\r\n\r\n> **Older distributions:** If you get a `GLIBC_X.XX not found` error, use `sudo zypper install siteone-crawler-static` instead.\r\n> See [Linux binary 
variants](#-pre-built-binaries) for details.\r\n\r\n### 🏔️ Alpine Linux (apk)\r\n\r\n```bash\r\ncurl -1sLf 'https://dl.cloudsmith.io/public/janreges/siteone-crawler/setup.alpine.sh' | sudo -E bash\r\nsudo apk add siteone-crawler\r\n```\r\n\r\n### 🔨 Build from source\r\n\r\nRequires [Rust](https://www.rust-lang.org/tools/install) 1.85 or later.\r\n\r\n```bash\r\ngit clone https://github.com/janreges/siteone-crawler.git\r\ncd siteone-crawler\r\n\r\n# Build optimized release binary\r\ncargo build --release\r\n\r\n# Run\r\n./target/release/siteone-crawler --url=https://my.domain.tld\r\n```\r\n\r\n**Build statically linked (musl) binary:**\r\n\r\n```bash\r\n# Install musl toolchain (Ubuntu/Debian)\r\nsudo apt-get install musl-tools\r\nrustup target add x86_64-unknown-linux-musl\r\n\r\n# Build static binary (no system dependencies)\r\ncargo build --release --target x86_64-unknown-linux-musl\r\n\r\n# Run — works on any Linux distribution\r\n./target/x86_64-unknown-linux-musl/release/siteone-crawler --url=https://my.domain.tld\r\n```\r\n\r\n## ▶️ Usage\r\n\r\n### Interactive wizard\r\n\r\nRun the binary **without any arguments** and an interactive wizard will guide you through the\r\nconfiguration. Choose from 10 preset modes, enter the target URL, fine-tune settings with\r\narrow keys, and the crawler starts immediately — no need to remember CLI flags.\r\n\r\n```\r\n? 
Choose a crawl mode:\r\n❯ Quick Audit               Fast site health overview — crawls all pages and assets\r\n  SEO Analysis               Extract titles, descriptions, keywords, and OpenGraph tags\r\n  Performance Test           Measure response times with cache disabled — find bottlenecks\r\n  Security Check             Check SSL/TLS, security headers, and redirects site-wide\r\n  Offline Clone              Download entire website with all assets for offline browsing\r\n  Markdown Export            Convert pages to Markdown for AI models or documentation\r\n  Stress Test                High-concurrency load test with cache-busting random params\r\n  Single Page                Deep analysis of a single URL — SEO, security, performance\r\n  Large Site Crawl           High-throughput HTML-only crawl for large sites (100k+ pages)\r\n  Custom                     Start from defaults and configure every option manually\r\n  ──────────────────────────────────────\r\n  Browse offline export      Serve a previously exported offline site via HTTP\r\n  Browse markdown export     Serve a previously exported markdown site via HTTP\r\n[↑↓ to move, enter to select, type to filter]\r\n```\r\n\r\nAfter selecting a preset and entering the URL, the wizard shows a settings form where you can\r\nadjust workers, timeout, content types, export options, and more. 
A configuration summary with the\r\nequivalent CLI command is displayed before the crawl starts — copy it for future use without the\r\nwizard.\r\n\r\nIf existing offline or markdown exports are detected in `./tmp/`, the wizard also offers to\r\n**serve them via the built-in HTTP server** directly from the menu.\r\n\r\n### Basic example\r\n\r\nTo run the crawler from the command line, provide the required arguments:\r\n\r\n```bash\r\n./siteone-crawler --url=https://mydomain.tld/ --device=mobile\r\n```\r\n\r\n### CI/CD example\r\n\r\n```bash\r\n# Fail deployment if quality score < 7.0 or any 5xx errors\r\n./siteone-crawler --url=https://mydomain.tld/ --ci --ci-min-score=7.0 --ci-max-5xx=0\r\necho $?  # 0 = pass, 10 = fail\r\n```\r\n\r\n### Fully-featured example\r\n\r\n```bash\r\n./siteone-crawler --url=https://mydomain.tld/ \\\r\n  --output=text \\\r\n  --workers=2 \\\r\n  --max-reqs-per-sec=10 \\\r\n  --memory-limit=2048M \\\r\n  --resolve='mydomain.tld:443:127.0.0.1' \\\r\n  --timeout=5 \\\r\n  --proxy=proxy.mydomain.tld:8080 \\\r\n  --http-auth=myuser:secretPassword123 \\\r\n  --user-agent=\"My User-Agent String\" \\\r\n  --extra-columns=\"DOM,X-Cache(10),Title(40),Keywords(50),Description(50>),Heading1=xpath://h1/text()(20>),ProductPrice=regexp:/Price:\\s*\\$?(\\d+(?:\\.\\d{2})?)/i#1(10)\" \\\r\n  --accept-encoding=\"gzip, deflate\" \\\r\n  --url-column-size=100 \\\r\n  --max-queue-length=3000 \\\r\n  --max-visited-urls=10000 \\\r\n  --max-url-length=5000 \\\r\n  --max-non200-responses-per-basename=10 \\\r\n  --include-regex=\"/^.*\\/technologies.*/\" \\\r\n  --include-regex=\"/^.*\\/fashion.*/\" \\\r\n  --ignore-regex=\"/^.*\\/downloads\\/.*\\.pdf$/i\" \\\r\n  --analyzer-filter-regex=\"/^.*$/i\" \\\r\n  --remove-query-params \\\r\n  --keep-query-param=page \\\r\n  --add-random-query-params \\\r\n  --transform-url=\"live-site.com -> local-site.local\" \\\r\n  --transform-url=\"/cdn\\.live-site\\.com/ -> local-site.local/cdn\" \\\r\n  --show-scheme-and-host 
\\\r\n  --do-not-truncate-url \\\r\n  --output-html-report=tmp/myreport.html \\\r\n  --html-report-options=\"summary,seo-opengraph,visited-urls,security,redirects\" \\\r\n  --output-json-file=/dir/report.json \\\r\n  --output-text-file=/dir/report.txt \\\r\n  --add-timestamp-to-output-file \\\r\n  --add-host-to-output-file \\\r\n  --offline-export-dir=tmp/mydomain.tld \\\r\n  --replace-content='/<foo[^>]+>/ -> <bar>' \\\r\n  --ignore-store-file-error \\\r\n  --sitemap-xml-file=/dir/sitemap.xml \\\r\n  --sitemap-txt-file=/dir/sitemap.txt \\\r\n  --sitemap-base-priority=0.5 \\\r\n  --sitemap-priority-increase=0.1 \\\r\n  --markdown-export-dir=tmp/mydomain.tld.md \\\r\n  --markdown-export-single-file=tmp/mydomain.tld.combined.md \\\r\n  --markdown-move-content-before-h1-to-end \\\r\n  --markdown-disable-images \\\r\n  --markdown-disable-files \\\r\n  --markdown-remove-links-and-images-from-single-file \\\r\n  --markdown-exclude-selector='.exclude-me' \\\r\n  --markdown-replace-content='/<foo[^>]+>/ -> <bar>' \\\r\n  --markdown-replace-query-string='/[a-z]+=[^&]*(&|$)/i -> $1__$2' \\\r\n  --mail-to=your.name@my-mail.tld \\\r\n  --mail-to=your.friend.name@my-mail.tld \\\r\n  --mail-from=crawler@my-mail.tld \\\r\n  --mail-from-name=\"SiteOne Crawler\" \\\r\n  --mail-subject-template=\"Crawler Report for %domain% (%date%)\" \\\r\n  --mail-smtp-host=smtp.my-mail.tld \\\r\n  --mail-smtp-port=25 \\\r\n  --mail-smtp-user=smtp.user \\\r\n  --mail-smtp-pass=secretPassword123 \\\r\n  --ci --ci-min-score=7.0 --ci-min-security=8.0\r\n```\r\n\r\n## ⚙️ Arguments\r\n\r\nFor a clearer list, I recommend going to the documentation: 🌐 https://crawler.siteone.io/configuration/command-line-options/\r\n\r\n### Basic settings\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--url=<url>` | Required. HTTP or HTTPS URL address of the website or sitemap xml to be crawled.<br>Use quotation marks `''` if the URL contains query parameters. 
|\r\n| `--single-page` | Load only one page to which the URL is given (and its assets), but do not follow other pages. |\r\n| `--max-depth=<int>` | Maximum crawling depth (for pages, not assets). Default is `0` (no limit). `1` means `/about`<br>or `/about/`, `2` means `/about/contacts` etc. |\r\n| `--device=<val>` | Device type for choosing a predefined User-Agent. Ignored when `--user-agent` is defined.<br>Supported values: `desktop`, `mobile`, `tablet`. Default is `desktop`. |\r\n| `--user-agent=<val>` | Custom User-Agent header. Use quotation marks. If specified, it takes precedence over<br>the device parameter. If you add `!` at the end, the siteone-crawler/version will not be<br>added as a signature at the end of the final user-agent. |\r\n| `--timeout=<int>` | Request timeout in seconds. Default is `5`. |\r\n| `--proxy=<host:port>` | HTTP proxy to use in `host:port` format. Host can be hostname, IPv4 or IPv6. |\r\n| `--http-auth=<user:pass>` | Basic HTTP authentication in `username:password` format. |\r\n| `--config-file=<file>` | Load CLI options from a config file. One option per line, `#` comments allowed.<br>Without this flag, auto-discovers `~/.siteone-crawler.conf` or `/etc/siteone-crawler.conf`.<br>CLI arguments override config file values. |\r\n\r\n### Output settings\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--output=<val>` | Output type. Supported values: `text`, `json`. Default is `text`. |\r\n| `--extra-columns=<values>` | Comma delimited list of extra columns added to output table. You can specify HTTP headers<br>(e.g. `X-Cache`), predefined values (`Title`, `Keywords`, `Description`, `DOM`), or custom<br>extraction from text files (HTML, JS, CSS, TXT, JSON, XML, etc.) 
using XPath or regexp.<br>For custom extraction, use the format `Custom_column_name=method:pattern#group(length)`, where<br>`method` is `xpath` or `regexp`, `pattern` is the extraction pattern, an optional `#group` specifies the<br>capturing group (or node index for XPath) to return (defaulting to the entire match or first node), and an<br>optional `(length)` sets the maximum output length (append `>` to disable truncation).<br>For example, use `Heading1=xpath://h1/text()(20>)` to extract the text of the first H1 element<br>from the HTML document, and `ProductPrice=regexp:/Price:\\s*\\$?(\\d+(?:\\.\\d{2})?)/i#1(10)`<br>to extract a numeric price (e.g., \"29.99\") from a string like \"Price: $29.99\". |\r\n| `--url-column-size=<num>` | Basic URL column width. By default, it is calculated from the size of your terminal window. |\r\n| `--rows-limit=<num>` | Max. number of rows to display in tables with analysis results.<br>Default is `200`. |\r\n| `--timezone=<val>` | Timezone for datetimes in HTML reports and timestamps in output folders/files, e.g. `Europe/Prague`.<br>Default is `UTC`. |\r\n| `--do-not-truncate-url` | In the text output, long URLs are truncated by default to `--url-column-size` so the table does not<br>wrap due to long URLs. With this option, you can turn off the truncation. |\r\n| `--show-scheme-and-host` | On text output, show scheme and host also for origin domain URLs. |\r\n| `--hide-progress-bar` | Hide progress bar visible in text and JSON output for more compact view. |\r\n| `--hide-columns=<list>` | Hide specified columns from the progress table. Comma-separated list of column names:<br>`type`, `time`, `size`, `cache`. Example: `--hide-columns=cache` or `--hide-columns=cache,type`. |\r\n| `--no-color` | Disable colored output. |\r\n| `--force-color` | Force colored output regardless of support detection. |\r\n| `--show-inline-criticals` | Show criticals from the analyzer directly in the URL table. 
|\r\n| `--show-inline-warnings` | Show warnings from the analyzer directly in the URL table. |\r\n\r\n### Resource filtering\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--disable-all-assets` | Disables crawling of all assets and files and only crawls pages in href attributes.<br>Shortcut for calling all other `--disable-*` flags. |\r\n| `--disable-javascript` | Disables JavaScript downloading and removes all JavaScript code from HTML,<br>including `onclick` and other `on*` handlers. |\r\n| `--disable-styles` | Disables CSS file downloading and at the same time removes all style definitions<br>by `<style>` tag or inline by style attributes. |\r\n| `--disable-fonts` | Disables font downloading and also removes all font/font-face definitions from CSS. |\r\n| `--disable-images` | Disables downloading of all images and replaces found images in HTML with placeholder image only. |\r\n| `--disable-files` | Disables downloading of any files (typically downloadable documents) to which various links point. |\r\n| `--remove-all-anchor-listeners` | On all links on the page remove any event listeners. Useful on some types of sites with modern<br>JS frameworks that would like to compose content dynamically (React, Svelte, Vue, Angular, etc.). |\r\n\r\n### Advanced crawler settings\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--workers=<int>` | Maximum number of concurrent workers (threads).<br>Crawler will not make more simultaneous requests to the server than this number.<br>Use carefully! A high number of workers can cause a DoS attack. Default is `3`. |\r\n| `--max-reqs-per-sec=<val>` | Max requests/s for whole crawler. Be careful not to cause a DoS attack. Default value is `10`. |\r\n| `--memory-limit=<size>` | Memory limit in units `M` (Megabytes) or `G` (Gigabytes). Default is `2048M`. |\r\n| `--resolve=<host:port:ip>` | Custom DNS resolution in `domain:port:ip` format. 
Same as [curl --resolve](https://everything.curl.dev/usingcurl/connections/name.html?highlight=resolve#provide-a-custom-ip-address-for-a-name).<br>Can be specified multiple times. |\r\n| `--allowed-domain-for-external-files=<domain>` | Enable loading of file content from another domain (e.g. CDN).<br>Can be specified multiple times. Use `*` for all domains. |\r\n| `--allowed-domain-for-crawling=<domain>` | Allow crawling of other listed domains — typically language mutations on other domains.<br>Can be specified multiple times. Use wildcards like `*.mysite.tld`. |\r\n| `--single-foreign-page` | When crawling of other domains is allowed, ensures that only the linked page<br>and its assets are crawled from foreign domains. |\r\n| `--include-regex=<regex>` | PCRE-compatible regular expression for URLs that should be included.<br>Can be specified multiple times. Example: `--include-regex='/^\\/public\\//'` |\r\n| `--ignore-regex=<regex>` | PCRE-compatible regular expression for URLs that should be ignored.<br>Can be specified multiple times. |\r\n| `--regex-filtering-only-for-pages` | Apply `*-regex` rules only to page URLs, not static assets. |\r\n| `--analyzer-filter-regex` | PCRE-compatible regular expression for filtering analyzers by name. |\r\n| `--accept-encoding=<val>` | Custom `Accept-Encoding` request header. Default is `gzip, deflate, br`. |\r\n| `--remove-query-params` | Remove query parameters from found URLs. |\r\n| `--keep-query-param=<name>` | Keep only the specified query parameter(s) in discovered URLs; all others are removed.<br>Can be specified multiple times. If `--remove-query-params` is also set, all parameters<br>are removed regardless. |\r\n| `--add-random-query-params` | Add random query parameters to each URL to bypass caches. |\r\n| `--transform-url=<from->to>` | Transform URLs before crawling. Use `from -> to` for simple replacement or `/regex/ -> replacement`.<br>Can be specified multiple times. 
|\r\n| `--force-relative-urls` | Normalize all discovered URLs matching the initial domain (incl. www variant and protocol<br>differences) to canonical form. Prevents duplicate files in offline export when the site<br>uses inconsistent URL formats (http/https, www/non-www). |\r\n| `--ignore-robots-txt` | Ignore robots.txt content. |\r\n| `--http-cache-dir=<dir>` | Cache dir for HTTP responses. Disable with `--http-cache-dir='off'` or `--no-cache`.<br>Default is `~/.cache/siteone-crawler/http-cache` (XDG-compliant, respects `$XDG_CACHE_HOME`). |\r\n| `--http-cache-compression` | Enable compression for HTTP cache storage. |\r\n| `--http-cache-ttl=<val>` | TTL for HTTP cache entries (e.g. `1h`, `7d`, `30m`). Use `0` for infinite. Default is `24h`. |\r\n| `--no-cache` | Disable HTTP cache completely. Shortcut for `--http-cache-dir='off'`. |\r\n| `--max-queue-length=<num>` | Maximum length of the waiting URL queue. Default is `9000`. |\r\n| `--max-visited-urls=<num>` | Maximum number of visited URLs. Default is `10000`. |\r\n| `--max-skipped-urls=<num>` | Maximum number of skipped URLs. Default is `10000`. |\r\n| `--max-url-length=<num>` | Maximum supported URL length in chars. Default is `2083`. |\r\n| `--max-non200-responses-per-basename=<num>` | Protection against looping with dynamic non-200 URLs. Default is `5`. |\r\n\r\n### File export settings\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--output-html-report=<file>` | Save HTML report into that file. Set to empty `''` to disable HTML report.<br>By default saved into `tmp/%domain%.report.%datetime%.html`. 
|\r\n| `--html-report-options=<sections>` | Comma-separated list of sections to include in HTML report.<br>Available sections: `summary`, `seo-opengraph`, `image-gallery`, `video-gallery`, `visited-urls`, `dns-ssl`, `crawler-stats`, `crawler-info`, `headers`, `content-types`, `skipped-urls`, `external-links`, `caching`, `best-practices`, `accessibility`, `security`, `redirects`, `404-pages`, `slowest-urls`, `fastest-urls`, `source-domains`.<br>Default: all sections. |\r\n| `--output-json-file=<file>` | File path for JSON output. Set to empty `''` to disable JSON file.<br>By default saved into `tmp/%domain%.output.%datetime%.json`.<br>See [JSON Output Documentation](docs/JSON-OUTPUT.md) for format details. |\r\n| `--output-text-file=<file>` | File path for TXT output. Set to empty `''` to disable TXT file.<br>By default saved into `tmp/%domain%.output.%datetime%.txt`.<br>See [Text Output Documentation](docs/TEXT-OUTPUT.md) for format details. |\r\n| `--add-timestamp-to-output-file` | Append timestamp to output filenames (HTML report, JSON, TXT) except sitemaps. |\r\n| `--add-host-to-output-file` | Append initial URL host to output filenames (HTML report, JSON, TXT) except sitemaps. |\r\n\r\n**Default output directory:** Report files are saved into `./tmp/` in the current working directory. If `./tmp/` cannot be created (e.g. read-only filesystem), the crawler falls back to the platform's XDG data directory (`~/.local/share/siteone-crawler/` on Linux, `~/Library/Application Support/siteone-crawler/` on macOS, `%APPDATA%\\siteone-crawler\\` on Windows) and prints a notice to stderr.\r\n\r\n### Mailer options\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--mail-to=<email>` | Recipients of HTML e-mail reports. Required for mailer activation.<br>You can specify multiple emails separated by comma. |\r\n| `--mail-from=<email>` | E-mail sender address. Default is `siteone-crawler@your-hostname.com`. 
|\r\n| `--mail-from-name=<val>` | E-mail sender name. Default is `SiteOne Crawler`. |\r\n| `--mail-subject-template=<val>` | E-mail subject template. You can use `%domain%`, `%date%` and `%datetime%`.<br>Default is `Crawler Report for %domain% (%date%)`. |\r\n| `--mail-smtp-host=<host>` | SMTP host for sending emails. Default is `localhost`. |\r\n| `--mail-smtp-port=<port>` | SMTP port for sending emails. Default is `25`. |\r\n| `--mail-smtp-user=<user>` | SMTP user, if your SMTP server requires authentication. |\r\n| `--mail-smtp-pass=<pass>` | SMTP password, if your SMTP server requires authentication. |\r\n\r\n### Upload options\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--upload` | Enable HTML report upload to `--upload-to`. |\r\n| `--upload-to=<url>` | URL of the endpoint where to send the HTML report. Default is `https://crawler.siteone.io/up`. |\r\n| `--upload-retention=<val>` | How long should the HTML report be kept in the online version?<br>Values: 1h / 4h / 12h / 24h / 3d / 7d / 30d / 365d / forever.<br>Default is `30d`. |\r\n| `--upload-password=<val>` | Optional password (user will be 'crawler') to display the online HTML report. |\r\n| `--upload-timeout=<int>` | Upload timeout in seconds. Default is `3600`. |\r\n\r\n### Offline exporter options\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--offline-export-dir=<dir>` | Path to directory where to save the offline version of the website. |\r\n| `--offline-export-store-only-url-regex=<regex>` | Debug: store only URLs matching these PCRE regexes. Can be specified multiple times. |\r\n| `--offline-export-remove-unwanted-code=<1/0>` | Remove unwanted code for offline mode (analytics, social networks, etc.). Default is `1`. |\r\n| `--offline-export-no-auto-redirect-html` | Disable automatic creation of redirect HTML files for subfolders containing `index.html`. |\r\n| `--offline-export-preserve-url-structure` | Preserve the original URL path structure. 
E.g. `/about` is stored as `about/index.html`<br>instead of `about.html`. Useful for web server deployment where the clone should maintain<br>the same URL hierarchy as the original site. |\r\n| `--offline-export-preserve-urls` | Preserve original URL format in exported HTML/CSS/JS — same-domain links become root-relative (`/path`), cross-domain links stay absolute. Ideal for processing with [siteone-chunker](https://github.com/janreges/siteone-chunker) and RAG pipelines where links must resolve to the production website. |\r\n| `--replace-content=<val>` | Replace content in HTML/JS/CSS with `foo -> bar` or PCRE regexp.<br>Can be specified multiple times. |\r\n| `--replace-query-string=<val>` | Replace characters in query string filenames.<br>Can be specified multiple times. |\r\n| `--offline-export-lowercase` | Convert all filenames to lowercase for offline export. Useful for case-insensitive filesystems. |\r\n| `--ignore-store-file-error` | Ignore any file storing errors and continue. |\r\n| `--disable-astro-inline-modules` | Disable inlining of Astro module scripts for offline export.<br>Scripts will remain as external files with corrected relative paths. |\r\n\r\n### Markdown exporter options\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--markdown-export-dir=<dir>` | Path to directory where to save the markdown version of the website. |\r\n| `--markdown-export-single-file=<file>` | Path to a file for combined markdown. Requires `--markdown-export-dir`. |\r\n| `--markdown-move-content-before-h1-to-end` | Move content before main H1 heading to the end of the markdown. |\r\n| `--markdown-disable-images` | Do not export and show images in markdown files. |\r\n| `--markdown-disable-files` | Do not export files other than HTML/CSS/JS/fonts/images (e.g. PDF, ZIP). |\r\n| `--markdown-remove-links-and-images-from-single-file` | Remove links and images from combined single file. 
|\r\n| `--markdown-exclude-selector=<val>` | Exclude DOM elements by CSS selector from markdown export.<br>Can be specified multiple times. |\r\n| `--markdown-replace-content=<val>` | Replace text content with `foo -> bar` or PCRE regexp.<br>Can be specified multiple times. |\r\n| `--markdown-replace-query-string=<val>` | Replace characters in query string filenames.<br>Can be specified multiple times. |\r\n| `--markdown-export-store-only-url-regex=<regex>` | Debug: store only URLs matching these PCRE regexes. Can be specified multiple times. |\r\n| `--markdown-ignore-store-file-error` | Ignore any file storing errors and continue. |\r\n\r\n### Sitemap options\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--sitemap-xml-file=<file>` | File path for generated XML Sitemap. Extension `.xml` added if not specified. |\r\n| `--sitemap-txt-file=<file>` | File path for generated TXT Sitemap. Extension `.txt` added if not specified. |\r\n| `--sitemap-base-priority=<num>` | Base priority for XML sitemap. Default is `0.5`. |\r\n| `--sitemap-priority-increase=<num>` | Priority increase based on slashes in URL. Default is `0.1`. |\r\n\r\n### Expert options\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--debug` | Activate debug mode. |\r\n| `--debug-log-file=<file>` | Log file for debug messages. When set without `--debug`, logging is active without visible output. |\r\n| `--debug-url-regex=<regex>` | Regex for URL(s) to debug. Can be specified multiple times. |\r\n| `--result-storage=<val>` | Result storage type. Values: `memory` or `file`. Use `file` for large websites. Default is `memory`. |\r\n| `--result-storage-dir=<dir>` | Directory for `--result-storage=file`. Default is `tmp/result-storage`. |\r\n| `--result-storage-compression` | Enable compression for results storage. |\r\n| `--http-cache-dir=<dir>` | Cache dir for HTTP responses. 
Disable with `--http-cache-dir='off'` or `--no-cache`.<br>Default is `~/.cache/siteone-crawler/http-cache` (XDG-compliant, respects `$XDG_CACHE_HOME`). |\r\n| `--http-cache-compression` | Enable compression for HTTP cache storage. |\r\n| `--http-cache-ttl=<val>` | TTL for HTTP cache entries (e.g. `1h`, `7d`, `30m`). Use `0` for infinite. Default is `24h`. |\r\n| `--websocket-server=<host:port>` | Start crawler with websocket server on given host:port. |\r\n| `--console-width=<int>` | Enforce a fixed console width, disabling automatic detection. |\r\n\r\n### Fastest URL analyzer\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--fastest-urls-top-limit=<int>` | Number of URLs in TOP fastest list. Default is `20`. |\r\n| `--fastest-urls-max-time=<val>` | Maximum response time for an URL to be considered fast. Default is `1`. |\r\n\r\n### SEO and OpenGraph analyzer\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--max-heading-level=<int>` | Max heading level from 1 to 6 for analysis. Default is `3`. |\r\n\r\n### Slowest URL analyzer\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--slowest-urls-top-limit=<int>` | Number of URLs in TOP slowest list. Default is `20`. |\r\n| `--slowest-urls-min-time=<val>` | Minimum response time threshold for slow URLs. Default is `0.01`. |\r\n| `--slowest-urls-max-time=<val>` | Maximum response time for very slow evaluation. Default is `3`. |\r\n\r\n### Built-in HTTP server\r\n\r\nBrowse exported markdown or offline HTML files through a local web server with a built-in viewer.\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--serve-markdown=<dir>` | Start built-in HTTP server for browsing a markdown export directory.<br>Renders `.md` files as styled HTML with tables, accordions, dark/light mode, and breadcrumb navigation. 
|\r\n| `--serve-offline=<dir>` | Start built-in HTTP server for browsing an offline HTML export directory.<br>Serves static files with Content-Security-Policy restricting assets to the same origin. |\r\n| `--serve-port=<int>` | Port for the built-in HTTP server. Default is `8321`. |\r\n| `--serve-bind-address=<addr>` | Bind address for the built-in HTTP server. Default is `127.0.0.1` (localhost only).<br>Use `0.0.0.0` to listen on all network interfaces and their IP addresses. |\r\n\r\n**Example:**\r\n\r\n```bash\r\n# Browse markdown export\r\n./siteone-crawler --serve-markdown=./exports/markdown\r\n\r\n# Browse offline export on custom port, accessible from network\r\n./siteone-crawler --serve-offline=./exports/offline --serve-port=9000 --serve-bind-address=0.0.0.0\r\n```\r\n\r\n### HTML-to-Markdown conversion\r\n\r\nConvert a local HTML file to clean Markdown without crawling. Uses the same conversion pipeline as the markdown exporter.\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--html-to-markdown=<file>` | Convert a local HTML file to Markdown and print to stdout. No crawling is performed.<br>Respects `--markdown-disable-images`, `--markdown-disable-files`, `--markdown-move-content-before-h1-to-end`, and `--markdown-exclude-selector`. |\r\n| `--html-to-markdown-output=<file>` | Write the converted Markdown to a file instead of stdout. Requires `--html-to-markdown`. |\r\n\r\n**Examples:**\r\n\r\n```bash\r\n# Convert HTML file to Markdown (printed to stdout)\r\n./siteone-crawler --html-to-markdown=page.html\r\n\r\n# Convert and save to a file\r\n./siteone-crawler --html-to-markdown=page.html --html-to-markdown-output=page.md\r\n\r\n# Convert with options: remove images, exclude navigation, move header below h1\r\n./siteone-crawler --html-to-markdown=page.html \\\r\n  --markdown-disable-images \\\r\n  --markdown-exclude-selector=nav \\\r\n  --markdown-move-content-before-h1-to-end\r\n\r\n# Pipe to other tools (e.g. 
clipboard, AI, wc)\r\n./siteone-crawler --html-to-markdown=page.html | pbcopy\r\n./siteone-crawler --html-to-markdown=page.html | wc -l\r\n```\r\n\r\n### CI/CD settings\r\n\r\n| Parameter | Description |\r\n|-----------|-------------|\r\n| `--ci` | Enable CI/CD quality gate. Crawler exits with code 10 if thresholds are not met. Default file outputs (HTML, JSON, TXT reports) are suppressed unless explicitly requested via `--output-*` options. |\r\n| `--ci-min-score=<val>` | Minimum overall quality score (0.0-10.0). Default is `5.0`. |\r\n| `--ci-min-performance=<val>` | Minimum Performance category score (0.0-10.0). Default is `5.0`. |\r\n| `--ci-min-seo=<val>` | Minimum SEO category score (0.0-10.0). Default is `5.0`. |\r\n| `--ci-min-security=<val>` | Minimum Security category score (0.0-10.0). Default is `5.0`. |\r\n| `--ci-min-accessibility=<val>` | Minimum Accessibility category score (0.0-10.0). Default is `3.0`. |\r\n| `--ci-min-best-practices=<val>` | Minimum Best Practices category score (0.0-10.0). Default is `5.0`. |\r\n| `--ci-max-404=<int>` | Maximum number of 404 responses allowed. Default is `0`. |\r\n| `--ci-max-5xx=<int>` | Maximum number of 5xx server error responses allowed. Default is `0`. |\r\n| `--ci-max-criticals=<int>` | Maximum number of critical analysis findings allowed. Default is `0`. |\r\n| `--ci-max-warnings=<int>` | Maximum number of warning analysis findings allowed. Not checked by default. |\r\n| `--ci-max-avg-response=<val>` | Maximum average response time in seconds. Not checked by default. |\r\n| `--ci-min-pages=<int>` | Minimum number of HTML pages that must be found. Default is `10`. |\r\n| `--ci-min-assets=<int>` | Minimum number of assets (JS, CSS, images, fonts) that must be found. Default is `10`. |\r\n| `--ci-min-documents=<int>` | Minimum number of documents (PDF, etc.) that must be found. Default is `0` (not checked). 
|\r\n\r\n**Default behavior with `--ci` alone:** overall score >= 5.0, each category score >= 5.0 (Performance, SEO, Security, Best Practices) and Accessibility >= 3.0, 404 errors <= 0, 5xx errors <= 0, critical findings <= 0, HTML pages >= 10, assets >= 10. File outputs (HTML, JSON, TXT reports) are not generated. To save reports in CI mode, specify the desired output explicitly, e.g. `--ci --output-html-report=report.html`.\r\n\r\n## 🏆 Quality Scoring\r\n\r\nThe crawler automatically calculates a quality score (0.0-10.0) across 5 weighted categories:\r\n\r\n| Category | Weight | What it measures |\r\n|----------|--------|------------------|\r\n| **Performance** | 20% | Response times, slow URLs |\r\n| **SEO** | 20% | Missing H1, title uniqueness, meta descriptions, 404s, redirects |\r\n| **Security** | 25% | SSL/TLS certificates, security headers, unsafe protocols |\r\n| **Accessibility** | 20% | Lang attribute, image alt text, form labels, ARIA, heading levels |\r\n| **Best Practices** | 15% | Duplicate/large SVGs, deep DOM, Brotli/WebP support |\r\n\r\nThe overall score is a weighted average of all categories. Scores are displayed in a colored box in the console output and included in JSON and HTML report outputs.\r\n\r\nScore labels:\r\n- **9.0-10.0** — Excellent (green)\r\n- **7.0-8.9** — Good (blue)\r\n- **5.0-6.9** — Fair (yellow)\r\n- **3.0-4.9** — Poor (purple)\r\n- **0.0-2.9** — Critical (red)\r\n\r\n## 🔄 CI/CD Integration\r\n\r\nThe `--ci` flag enables a quality gate that evaluates configurable thresholds after crawling completes. When any threshold is not met, the crawler exits with **code 10** (distinct from exit code 1 for runtime errors). In CI mode, default file outputs (HTML, JSON, TXT reports) are automatically suppressed — only the console output and exit code matter. If you need report files in CI, specify them explicitly (e.g. 
`--output-html-report=report.html`).\r\n\r\n**Bonus: Cache warming** — running the crawler as a post-deployment step in your CI/CD pipeline crawls every page and asset on your site, which populates the HTML/asset cache on your **reverse proxy** (Varnish, Nginx) or **CDN** (Cloudflare, CloudFront). This way, the first real visitors always hit a warm cache instead of cold origin requests.\r\n\r\n### Exit codes\r\n\r\n| Code | Meaning |\r\n|------|---------|\r\n| `0` | Success (with `--ci` this also means all quality thresholds passed) |\r\n| `1` | Runtime error |\r\n| `2` | Help/version displayed |\r\n| `3` | No pages crawled (e.g. DNS failure, timeout, connection refused) |\r\n| `10` | CI/CD quality gate failed |\r\n| `101` | Configuration error |\r\n\r\n### Example: GitHub Actions\r\n\r\n```yaml\r\n- name: Check website quality\r\n  run: |\r\n    ./siteone-crawler \\\r\n      --url=https://staging.example.com \\\r\n      --ci \\\r\n      --ci-min-score=7.0 \\\r\n      --ci-min-security=8.0 \\\r\n      --ci-max-404=0 \\\r\n      --ci-max-5xx=0\r\n```\r\n\r\n### Example: GitLab CI\r\n\r\n```yaml\r\nquality_check:\r\n  script:\r\n    - ./siteone-crawler --url=$STAGING_URL --ci --ci-min-score=6.0\r\n  allow_failure: false\r\n```\r\n\r\n### Console output\r\n\r\nWhen `--ci` is enabled, a quality gate box is displayed after the quality scores:\r\n\r\n```\r\n╔══════════════════════════════════════════════════════════════╗\r\n║                      CI/CD QUALITY GATE                      ║\r\n╠══════════════════════════════════════════════════════════════╣\r\n║  [PASS] Overall score: 7.2 >= 5                              ║\r\n║  [PASS] 404 errors: 0 <= 0                                   ║\r\n║  [PASS] 5xx errors: 0 <= 0                                   ║\r\n║  [FAIL] Critical findings: 2 > 0 (max: 0)                    ║\r\n╠══════════════════════════════════════════════════════════════╣\r\n║  RESULT: FAIL (1 of 4 checks failed) — exit code 10          
║\r\n╚══════════════════════════════════════════════════════════════╝\r\n```\r\n\r\n### JSON output\r\n\r\nWhen using `--output=json --ci`, the JSON includes a `ciGate` object:\r\n\r\n```json\r\n{\r\n  \"ciGate\": {\r\n    \"passed\": false,\r\n    \"exitCode\": 10,\r\n    \"checks\": [\r\n      {\"metric\": \"Overall score\", \"operator\": \">=\", \"threshold\": 5.0, \"actual\": 7.2, \"passed\": true},\r\n      {\"metric\": \"404 errors\", \"operator\": \"<=\", \"threshold\": 0.0, \"actual\": 0.0, \"passed\": true},\r\n      {\"metric\": \"Critical findings\", \"operator\": \"<=\", \"threshold\": 0.0, \"actual\": 2.0, \"passed\": false}\r\n    ]\r\n  }\r\n}\r\n```\r\n\r\n## 📄 Output Examples\r\n\r\nTo understand the richness of the data provided by the crawler, you can examine real output examples generated from crawling `crawler.siteone.io`:\r\n\r\n*   **Text Output Example:** [`docs/OUTPUT-crawler.siteone.io.txt`](docs/OUTPUT-crawler.siteone.io.txt)\r\n    *   Provides a human-readable summary suitable for quick review.\r\n    *   See the detailed [Text Output Documentation](docs/TEXT-OUTPUT.md).\r\n*   **JSON Output Example:** [`docs/OUTPUT-crawler.siteone.io.json`](docs/OUTPUT-crawler.siteone.io.json)\r\n    *   Provides structured data ideal for programmatic consumption and detailed analysis.\r\n    *   See the detailed [JSON Output Documentation](docs/JSON-OUTPUT.md).\r\n\r\nThese examples showcase the various tables and metrics generated, demonstrating the tool's capabilities in analyzing website structure, performance, SEO, security, and more.\r\n\r\n## 🧪 Testing\r\n\r\n```bash\r\ncargo test                                       # unit tests + offline integration tests\r\ncargo test --test integration_crawl -- --ignored --test-threads=1  # network integration tests (crawls crawler.siteone.io)\r\n```\r\n\r\nUnit tests live in each source file (`#[cfg(test)] mod tests`). 
Integration tests are in `tests/integration_crawl.rs` — network-dependent tests are `#[ignore]` by default so that `cargo test` stays fast and offline.\r\n\r\n## ⚠️ Disclaimer\r\n\r\nPlease use responsibly and ensure that you have the necessary permissions when crawling websites. Some sites may have\r\nrules against automated access detailed in their robots.txt.\r\n\r\n**The author is not responsible for any consequences caused by inappropriate use or deliberate misuse of this tool.**\r\n\r\n## 📜 License\r\n\r\nThis work is licensed under a [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) license.\r\n\r\n## Powered by\r\n\r\n[![Hosted By: Cloudsmith](https://img.shields.io/badge/OSS%20hosting%20by-cloudsmith-blue?logo=cloudsmith&style=for-the-badge)](https://cloudsmith.com)\r\n\r\nPackage repository hosting is graciously provided by [Cloudsmith](https://cloudsmith.com).\r\nCloudsmith is the only fully hosted, cloud-native, universal package management solution that\r\nenables your organization to create, store and share packages in any format, to any place, with total\r\nconfidence.\r\n\r\n[![PhpStorm logo.](https://resources.jetbrains.com/storage/products/company/brand/logos/PhpStorm.svg)](https://jb.gg/OpenSourceSupport)\r\n"
  },
  {
    "path": "docs/JSON-OUTPUT.md",
    "content": "# SiteOne Crawler: JSON Output Documentation\n\n## Table of Contents\n\n*   [1. Introduction](#1-introduction)\n*   [2. Potential Use Cases](#2-potential-use-cases)\n*   [3. Detailed JSON Structure](#3-detailed-json-structure)\n    *   [3.1. `crawler` (Object)](#31-crawler-object)\n    *   [3.2. `extraColumnsFromAnalysis` (Array)](#32-extracolumnsfromanalysis-array)\n    *   [3.3. `options` (Object)](#33-options-object)\n    *   [3.4. `qualityScores` (Object)](#34-qualityscores-object)\n    *   [3.5. `results` (Array)](#35-results-array)\n    *   [3.6. `stats` (Object)](#36-stats-object)\n    *   [3.7. `summary` (Object)](#37-summary-object)\n    *   [3.8. `tables` (Object)](#38-tables-object)\n*   [4. JSON Schema (Draft)](#4-json-schema-draft)\n*   [5. Analysis Tables Description (`tables` key)](#5-analysis-tables-description-tables-key)\n    *   [5.1. `skipped-summary` (Skipped URLs Summary)](#51-skipped-summary-skipped-urls-summary)\n    *   [5.2. `skipped` (Skipped URLs)](#52-skipped-skipped-urls)\n    *   [5.3. `redirects` (Redirected URLs)](#53-redirects-redirected-urls)\n    *   [5.4. `404` (404 URLs)](#54-404-404-urls)\n    *   [5.5. `certificate-info` (SSL/TLS info)](#55-certificate-info-ssltls-info)\n    *   [5.6. `fastest-urls` (TOP fastest URLs)](#56-fastest-urls-top-fastest-urls)\n    *   [5.7. `slowest-urls` (TOP slowest URLs)](#57-slowest-urls-top-slowest-urls)\n    *   [5.8. `seo` (SEO metadata)](#58-seo-seo-metadata)\n    *   [5.9. `open-graph` (OpenGraph metadata)](#59-open-graph-opengraph-metadata)\n    *   [5.10. `seo-headings` (Heading structure)](#510-seo-headings-heading-structure)\n    *   [5.11. `headers` (HTTP headers)](#511-headers-http-headers)\n    *   [5.12. `headers-values` (HTTP header values)](#512-headers-values-http-header-values)\n    *   [5.13. `caching-per-content-type` (HTTP Caching by content type)](#513-caching-per-content-type-http-caching-by-content-type)\n    *   [5.14. 
`caching-per-domain` (HTTP Caching by domain)](#514-caching-per-domain-http-caching-by-domain)\n    *   [5.15. `caching-per-domain-and-content-type` (HTTP Caching by domain and content type)](#515-caching-per-domain-and-content-type-http-caching-by-domain-and-content-type)\n    *   [5.16. `non-unique-titles` (TOP non-unique titles)](#516-non-unique-titles-top-non-unique-titles)\n    *   [5.17. `non-unique-descriptions` (TOP non-unique descriptions)](#517-non-unique-descriptions-top-non-unique-descriptions)\n    *   [5.18. `best-practices` (Best practices)](#518-best-practices-best-practices)\n    *   [5.19. `accessibility` (Accessibility)](#519-accessibility-accessibility)\n    *   [5.20. `source-domains` (Source domains)](#520-source-domains-source-domains)\n    *   [5.21. `content-types` (Content types)](#521-content-types-content-types)\n    *   [5.22. `content-types-raw` (Content types (MIME types))](#522-content-types-raw-content-types-mime-types)\n    *   [5.23. `dns` (DNS info)](#523-dns-dns-info)\n    *   [5.24. `security` (Security)](#524-security-security)\n    *   [5.25. `analysis-stats` (Analysis stats)](#525-analysis-stats-analysis-stats)\n    *   [5.26. `content-processors-stats` (Content processor stats)](#526-content-processors-stats-content-processor-stats)\n    *   [5.27. `external-urls` (External URLs)](#527-external-urls-external-urls)\n*   [6. Note on Text Output](#6-note-on-text-output)\n\n\nThis document describes the structure and content of the JSON output file generated by the SiteOne Crawler. This JSON file contains detailed information about the crawled website, including metadata about the crawl process, results for each visited URL, quality scores, summary findings, and various analysis tables.\n\n## 1. Introduction\n\nThe JSON output provides a comprehensive dataset about the crawled website. 
Key information includes:\n\n*   **Crawl Metadata:** Details about the crawler execution, such as version, execution time, command used, hostname, and the final user agent.\n*   **Options:** A complete record of all CLI configuration values used for the crawl.\n*   **Quality Scores:** Overall and per-category quality scores (0-10) with deduction details.\n*   **Visited URL Results:** For each URL visited during the crawl:\n    *   URL address\n    *   HTTP status code\n    *   Elapsed time for the request (performance)\n    *   Size of the response body\n    *   Content type (HTML, CSS, JS, Image, etc.)\n    *   Caching information (cache flags, lifetime)\n    *   Additional analysis results stored in the `extras` field.\n*   **Stats:** Aggregate statistics about the crawl (total URLs, sizes, timings, status code counts).\n*   **Summary:** A list of findings (OK, Warning, Critical, Info) that feed into quality scoring.\n*   **Analysis Tables:** Aggregated data and specific findings presented in structured tables:\n    *   **Skipped URLs:** Reasons why certain URLs were not crawled (e.g., external domain, disallowed by robots.txt, specific rules).\n    *   **Redirects:** List of URLs that resulted in redirects (3xx status codes).\n    *   **404 Errors:** List of URLs that resulted in a 404 Not Found status.\n    *   **SSL/TLS Info:** Details about the website's SSL certificate (issuer, subject, validity dates, supported protocols).\n    *   **Performance:** Tables listing the fastest and slowest URLs encountered during the crawl.\n    *   **SEO &amp; Content:**\n        *   SEO metadata (title, description, keywords, H1, indexing directives) for HTML pages.\n        *   OpenGraph and Twitter Card metadata.\n        *   Heading structure analysis (correctness of H1-H6 hierarchy).\n        *   Analysis of non-unique titles and descriptions across pages.\n    *   **Technical Details:**\n        *   HTTP Headers: Summary of headers found, their occurrences, and unique 
values.\n        *   Caching Analysis: Breakdown of caching strategies by content type and domain.\n        *   DNS Information: DNS resolution details for the target domain.\n        *   Security Analysis: Evaluation of security-related HTTP headers.\n        *   External URLs: List of external URLs discovered during the crawl.\n    *   **Crawler Statistics:** Performance metrics for the crawler itself, individual analyzers, and content processors.\n\n## 2. Potential Use Cases\n\nThe detailed data within the JSON output enables a wide variety of use cases:\n\n1.  **Comprehensive SEO Audits:** Analyze titles, descriptions, heading structures, indexing status, and OpenGraph tags across the entire site.\n2.  **Performance Monitoring &amp; Optimization:** Identify the slowest pages and resources, analyze load times, and check caching headers.\n3.  **Broken Link Checking:** Easily extract lists of all 404 errors and the pages where they were found.\n4.  **Redirect Chain Analysis:** Identify and analyze redirect chains.\n5.  **Security Header Audits:** Verify the implementation of crucial security headers (CSP, HSTS, X-Frame-Options, etc.) across the site.\n6.  **Content Inventory &amp; Analysis:** Get a list of all crawled resources, their types, sizes, and status codes. Analyze content type distribution.\n7.  **Website Archiving/Cloning:** While the crawler has a dedicated offline export, the JSON contains the list of all discovered resources, which could inform a custom archiving process.\n8.  **Competitive Analysis:** Run the crawler on competitor sites (respecting their `robots.txt`) to gather insights into their structure, performance, and technology.\n9.  **CI/CD Integration:** Integrate the crawler into deployment pipelines to automatically check for new errors (404s, performance regressions) after deployments. Use quality scores and thresholds for automated pass/fail decisions.\n10. 
**Technical Debt Assessment:** Identify outdated practices, missing security headers, or performance issues that need addressing.\n\n## 3. Detailed JSON Structure\n\nThe JSON output has 8 top-level keys:\n\n### 3.1. `crawler` (Object)\n\nContains metadata about the crawler execution:\n*   `name` (String): Name of the crawler software.\n*   `version` (String): Version of the crawler.\n*   `executedAt` (String): Timestamp when the crawl was executed, in the format `\"YYYY-MM-DD HH:MM:SS\"` (space separator, no timezone). Example: `\"2026-03-16 14:55:13\"`.\n*   `command` (String): The command-line arguments used to run the crawl.\n*   `hostname` (String): The hostname where the crawler was run.\n*   `finalUserAgent` (String): The User-Agent string used for the HTTP requests.\n\n### 3.2. `extraColumnsFromAnalysis` (Array)\n\nAn array of objects defining extra columns that might be added during specific analyses. These are primarily intended for augmenting report outputs. Each object contains:\n*   `name` (String): The display name of the column.\n*   `length` (Integer): Suggested display length/width.\n*   `truncate` (Boolean): Whether the content should be truncated if it exceeds the length.\n*   `customMethod`, `customPattern`, `customGroup`: Fields used for custom data extraction logic (null when not configured).\n\n### 3.3. `options` (Object)\n\nA flat object containing all 132 CLI configuration values used for the crawl. Every option from the command line (or its default value) is recorded here. Keys are the option names in camelCase (e.g., `url`, `workers`, `maxReqsPerSec`, `timeout`, `outputType`, `userAgent`, `acceptEncoding`, etc.). Values are strings, integers, booleans, or null, depending on the option type.\n\nThis is useful for reproducing a crawl or understanding the exact configuration that produced the results.\n\n### 3.4. 
`qualityScores` (Object)\n\nContains overall and per-category quality scores computed after analysis.\n\n*   `overall` (Object): The aggregate quality score.\n    *   `score` (Float): Overall score from 0.0 to 10.0.\n    *   `label` (String): Human-readable label (e.g., `\"A+\"`, `\"A\"`, `\"B\"`, `\"C\"`, `\"D\"`, `\"F\"`).\n    *   `weight` (Float): Total weight (1.0 for overall).\n    *   `deductions` (Array): Array of objects, each with:\n        *   `points` (Float): Number of points deducted.\n        *   `reason` (String): Explanation for the deduction.\n\n*   `categories` (Array): Array of 5 category objects, each with:\n    *   `code` (String): Category identifier. One of: `\"performance\"`, `\"seo\"`, `\"security\"`, `\"accessibility\"`, `\"bestPractices\"`.\n    *   `name` (String): Human-readable category name.\n    *   `score` (Float): Category score from 0.0 to 10.0.\n    *   `label` (String): Human-readable label.\n    *   `weight` (Float): Weight of this category in the overall score (e.g., 0.20 for SEO, 0.25 for Security).\n    *   `deductions` (Array): Array of deduction objects (same structure as overall deductions).\n\n### 3.5. `results` (Array)\n\nAn array of objects, where each object represents a single visited URL.\n*   `url` (String): The absolute URL that was visited.\n*   `status` (String): The HTTP status code returned (e.g., `\"200\"`, `\"404\"`).\n*   `elapsedTime` (Float): Time taken to fetch the URL in seconds (e.g., `0.005`).\n*   `size` (Integer): Size of the response body in bytes (e.g., `50961`).\n*   `type` (Integer): An enum representing the detected content type:\n    *   `1`: HTML\n    *   `2`: JavaScript\n    *   `3`: CSS\n    *   `4`: Image\n    *   `7`: Document (e.g., robots.txt)\n    *   `8`: JSON\n    *   Other types may exist (Audio, Font, Video, XML, Redirect, Other).\n*   `cacheTypeFlags` (Integer): Bitmask representing detected caching mechanisms (e.g., Cache-Control, ETag, Last-Modified). 
For example, `31` typically means Cache-Control + ETag + Last-Modified are all present. `32768` might indicate no caching headers found.\n*   `cacheLifetime` (Integer): Cache lifetime in seconds derived from `Cache-Control: max-age` or `Expires` header. `0` if no lifetime could be determined.\n*   `extras` (Array): Contains additional data from specific analyzers run on this URL. Typically an empty array `[]`.\n\n### 3.6. `stats` (Object)\n\nAggregate statistics about the entire crawl:\n*   `totalUrls` (Integer): Total number of URLs visited.\n*   `totalSize` (Integer): Total size of all responses in bytes.\n*   `totalSizeFormatted` (String): Human-readable formatted total size (e.g., `\"31.33 MB\"`).\n*   `totalExecutionTime` (Float): Total wall-clock execution time in seconds.\n*   `totalRequestsTimes` (Float): Sum of all individual request times in seconds.\n*   `totalRequestsTimesAvg` (Float): Average request time in seconds.\n*   `totalRequestsTimesMin` (Float): Minimum request time in seconds.\n*   `totalRequestsTimesMax` (Float): Maximum request time in seconds.\n*   `countByStatus` (Object): An object mapping HTTP status codes to counts. Keys are status code strings (e.g., `\"200\"`, `\"404\"`, `\"429\"`), values are integers. Only status codes that were actually encountered appear as keys.\n\n### 3.7. `summary` (Object)\n\nContains a list of summary findings that feed into quality scoring.\n\n*   `items` (Array): Array of finding objects, each with:\n    *   `aplCode` (String): A unique code identifying the finding (e.g., `\"s201\"`, `\"s404\"`, `\"s502\"`).\n    *   `status` (String): Severity level. One of: `\"CRITICAL\"`, `\"WARNING\"`, `\"OK\"`, `\"INFO\"`.\n    *   `text` (String): Human-readable description of the finding (e.g., `\"Brotli is supported for HTML\"`, `\"1 URL(s) returned a 404 status code\"`).\n\n### 3.8. 
`tables` (Object)\n\nAn object where each key is a table identifier (e.g., `skipped-summary`, `404`, `seo`) and the value is an object describing that table. Each table object contains:\n*   `aplCode` (String): A unique code for the table.\n*   `title` (String): A human-readable title for the table.\n*   `columns` (Object): An object describing the columns of the table. Each key is a column identifier (e.g., `reason`, `url`, `statusCode`). The value is an object detailing the column:\n    *   `aplCode` (String): Unique code for the column.\n    *   `name` (String): Display name for the column header.\n    *   `width` (Integer): Suggested display width (-1 might mean auto).\n    *   `formatter` (Object | null): Defines how the data should be formatted (e.g., adding units like 'ms' or 'kB'). Empty object `{}` indicates default formatting.\n    *   `renderer` (Object | null): Defines how the data should be rendered (e.g., adding color or links). Empty object `{}` indicates default rendering.\n    *   `truncateIfLonger` (Boolean): Whether to truncate the value if it exceeds the width.\n    *   Other fields like `formatterWillChangeValueLength`, `nonBreakingSpaces`, `escapeOutputHtml`, `getDataValueCallback`, `forcedDataType` provide more hints for rendering.\n*   `rows` (Array): An array of objects, where each object represents a row in the table. The keys in each row object correspond to the column identifiers defined in `columns`. **Important: All values in all table rows are strings**, regardless of whether the data represents a number, count, or other type. For example, a count of `51` appears as `\"51\"`, a request time of `0.003` appears as `\"0.003\"`, and an empty value appears as `\"\"`. 
Rows may also contain extra keys beyond the declared columns (see individual table descriptions for details).\n*   `position` (String): A hint about where this table should typically be positioned in a report (e.g., `before-url-table`, `after-url-table`).\n\n**Note:** The specific content and structure within `tables` depend on the analyzers enabled during the crawl. The set of tables may vary depending on what data was encountered (e.g., `certificate-info` only appears for HTTPS sites).\n\n## 4. JSON Schema (Draft)\n\nThis is a draft JSON schema based on the actual output. It may need refinement for edge cases.\n\n```json\n{\n  \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n  \"title\": \"SiteOne Crawler JSON Output\",\n  \"description\": \"Schema for the JSON output file generated by SiteOne Crawler.\",\n  \"type\": \"object\",\n  \"properties\": {\n    \"crawler\": {\n      \"description\": \"Metadata about the crawler execution.\",\n      \"type\": \"object\",\n      \"properties\": {\n        \"name\": { \"type\": \"string\" },\n        \"version\": { \"type\": \"string\" },\n        \"executedAt\": { \"type\": \"string\", \"description\": \"Format: YYYY-MM-DD HH:MM:SS\" },\n        \"command\": { \"type\": \"string\" },\n        \"hostname\": { \"type\": \"string\" },\n        \"finalUserAgent\": { \"type\": \"string\" }\n      },\n      \"required\": [\"name\", \"version\", \"executedAt\", \"command\", \"hostname\", \"finalUserAgent\"]\n    },\n    \"extraColumnsFromAnalysis\": {\n      \"description\": \"Definitions for extra columns used in analyses.\",\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"object\",\n        \"properties\": {\n          \"name\": { \"type\": \"string\" },\n          \"length\": { \"type\": \"integer\" },\n          \"truncate\": { \"type\": \"boolean\" },\n          \"customMethod\": { \"type\": [\"string\", \"null\"] },\n          \"customPattern\": { \"type\": [\"string\", \"null\"] },\n        
  \"customGroup\": { \"type\": [\"string\", \"null\"] }\n        },\n        \"required\": [\"name\", \"length\", \"truncate\"]\n      }\n    },\n    \"options\": {\n      \"description\": \"All CLI configuration values used for the crawl.\",\n      \"type\": \"object\",\n      \"additionalProperties\": true\n    },\n    \"qualityScores\": {\n      \"description\": \"Overall and per-category quality scores.\",\n      \"type\": \"object\",\n      \"properties\": {\n        \"overall\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"score\": { \"type\": \"number\" },\n            \"label\": { \"type\": \"string\" },\n            \"weight\": { \"type\": \"number\" },\n            \"deductions\": {\n              \"type\": \"array\",\n              \"items\": {\n                \"type\": \"object\",\n                \"properties\": {\n                  \"points\": { \"type\": \"number\" },\n                  \"reason\": { \"type\": \"string\" }\n                },\n                \"required\": [\"points\", \"reason\"]\n              }\n            }\n          },\n          \"required\": [\"score\", \"label\", \"weight\", \"deductions\"]\n        },\n        \"categories\": {\n          \"type\": \"array\",\n          \"items\": {\n            \"type\": \"object\",\n            \"properties\": {\n              \"code\": { \"type\": \"string\", \"enum\": [\"performance\", \"seo\", \"security\", \"accessibility\", \"bestPractices\"] },\n              \"name\": { \"type\": \"string\" },\n              \"score\": { \"type\": \"number\" },\n              \"label\": { \"type\": \"string\" },\n              \"weight\": { \"type\": \"number\" },\n              \"deductions\": {\n                \"type\": \"array\",\n                \"items\": {\n                  \"type\": \"object\",\n                  \"properties\": {\n                    \"points\": { \"type\": \"number\" },\n                    \"reason\": { \"type\": \"string\" }\n        
          },\n                  \"required\": [\"points\", \"reason\"]\n                }\n              }\n            },\n            \"required\": [\"code\", \"name\", \"score\", \"label\", \"weight\", \"deductions\"]\n          }\n        }\n      },\n      \"required\": [\"overall\", \"categories\"]\n    },\n    \"results\": {\n      \"description\": \"Array of results for each visited URL.\",\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"object\",\n        \"properties\": {\n          \"url\": { \"type\": \"string\", \"format\": \"uri\" },\n          \"status\": { \"type\": \"string\" },\n          \"elapsedTime\": { \"type\": \"number\" },\n          \"size\": { \"type\": \"integer\" },\n          \"type\": { \"type\": \"integer\", \"description\": \"Enum for content type (1:HTML, 2:JS, 3:CSS, 4:Image, 7:Document, 8:JSON, ...)\" },\n          \"cacheTypeFlags\": { \"type\": \"integer\", \"description\": \"Bitmask for caching mechanisms\" },\n          \"cacheLifetime\": { \"type\": \"integer\", \"description\": \"Cache lifetime in seconds, 0 if undetermined\" },\n          \"extras\": {\n            \"type\": \"array\",\n            \"description\": \"Additional analysis data for this URL (typically empty)\"\n          }\n        },\n        \"required\": [\"url\", \"status\", \"elapsedTime\", \"size\", \"type\", \"cacheTypeFlags\", \"cacheLifetime\", \"extras\"]\n      }\n    },\n    \"stats\": {\n      \"description\": \"Aggregate crawl statistics.\",\n      \"type\": \"object\",\n      \"properties\": {\n        \"totalUrls\": { \"type\": \"integer\" },\n        \"totalSize\": { \"type\": \"integer\" },\n        \"totalSizeFormatted\": { \"type\": \"string\" },\n        \"totalExecutionTime\": { \"type\": \"number\" },\n        \"totalRequestsTimes\": { \"type\": \"number\" },\n        \"totalRequestsTimesAvg\": { \"type\": \"number\" },\n        \"totalRequestsTimesMin\": { \"type\": \"number\" },\n        
\"totalRequestsTimesMax\": { \"type\": \"number\" },\n        \"countByStatus\": {\n          \"type\": \"object\",\n          \"additionalProperties\": { \"type\": \"integer\" }\n        }\n      },\n      \"required\": [\"totalUrls\", \"totalSize\", \"totalSizeFormatted\", \"totalExecutionTime\", \"totalRequestsTimes\", \"totalRequestsTimesAvg\", \"totalRequestsTimesMin\", \"totalRequestsTimesMax\", \"countByStatus\"]\n    },\n    \"summary\": {\n      \"description\": \"Summary findings that feed into quality scoring.\",\n      \"type\": \"object\",\n      \"properties\": {\n        \"items\": {\n          \"type\": \"array\",\n          \"items\": {\n            \"type\": \"object\",\n            \"properties\": {\n              \"aplCode\": { \"type\": \"string\" },\n              \"status\": { \"type\": \"string\", \"enum\": [\"CRITICAL\", \"WARNING\", \"OK\", \"INFO\"] },\n              \"text\": { \"type\": \"string\" }\n            },\n            \"required\": [\"aplCode\", \"status\", \"text\"]\n          }\n        }\n      },\n      \"required\": [\"items\"]\n    },\n    \"tables\": {\n      \"description\": \"Aggregated analysis results presented as tables.\",\n      \"type\": \"object\",\n      \"additionalProperties\": {\n        \"type\": \"object\",\n        \"properties\": {\n          \"aplCode\": { \"type\": \"string\" },\n          \"title\": { \"type\": \"string\" },\n          \"columns\": {\n            \"type\": \"object\",\n            \"additionalProperties\": {\n              \"type\": \"object\",\n              \"properties\": {\n                \"aplCode\": { \"type\": \"string\" },\n                \"name\": { \"type\": \"string\" },\n                \"width\": { \"type\": \"integer\" },\n                \"formatter\": { \"type\": [\"object\", \"null\"] },\n                \"renderer\": { \"type\": [\"object\", \"null\"] },\n                \"truncateIfLonger\": { \"type\": \"boolean\" }\n              },\n              \"required\": 
[\"aplCode\", \"name\", \"width\"]\n            }\n          },\n          \"rows\": {\n            \"type\": \"array\",\n            \"items\": {\n              \"type\": \"object\",\n              \"description\": \"All row values are strings. Rows may contain extra keys beyond the declared columns.\",\n              \"additionalProperties\": { \"type\": \"string\" }\n            }\n          },\n          \"position\": { \"type\": \"string\", \"enum\": [\"before-url-table\", \"after-url-table\"] }\n        },\n        \"required\": [\"aplCode\", \"title\", \"columns\", \"rows\", \"position\"]\n      }\n    }\n  },\n  \"required\": [\"crawler\", \"extraColumnsFromAnalysis\", \"options\", \"qualityScores\", \"results\", \"stats\", \"summary\", \"tables\"]\n}\n```\n\n## 5. Analysis Tables Description (`tables` key)\n\nThis section details the structure and columns of each table found under the `tables` key in the JSON output.\n\n**Important note on data types:** All values in all table rows are **strings**. Numeric values such as counts, times, and sizes are serialized as strings (e.g., `\"51\"` not `51`, `\"0.003\"` not `0.003`). Empty values appear as `\"\"`. This applies to every table described below. Where column descriptions say \"count\" or \"time\", the value is still a string representation of that number.\n\nSome tables include **extra row keys** beyond the declared columns. These are noted in the individual table descriptions.\n\n### 5.1. `skipped-summary` (Skipped URLs Summary)\n\nProvides a summary of skipped URLs grouped by domain and reason.\n\n| Column | Description |\n|--------|-------------|\n| `reason` | A human-readable string describing why URLs from this domain were skipped (e.g., `\"Not allowed host\"`, `\"Blocked by robots.txt\"`). |\n| `domain` | The domain name whose URLs were skipped. |\n| `count` | The number of unique URLs skipped for this domain and reason. |\n\n### 5.2. 
`skipped` (Skipped URLs)\n\nLists individual URLs that were skipped during the crawl.\n\n| Column | Description |\n|--------|-------------|\n| `reason` | A human-readable string describing why the URL was skipped (e.g., `\"Not allowed host\"`, `\"Blocked by robots.txt\"`, `\"File extension is not allowed\"`). |\n| `url` | The URL that was skipped. |\n| `sourceAttr` | A string describing the HTML attribute where the skipped URL was found (e.g., `\"<a href>\"`, `\"<link href>\"`, `\"<script src>\"`). |\n| `sourceUqId` | The URL path of the page where the skipped URL was discovered (e.g., `\"/\"`, `\"/docs/getting-started\"`). This allows linking back to the source page. |\n\n### 5.3. `redirects` (Redirected URLs)\n\nLists URLs that resulted in an HTTP redirect (3xx status code).\n\n| Column | Description |\n|--------|-------------|\n| `statusCode` | The specific redirect status code (e.g., `\"301\"`, `\"302\"`). |\n| `url` | The original URL that redirected. |\n| `targetUrl` | The target URL to which the original URL redirected. |\n| `sourceUqId` | URL path of the page where the redirected URL was found. |\n\n### 5.4. `404` (404 URLs)\n\nLists URLs that resulted in a \"404 Not Found\" status code.\n\n| Column | Description |\n|--------|-------------|\n| `statusCode` | The HTTP status code (typically `\"404\"`). |\n| `url` | The URL that resulted in the 404 error. |\n| `sourceUqId` | URL path of the page where the broken URL was found. |\n\n### 5.5. `certificate-info` (SSL/TLS info)\n\nProvides details about the SSL/TLS certificate of the crawled domain.\n\n| Column | Description |\n|--------|-------------|\n| `info` | The name of the certificate attribute (e.g., `\"Issuer\"`, `\"Subject\"`, `\"Valid from\"`, `\"Valid to\"`, `\"Supported protocols\"`, `\"RAW certificate output\"`, `\"RAW protocols output\"`). |\n| `value` | The value of the corresponding certificate attribute. Always a string. 
For multi-line values like raw certificate or protocol output, the entire content is a single string with embedded newlines. |\n\n### 5.6. `fastest-urls` (TOP fastest URLs)\n\nLists the URLs with the lowest request times encountered during the crawl.\n\n| Column | Description |\n|--------|-------------|\n| `requestTime` | The time taken to fetch the URL in seconds (e.g., `\"0.003\"`). |\n| `statusCode` | The HTTP status code of the URL (e.g., `\"200\"`). |\n| `url` | The URL itself. |\n\n### 5.7. `slowest-urls` (TOP slowest URLs)\n\nLists the URLs with the highest request times encountered during the crawl.\n\n| Column | Description |\n|--------|-------------|\n| `requestTime` | The time taken to fetch the URL in seconds (e.g., `\"1.234\"`). |\n| `statusCode` | The HTTP status code of the URL (e.g., `\"200\"`). |\n| `url` | The URL itself. |\n\n### 5.8. `seo` (SEO metadata)\n\nProvides SEO-related metadata extracted from HTML pages.\n\n| Column | Description |\n|--------|-------------|\n| `urlPathAndQuery` | The path and query string of the URL. |\n| `indexing` | A string describing the indexing status (e.g., `\"index, follow\"`, `\"noindex, follow\"`). |\n| `title` | The content of the `<title>` tag, or empty string if not found. |\n| `h1` | The content of the first `<h1>` tag found, or empty string. |\n| `description` | The content of the `meta name=\"description\"` tag, or empty string. |\n| `keywords` | The content of the `meta name=\"keywords\"` tag, or empty string. |\n\n**Extra row keys** (present in each row object but not declared as columns):\n*   `robotsIndex` (String): Whether the page allows indexing (e.g., `\"1\"` for index, `\"0\"` for noindex).\n*   `deniedByRobotsTxt` (String): Whether the page is denied by robots.txt (e.g., `\"0\"` for allowed, `\"1\"` for denied).\n\n### 5.9. 
`open-graph` (OpenGraph metadata)\n\nProvides Open Graph and Twitter Card metadata extracted from HTML pages.\n\n| Column | Description |\n|--------|-------------|\n| `urlPathAndQuery` | The path and query string of the URL. |\n| `ogTitle` | Content of the `og:title` meta tag, or empty string. |\n| `ogDescription` | Content of the `og:description` meta tag, or empty string. |\n| `ogImage` | Content of the `og:image` meta tag, or empty string. |\n| `twitterTitle` | Content of the `twitter:title` meta tag, or empty string. |\n| `twitterDescription` | Content of the `twitter:description` meta tag, or empty string. |\n| `twitterImage` | Content of the `twitter:image` meta tag, or empty string. |\n\n### 5.10. `seo-headings` (Heading structure)\n\nProvides analysis of the heading (H1-H6) structure for each HTML page.\n\n| Column | Description |\n|--------|-------------|\n| `headings` | A formatted string representation of the heading structure showing hierarchy and potential errors (e.g., `\"OK H1, H2, H2, H3\"` or `\"ERR H1, H3 (skipped H2)\"`). |\n| `headingsCount` | Total number of headings found on the page (e.g., `\"5\"`). |\n| `headingsErrorsCount` | Number of structural errors found in the headings (e.g., `\"0\"`, `\"2\"`). |\n| `urlPathAndQuery` | The path and query string of the URL. |\n\n**Extra row key:**\n*   `headingsHtml` (String): An HTML string containing the full heading tree with markup (e.g., `\"<b>H1</b> Title<br><b>H2</b> Section...\"`). Useful for rendering a visual heading tree in reports.\n\n### 5.11. `headers` (HTTP headers)\n\nSummarizes the HTTP response headers encountered across all crawled URLs.\n\n| Column | Description |\n|--------|-------------|\n| `header` | The name of the HTTP header. |\n| `occurrences` | The total number of times this header was found (e.g., `\"73\"`). |\n| `uniqueValues` | The count of distinct values found for this header, as a string (e.g., `\"3\"`). 
|\n| `valuesPreview` | A preview string showing some of the values encountered (truncated if many). |\n| `minValue` | The minimum value found (relevant for numerical or date headers), or empty string. |\n| `maxValue` | The maximum value found, or empty string. |\n\n### 5.12. `headers-values` (HTTP header values)\n\nLists unique values for each HTTP header and their occurrence count.\n\n| Column | Description |\n|--------|-------------|\n| `header` | The name of the HTTP header. |\n| `occurrences` | The number of times this specific value occurred for this header (e.g., `\"51\"`). |\n| `value` | The specific unique value of the HTTP header. |\n\n### 5.13. `caching-per-content-type` (HTTP Caching by content type)\n\nAnalyzes caching effectiveness grouped by general content type (HTML, Image, JS, CSS, etc.).\n\n| Column | Description |\n|--------|-------------|\n| `contentType` | The general content type category (e.g., `\"HTML\"`, `\"Image\"`, `\"JS\"`). |\n| `cacheType` | Description of the caching mechanism detected (e.g., `\"Cache-Control + ETag + Last-Modified\"`, `\"No cache headers\"`). |\n| `count` | Number of URLs matching this content type and cache type. |\n| `avgLifetime` | Average cache lifetime in seconds for URLs in this group, or empty string if not determinable. |\n| `minLifetime` | Minimum cache lifetime in seconds, or empty string. |\n| `maxLifetime` | Maximum cache lifetime in seconds, or empty string. |\n\n### 5.14. `caching-per-domain` (HTTP Caching by domain)\n\nAnalyzes caching effectiveness grouped by domain.\n\n| Column | Description |\n|--------|-------------|\n| `domain` | The domain name. |\n| `cacheType` | Description of the caching mechanism detected. |\n| `count` | Number of URLs from this domain matching this cache type. |\n| `avgLifetime` | Average cache lifetime in seconds, or empty string. |\n| `minLifetime` | Minimum cache lifetime in seconds, or empty string. |\n| `maxLifetime` | Maximum cache lifetime in seconds, or empty string. 
|\n\n### 5.15. `caching-per-domain-and-content-type` (HTTP Caching by domain and content type)\n\nAnalyzes caching effectiveness grouped by both domain and general content type.\n\n| Column | Description |\n|--------|-------------|\n| `domain` | The domain name. |\n| `contentType` | The general content type category. |\n| `cacheType` | Description of the caching mechanism detected. |\n| `count` | Number of URLs matching this domain, content type, and cache type. |\n| `avgLifetime` | Average cache lifetime in seconds, or empty string. |\n| `minLifetime` | Minimum cache lifetime in seconds, or empty string. |\n| `maxLifetime` | Maximum cache lifetime in seconds, or empty string. |\n\n### 5.16. `non-unique-titles` (TOP non-unique titles)\n\nLists page titles that appear on more than one page.\n\n| Column | Description |\n|--------|-------------|\n| `count` | The number of pages sharing this title. |\n| `title` | The non-unique page title. |\n\n### 5.17. `non-unique-descriptions` (TOP non-unique descriptions)\n\nLists meta descriptions that appear on more than one page.\n\n| Column | Description |\n|--------|-------------|\n| `count` | The number of pages sharing this description. |\n| `description` | The non-unique meta description content. |\n\n### 5.18. `best-practices` (Best practices)\n\nSummarizes the results of various best practice checks performed by analyzers.\n\n| Column | Description |\n|--------|-------------|\n| `analysisName` | The name of the specific best practice check (e.g., `\"Large inline SVGs\"`, `\"Heading structure\"`, `\"Brotli support\"`). |\n| `ok` | Count of URLs passing this check. |\n| `notice` | Count of URLs with a notice-level finding. |\n| `warning` | Count of URLs with a warning-level finding. |\n| `critical` | Count of URLs with a critical-level finding. |\n\n### 5.19. 
`accessibility` (Accessibility)\n\nSummarizes the results of accessibility checks.\n\n| Column | Description |\n|--------|-------------|\n| `analysisName` | The name of the specific accessibility check (e.g., `\"Missing image alt attributes\"`, `\"Missing html lang attribute\"`, `\"ARIA roles and landmarks\"`). |\n| `ok` | Count of elements/pages passing this check. |\n| `notice` | Count of notice-level findings. |\n| `warning` | Count of warning-level findings. |\n| `critical` | Count of critical-level findings. |\n\n### 5.20. `source-domains` (Source domains)\n\nProvides statistics about the domains from which resources were loaded.\n\n| Column | Description |\n|--------|-------------|\n| `domain` | The domain name. |\n| `totals` | A summary string showing total count, size, and time for resources from this domain (e.g., `\"67/30MB/6.2s\"`). |\n| `HTML` | Summary string (count/size/time) for HTML resources from this domain. |\n| `Image` | Summary string for Image resources. |\n| `JS` | Summary string for JavaScript resources. |\n| `CSS` | Summary string for CSS resources. |\n| `Document` | Summary string for Document resources (e.g., robots.txt). |\n\n**Extra row keys** (dynamic, present when data exists):\n*   `Audio`, `Font`, `JSON`, `Other`, `Redirect`, `Video`, `XML` (String): Summary strings for additional content types, included only when resources of that type are present.\n*   `totalCount` (String): Total number of resources loaded from this domain.\n\n**Note:** The set of content type columns is dynamic. The declared columns (`HTML`, `Image`, `JS`, `CSS`, `Document`) are always present, but additional content type columns appear in row data based on what resource types were actually encountered during the crawl.\n\n### 5.21. `content-types` (Content types)\n\nSummarizes statistics grouped by general content type.\n\n| Column | Description |\n|--------|-------------|\n| `contentType` | The general content type category (e.g., `\"HTML\"`, `\"Image\"`). 
|\n| `count` | Total number of URLs of this content type. |\n| `totalSize` | Total size in bytes for this content type. |\n| `totalTime` | Total time spent fetching resources of this content type. |\n| `avgTime` | Average time spent fetching a resource of this content type. |\n| `status20x` | Count of URLs with a 2xx status code. |\n| `status40x` | Count of URLs with a 4xx status code. |\n\n**Note:** The status columns are dynamic. Additional columns like `status42x` (for HTTP 429) or `status30x`, `status50x` may appear depending on which status codes were actually encountered during the crawl. These dynamic columns will also be declared in the table's `columns` object.\n\n### 5.22. `content-types-raw` (Content types (MIME types))\n\nSummarizes statistics grouped by the specific MIME type reported in the `Content-Type` HTTP header.\n\n| Column | Description |\n|--------|-------------|\n| `contentType` | The raw MIME type string (e.g., `\"text/html\"`, `\"image/svg+xml\"`, `\"text/html; charset=utf-8\"`). |\n| `count` | Total number of URLs with this MIME type. |\n| `totalSize` | Total size in bytes. |\n| `totalTime` | Total time spent fetching. |\n| `avgTime` | Average time spent fetching. |\n| `status20x` | Count of URLs with a 2xx status code. |\n| `status40x` | Count of URLs with a 4xx status code. |\n\n**Note:** Like `content-types`, the status columns are dynamic. Additional status columns (e.g., `status42x`) appear when the corresponding status codes are encountered.\n\n### 5.23. `dns` (DNS info)\n\nShows the DNS resolution information for the crawled domain(s).\n\n| Column | Description |\n|--------|-------------|\n| `info` | A line of text representing part of the DNS resolution (e.g., the domain name, an IP address, the DNS server used). Presented as a simple text tree. |\n\n### 5.24. 
`security` (Security)\n\nSummarizes findings related to security HTTP headers.\n\n| Column | Description |\n|--------|-------------|\n| `header` | The name of the security header being analyzed (e.g., `\"Strict-Transport-Security\"`, `\"X-Frame-Options\"`, `\"Content-Security-Policy\"`). |\n| `ok` | Count of URLs where the header was configured correctly. |\n| `notice` | Count of URLs with a notice-level finding. |\n| `warning` | Count of URLs with a warning-level finding. |\n| `critical` | Count of URLs with a critical-level finding. |\n| `recommendation` | A string containing textual recommendations for improving the configuration of this header. |\n\n**Extra row key:**\n*   `highestSeverity` (String): The highest severity level found for this header across all URLs (e.g., `\"ok\"`, `\"warning\"`, `\"critical\"`).\n\n### 5.25. `analysis-stats` (Analysis stats)\n\nProvides performance metrics for individual analyzer methods.\n\n| Column | Description |\n|--------|-------------|\n| `classAndMethod` | The class and method name of the analyzer function. |\n| `execTime` | Total execution time in seconds spent in this method across all relevant URLs/data points. |\n| `execCount` | The number of times this method was executed. |\n\n**Extra row key:**\n*   `execTimeFormatted` (String): Human-readable formatted execution time (e.g., `\"0.012 s\"`, `\"1.234 s\"`).\n\n### 5.26. `content-processors-stats` (Content processor stats)\n\nProvides performance metrics for content processor methods (HTML, CSS, JS, XML processors that run during the crawl).\n\n| Column | Description |\n|--------|-------------|\n| `classAndMethod` | The class and method name of the content processor function. |\n| `execTime` | Total execution time in seconds spent in this method. |\n| `execCount` | The number of times this method was executed. |\n\n**Extra row key:**\n*   `execTimeFormatted` (String): Human-readable formatted execution time.\n\n### 5.27. 
`external-urls` (External URLs)\n\nLists external URLs discovered during the crawl along with where they were found.\n\n| Column | Description |\n|--------|-------------|\n| `url` | The external URL that was discovered. |\n| `count` | The number of times this external URL was found across all crawled pages. |\n| `foundOn` | The URL of the page where this external URL was found (typically the first occurrence). |\n\n## 6. Note on Text Output\n\nWhile this document focuses on the JSON output, SiteOne Crawler also offers a simpler Text output format (`--output-text-file`). The Text output provides a human-readable summary suitable for quick review in a terminal or text editor.\n\nSee the [Text Output Documentation](TEXT-OUTPUT.md) for more details on the Text format.\n"
  },
  {
    "path": "docs/OUTPUT-crawler.siteone.io.json",
    "content": "{\n  \"crawler\": {\n    \"command\": \"./siteone-crawler --url=https://crawler.siteone.io/ --output=json --http-cache-dir=\",\n    \"executedAt\": \"2026-03-16 14:55:13\",\n    \"finalUserAgent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/26.0.0.0 Safari/537.36 siteone-crawler/2.0.0.20260316\",\n    \"hostname\": \"DESKTOP-PC\",\n    \"name\": \"SiteOne Crawler\",\n    \"version\": \"2.0.0.20260316\"\n  },\n  \"extraColumnsFromAnalysis\": [\n    {\n      \"customGroup\": null,\n      \"customMethod\": null,\n      \"customPattern\": null,\n      \"length\": 8,\n      \"name\": \"Access.\",\n      \"truncate\": false\n    },\n    {\n      \"customGroup\": null,\n      \"customMethod\": null,\n      \"customPattern\": null,\n      \"length\": 8,\n      \"name\": \"Best pr.\",\n      \"truncate\": false\n    }\n  ],\n  \"options\": {\n    \"acceptEncoding\": \"gzip, deflate, br\",\n    \"addHostToOutputFile\": false,\n    \"addRandomQueryParams\": false,\n    \"addTimestampToOutputFile\": false,\n    \"allowedDomainsForCrawling\": [],\n    \"allowedDomainsForExternalFiles\": [],\n    \"analyzerFilterRegex\": null,\n    \"ci\": false,\n    \"ciMax404\": 0,\n    \"ciMax5xx\": 0,\n    \"ciMaxAvgResponse\": null,\n    \"ciMaxCriticals\": 0,\n    \"ciMaxWarnings\": null,\n    \"ciMinAccessibility\": 3.0,\n    \"ciMinAssets\": 10,\n    \"ciMinBestPractices\": 5.0,\n    \"ciMinDocuments\": 0,\n    \"ciMinPages\": 10,\n    \"ciMinPerformance\": 5.0,\n    \"ciMinScore\": 5.0,\n    \"ciMinSecurity\": 5.0,\n    \"ciMinSeo\": 5.0,\n    \"consoleWidth\": null,\n    \"debug\": false,\n    \"debugLogFile\": null,\n    \"debugUrlRegex\": [],\n    \"device\": \"desktop\",\n    \"disableAllAssets\": false,\n    \"disableAstroInlineModules\": false,\n    \"disableFiles\": false,\n    \"disableFonts\": false,\n    \"disableImages\": false,\n    \"disableJavascript\": false,\n    \"disableStyles\": false,\n    
\"doNotTruncateUrl\": false,\n    \"extraColumns\": [],\n    \"extraColumnsNamesOnly\": [],\n    \"fastestMaxTime\": 1.0,\n    \"fastestTopLimit\": 20,\n    \"forceColor\": false,\n    \"forceRelativeUrls\": false,\n    \"hideProgressBar\": false,\n    \"htmlReportOptions\": null,\n    \"httpAuth\": null,\n    \"httpCacheCompression\": false,\n    \"httpCacheDir\": \"\",\n    \"httpCacheTtl\": 86400,\n    \"ignoreRegex\": [],\n    \"ignoreRobotsTxt\": false,\n    \"ignoreStoreFileError\": false,\n    \"includeRegex\": [],\n    \"mailFrom\": \"siteone-crawler@your-hostname.com\",\n    \"mailFromName\": \"SiteOne Crawler\",\n    \"mailSmtpHost\": \"localhost\",\n    \"mailSmtpPass\": null,\n    \"mailSmtpPort\": 25,\n    \"mailSmtpUser\": null,\n    \"mailSubjectTemplate\": \"Crawler Report for %domain% (%date%)\",\n    \"mailTo\": [],\n    \"markdownDisableFiles\": false,\n    \"markdownDisableImages\": false,\n    \"markdownExcludeSelector\": [],\n    \"markdownExportDir\": null,\n    \"markdownExportSingleFile\": null,\n    \"markdownExportStoreOnlyUrlRegex\": [],\n    \"markdownIgnoreStoreFileError\": false,\n    \"markdownMoveContentBeforeH1ToEnd\": false,\n    \"markdownRemoveLinksAndImagesFromSingleFile\": false,\n    \"markdownReplaceContent\": [],\n    \"markdownReplaceQueryString\": [],\n    \"maxDepth\": 0,\n    \"maxHeadingLevel\": 3,\n    \"maxNon200ResponsesPerBasename\": 5,\n    \"maxQueueLength\": 9000,\n    \"maxReqsPerSec\": 10.0,\n    \"maxSkippedUrls\": 10000,\n    \"maxUrlLength\": 2083,\n    \"maxVisitedUrls\": 10000,\n    \"memoryLimit\": \"2048M\",\n    \"noColor\": false,\n    \"offlineExportDir\": null,\n    \"offlineExportLowercase\": false,\n    \"offlineExportNoAutoRedirectHtml\": false,\n    \"offlineExportPreserveUrlStructure\": false,\n    \"offlineExportRemoveUnwantedCode\": true,\n    \"offlineExportStoreOnlyUrlRegex\": [],\n    \"outputHtmlReport\": 
\"/home/janreges/siteone-crawler/tmp/crawler.siteone.io.report.20260316-155513.html\",\n    \"outputJsonFile\": \"/home/janreges/siteone-crawler/tmp/crawler.siteone.io.output.20260316-155513.json\",\n    \"outputTextFile\": \"/home/janreges/siteone-crawler/tmp/crawler.siteone.io.output.20260316-155513.txt\",\n    \"outputType\": \"json\",\n    \"proxy\": null,\n    \"regexFilteringOnlyForPages\": false,\n    \"removeAllAnchorListeners\": false,\n    \"removeQueryParams\": false,\n    \"replaceContent\": [],\n    \"replaceQueryString\": [],\n    \"resolve\": [],\n    \"resultStorage\": \"memory\",\n    \"resultStorageCompression\": false,\n    \"resultStorageDir\": \"/home/janreges/siteone-crawler/tmp/result-storage\",\n    \"rowsLimit\": 200,\n    \"serveBindAddress\": \"127.0.0.1\",\n    \"serveMarkdownDir\": null,\n    \"serveOfflineDir\": null,\n    \"servePort\": 8321,\n    \"showHelpOnly\": false,\n    \"showInlineCriticals\": false,\n    \"showInlineWarnings\": false,\n    \"showSchemeAndHost\": false,\n    \"showVersionOnly\": false,\n    \"singleForeignPage\": false,\n    \"singlePage\": false,\n    \"sitemapBasePriority\": 0.5,\n    \"sitemapPriorityIncrease\": 0.1,\n    \"sitemapTxtFile\": null,\n    \"sitemapXmlFile\": null,\n    \"slowestMaxTime\": 3.0,\n    \"slowestMinTime\": 0.01,\n    \"slowestTopLimit\": 20,\n    \"timeout\": 5,\n    \"timezone\": null,\n    \"transformUrl\": [],\n    \"uploadEnabled\": false,\n    \"uploadPassword\": null,\n    \"uploadRetention\": \"30d\",\n    \"uploadTimeout\": 3600,\n    \"uploadTo\": \"https://crawler.siteone.io/up\",\n    \"url\": \"https://crawler.siteone.io/\",\n    \"urlColumnSize\": null,\n    \"userAgent\": null,\n    \"websocketServer\": null,\n    \"workers\": 3\n  },\n  \"qualityScores\": {\n    \"categories\": [\n      {\n        \"code\": \"performance\",\n        \"deductions\": [],\n        \"label\": \"Excellent\",\n        \"name\": \"Performance\",\n        \"score\": 10.0,\n        
\"weight\": 0.2\n      },\n      {\n        \"code\": \"seo\",\n        \"deductions\": [\n          {\n            \"points\": 0.5,\n            \"reason\": \"1 page(s) returned 404\"\n          }\n        ],\n        \"label\": \"Excellent\",\n        \"name\": \"SEO\",\n        \"score\": 9.5,\n        \"weight\": 0.2\n      },\n      {\n        \"code\": \"security\",\n        \"deductions\": [\n          {\n            \"points\": 2.0,\n            \"reason\": \"3 page(s) with critical security findings\"\n          }\n        ],\n        \"label\": \"Good\",\n        \"name\": \"Security\",\n        \"score\": 8.0,\n        \"weight\": 0.25\n      },\n      {\n        \"code\": \"accessibility\",\n        \"deductions\": [\n          {\n            \"points\": 0.5,\n            \"reason\": \"1 page(s) without image alt attributes\"\n          },\n          {\n            \"points\": 2.5,\n            \"reason\": \"50 page(s) with skipped heading levels\"\n          },\n          {\n            \"points\": 2.0,\n            \"reason\": \"51 page(s) without aria labels\"\n          }\n        ],\n        \"label\": \"Fair\",\n        \"name\": \"Accessibility\",\n        \"score\": 5.0,\n        \"weight\": 0.2\n      },\n      {\n        \"code\": \"best-practices\",\n        \"deductions\": [\n          {\n            \"points\": 0.5,\n            \"reason\": \"No Brotli compression support\"\n          }\n        ],\n        \"label\": \"Excellent\",\n        \"name\": \"Best Practices\",\n        \"score\": 9.5,\n        \"weight\": 0.15\n      }\n    ],\n    \"overall\": {\n      \"code\": \"overall\",\n      \"deductions\": [],\n      \"label\": \"Good\",\n      \"name\": \"Overall\",\n      \"score\": 8.3,\n      \"weight\": 1.0\n    }\n  },\n  \"results\": [\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 50961,\n      \"status\": \"200\",\n      \"type\": 1,\n  
    \"url\": \"https://crawler.siteone.io/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.004,\n      \"extras\": [],\n      \"size\": 60465,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/introduction/overview/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 54792,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/introduction/key-features/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.033,\n      \"extras\": [],\n      \"size\": 43381,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/ease-of-use/\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.138,\n      \"extras\": [],\n      \"size\": 3605887,\n      \"status\": \"200\",\n      \"type\": 4,\n      \"url\": \"https://crawler.siteone.io/siteone-crawler-app-demo.gif\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 52620,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/offline-website-generator/\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.003,\n      \"extras\": [],\n      \"size\": 31582,\n      \"status\": \"200\",\n      \"type\": 4,\n      \"url\": \"https://crawler.siteone.io/_astro/siteone-crawler-mascot.CPk15tXh_HGMwJ.webp\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.055,\n      \"extras\": [],\n      \"size\": 109656,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": 
\"https://crawler.siteone.io/configuration/command-line-options/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.073,\n      \"extras\": [],\n      \"size\": 61848,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/getting-started/basic-usage/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.04,\n      \"extras\": [],\n      \"size\": 54226,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/introduction/contact-and-community/\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.354,\n      \"extras\": [],\n      \"size\": 8701267,\n      \"status\": \"200\",\n      \"type\": 4,\n      \"url\": \"https://crawler.siteone.io/siteone-crawler-command-line-demo-full.gif\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.056,\n      \"extras\": [],\n      \"size\": 673,\n      \"status\": \"200\",\n      \"type\": 4,\n      \"url\": \"https://crawler.siteone.io/_astro/siteone-crawler-logo-dark.DaIuiR1U.svg\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.003,\n      \"extras\": [],\n      \"size\": 673,\n      \"status\": \"200\",\n      \"type\": 4,\n      \"url\": \"https://crawler.siteone.io/favicon.svg\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.004,\n      \"extras\": [],\n      \"size\": 56366,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/security-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.003,\n      \"extras\": [],\n      \"size\": 2690,\n      \"status\": \"200\",\n      \"type\": 2,\n      \"url\": 
\"https://crawler.siteone.io/_astro/Search.astro_astro_type_script_index_0_lang.DMZ5WJ-J.js\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.003,\n      \"extras\": [],\n      \"size\": 3554,\n      \"status\": \"200\",\n      \"type\": 3,\n      \"url\": \"https://crawler.siteone.io/_astro/print.DNXP8c50.css\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 61764,\n      \"status\": \"200\",\n      \"type\": 3,\n      \"url\": \"https://crawler.siteone.io/_astro/index.BRwACyc2.css\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 44537,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/dev-devops-assistant/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 51420,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/redirect-and-404-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 58940,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/performance-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 44922,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/seo-and-opengraph-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 51612,\n      \"status\": \"200\",\n      \"type\": 1,\n 
     \"url\": \"https://crawler.siteone.io/installation-and-requirements/desktop-application/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 81246,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/configuration/examples/\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.049,\n      \"extras\": [],\n      \"size\": 1333725,\n      \"status\": \"200\",\n      \"type\": 4,\n      \"url\": \"https://crawler.siteone.io/siteone-crawler-command-line-demo-w960.avif\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 53521,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/website-to-markdown-converter/\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.024,\n      \"extras\": [],\n      \"size\": 605564,\n      \"status\": \"200\",\n      \"type\": 4,\n      \"url\": \"https://crawler.siteone.io/siteone-crawler-app-demo.avif\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 68186,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/installation-and-requirements/ready-to-use-packages/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 57907,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/installation-and-requirements/system-requirements/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      
\"size\": 53752,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/introduction/ideas-and-roadmap/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 74321,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/technical-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 53689,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/audit-report/\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.047,\n      \"extras\": [],\n      \"size\": 2165,\n      \"status\": \"200\",\n      \"type\": 2,\n      \"url\": \"https://crawler.siteone.io/_astro/page.7qqag-5g.js\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.15,\n      \"extras\": [],\n      \"size\": 4134394,\n      \"status\": \"200\",\n      \"type\": 4,\n      \"url\": \"https://crawler.siteone.io/siteone-crawler-command-line-demo-w960.gif\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.006,\n      \"extras\": [],\n      \"size\": 72133,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/installation-and-requirements/manual-installation/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.004,\n      \"extras\": [],\n      \"size\": 43295,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/heading-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.003,\n      \"extras\": [],\n      
\"size\": 152,\n      \"status\": \"200\",\n      \"type\": 7,\n      \"url\": \"https://crawler.siteone.io/robots.txt\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.004,\n      \"extras\": [],\n      \"size\": 50887,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/accessibility-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.006,\n      \"extras\": [],\n      \"size\": 59500,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/online-html-report-upload/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.006,\n      \"extras\": [],\n      \"size\": 45363,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/deep-website-crawling/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.006,\n      \"extras\": [],\n      \"size\": 53489,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/getting-started/quick-start-guide/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.007,\n      \"extras\": [],\n      \"size\": 114569,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/advanced-topics/extending/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 47494,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/stress-testing/\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.003,\n      \"extras\": [],\n      \"size\": 1673,\n      \"status\": 
\"200\",\n      \"type\": 2,\n      \"url\": \"https://crawler.siteone.io/_astro/TableOfContents.astro_astro_type_script_index_0_lang.CKWWgpjV.js\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 57380,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/dns-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 64607,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/performance-metrics/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.006,\n      \"extras\": [],\n      \"size\": 72648,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/advanced-topics/troubleshooting/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.006,\n      \"extras\": [],\n      \"size\": 65733,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/introduction/faq/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.006,\n      \"extras\": [],\n      \"size\": 55178,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/content-type-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 45600,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/improvement-meter/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.006,\n      \"extras\": [],\n      \"size\": 59838,\n      \"status\": 
\"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/source-domains-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.006,\n      \"extras\": [],\n      \"size\": 65679,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/advanced-topics/crawler-behavior/\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.003,\n      \"extras\": [],\n      \"size\": 667,\n      \"status\": \"200\",\n      \"type\": 2,\n      \"url\": \"https://crawler.siteone.io/_astro/MobileTableOfContents.astro_astro_type_script_index_0_lang.C181hMzK.js\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.004,\n      \"extras\": [],\n      \"size\": 45737,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/mailer/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 46601,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/exports-and-reports/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 51057,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/introduction/motivation/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 52033,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/best-practices-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.007,\n      \"extras\": [],\n      \"size\": 72685,\n      
\"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/getting-started/advanced-usage/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.004,\n      \"extras\": [],\n      \"size\": 44108,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/availability/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 47561,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/introduction/thanks/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 46558,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/sitemap-generator/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.004,\n      \"extras\": [],\n      \"size\": 63763,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/caching-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 69474,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/headers-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 60076,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/features/ssl-tls-analysis/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 78066,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": 
\"https://crawler.siteone.io/advanced-topics/contribution-and-development/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.004,\n      \"extras\": [],\n      \"size\": 41701,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/introduction/support-us/\"\n    },\n    {\n      \"cacheLifetime\": 3600,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.005,\n      \"extras\": [],\n      \"size\": 58031,\n      \"status\": \"200\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/advanced-topics/caching/\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.004,\n      \"extras\": [],\n      \"size\": 15632,\n      \"status\": \"200\",\n      \"type\": 3,\n      \"url\": \"https://crawler.siteone.io/_astro/ec.5wl1j.css\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.003,\n      \"extras\": [],\n      \"size\": 2416,\n      \"status\": \"200\",\n      \"type\": 2,\n      \"url\": \"https://crawler.siteone.io/_astro/ec.8zarh.js\"\n    },\n    {\n      \"cacheLifetime\": null,\n      \"cacheTypeFlags\": 4,\n      \"elapsedTime\": 0.003,\n      \"extras\": [],\n      \"size\": 788,\n      \"status\": \"429\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/_astro/desktop-app-release-assets.DBTr-vv8_Z2rPu7O.webp\"\n    },\n    {\n      \"cacheLifetime\": 31536000,\n      \"cacheTypeFlags\": 31,\n      \"elapsedTime\": 0.003,\n      \"extras\": [],\n      \"size\": 17372,\n      \"status\": \"200\",\n      \"type\": 4,\n      \"url\": \"https://crawler.siteone.io/_astro/ready-to-use-packages.BYCPharn_Z1ivwN5.webp\"\n    },\n    {\n      \"cacheLifetime\": null,\n      \"cacheTypeFlags\": 4,\n      \"elapsedTime\": 0.003,\n      \"extras\": [],\n      \"size\": 788,\n      \"status\": \"429\",\n      \"type\": 1,\n      
\"url\": \"https://crawler.siteone.io/docs/features/best-practices-analysis\"\n    },\n    {\n      \"cacheLifetime\": null,\n      \"cacheTypeFlags\": 4,\n      \"elapsedTime\": 0.003,\n      \"extras\": [],\n      \"size\": 780,\n      \"status\": \"404\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/docs/features/content-type-analysis\"\n    },\n    {\n      \"cacheLifetime\": null,\n      \"cacheTypeFlags\": 4,\n      \"elapsedTime\": 0.003,\n      \"extras\": [],\n      \"size\": 788,\n      \"status\": \"429\",\n      \"type\": 1,\n      \"url\": \"https://crawler.siteone.io/docs/features/technical-analysis\"\n    }\n  ],\n  \"stats\": {\n    \"countByStatus\": {\n      \"200\": 69,\n      \"404\": 1,\n      \"429\": 3\n    },\n    \"totalExecutionTime\": 9.241,\n    \"totalRequestsTimes\": 1.308,\n    \"totalRequestsTimesAvg\": 0.018,\n    \"totalRequestsTimesMax\": 0.354,\n    \"totalRequestsTimesMin\": 0.003,\n    \"totalSize\": 21514206,\n    \"totalSizeFormatted\": \"21 MB\",\n    \"totalUrls\": 73\n  },\n  \"summary\": {\n    \"items\": [\n      {\n        \"aplCode\": \"skipped\",\n        \"status\": \"CRITICAL\",\n        \"text\": \"Skipped URLs - 95 skipped URLs found\"\n      },\n      {\n        \"aplCode\": \"security\",\n        \"status\": \"CRITICAL\",\n        \"text\": \"Security - 3 pages(s) with critical finding(s).\"\n      },\n      {\n        \"aplCode\": \"ssl-protocol-hint\",\n        \"status\": \"WARNING\",\n        \"text\": \"Latest SSL/TLS protocol TLSv1.3 is not supported. 
Ask your admin/provider to add TLSv1.3 support.\"\n      },\n      {\n        \"aplCode\": \"brotli-support\",\n        \"status\": \"WARNING\",\n        \"text\": \"51 page(s) do not support Brotli compression.\"\n      },\n      {\n        \"aplCode\": \"pages-with-skipped-heading-levels\",\n        \"status\": \"WARNING\",\n        \"text\": \"50 page(s) with skipped heading levels\"\n      },\n      {\n        \"aplCode\": \"pages-without-image-alt-attributes\",\n        \"status\": \"WARNING\",\n        \"text\": \"1 page(s) without image alt attributes\"\n      },\n      {\n        \"aplCode\": \"pages-without-aria-labels\",\n        \"status\": \"WARNING\",\n        \"text\": \"51 page(s) without aria labels\"\n      },\n      {\n        \"aplCode\": \"pages-without-roles\",\n        \"status\": \"WARNING\",\n        \"text\": \"51 page(s) without role attributes\"\n      },\n      {\n        \"aplCode\": \"robots-txt-crawler.siteone.io\",\n        \"status\": \"NOTICE\",\n        \"text\": \"Loaded robots.txt for domain 'crawler.siteone.io': status code 200, size 152 B and took 25 ms.\"\n      },\n      {\n        \"aplCode\": \"external-urls\",\n        \"status\": \"NOTICE\",\n        \"text\": \"External URLs - 89 external URL(s) found\"\n      },\n      {\n        \"aplCode\": \"404\",\n        \"status\": \"NOTICE\",\n        \"text\": \"404 NOTICE - 1 non-existent page(s) found\"\n      },\n      {\n        \"aplCode\": \"dns-ipv6\",\n        \"status\": \"NOTICE\",\n        \"text\": \"DNS IPv6: domain crawler.siteone.io does not support IPv6 (DNS server: 10.255.255.254)\"\n      },\n      {\n        \"aplCode\": \"redirects\",\n        \"status\": \"OK\",\n        \"text\": \"Redirects - no redirects found\"\n      },\n      {\n        \"aplCode\": \"ssl-certificate-valid\",\n        \"status\": \"OK\",\n        \"text\": \"SSL/TLS certificate is valid until Mar 13 15:43:29 2027 GMT. 
Issued by C = BE, O = GlobalSign nv-sa, CN = GlobalSign GCC R6 AlphaSSL CA 2025. Subject is CN = *.siteone.io.\"\n      },\n      {\n        \"aplCode\": \"certificate-info\",\n        \"status\": \"OK\",\n        \"text\": \"SSL/TLS certificate issued by 'C = BE, O = GlobalSign nv-sa, CN = GlobalSign GCC R6 AlphaSSL CA 2025'.\"\n      },\n      {\n        \"aplCode\": \"slowUrls\",\n        \"status\": \"OK\",\n        \"text\": \"Performance OK - all non-media URLs are faster than 3 seconds\"\n      },\n      {\n        \"aplCode\": \"unique-headers\",\n        \"status\": \"OK\",\n        \"text\": \"HTTP headers - found 18 unique headers\"\n      },\n      {\n        \"aplCode\": \"title-uniqueness\",\n        \"status\": \"OK\",\n        \"text\": \"All 51 unique title(s) are within the allowed 10% duplicity. Highest duplicity title has 1%.\"\n      },\n      {\n        \"aplCode\": \"meta-description-uniqueness\",\n        \"status\": \"OK\",\n        \"text\": \"All 50 description(s) are within the allowed 10% duplicity. 
Highest duplicity description has 3%.\"\n      },\n      {\n        \"aplCode\": \"webp-support\",\n        \"status\": \"OK\",\n        \"text\": \"2 WebP image(s) found on the website.\"\n      },\n      {\n        \"aplCode\": \"avif-support\",\n        \"status\": \"OK\",\n        \"text\": \"2 AVIF image(s) found on the website.\"\n      },\n      {\n        \"aplCode\": \"pages-with-missing-quotes\",\n        \"status\": \"OK\",\n        \"text\": \"All pages have quoted attributes\"\n      },\n      {\n        \"aplCode\": \"pages-with-large-svgs\",\n        \"status\": \"OK\",\n        \"text\": \"All pages have inline SVGs smaller than 5120 bytes\"\n      },\n      {\n        \"aplCode\": \"pages-with-duplicated-svgs\",\n        \"status\": \"OK\",\n        \"text\": \"All pages have inline SVGs with less than 5 duplicates\"\n      },\n      {\n        \"aplCode\": \"pages-with-invalid-svgs\",\n        \"status\": \"OK\",\n        \"text\": \"All pages have valid or none inline SVGs\"\n      },\n      {\n        \"aplCode\": \"pages-with-multiple-h1\",\n        \"status\": \"OK\",\n        \"text\": \"All pages without multiple <h1> headings\"\n      },\n      {\n        \"aplCode\": \"pages-without-h1\",\n        \"status\": \"OK\",\n        \"text\": \"All pages have <h1> heading\"\n      },\n      {\n        \"aplCode\": \"pages-with-deep-dom\",\n        \"status\": \"OK\",\n        \"text\": \"All pages have DOM depth less than 30\"\n      },\n      {\n        \"aplCode\": \"pages-with-non-clickable-phone-numbers\",\n        \"status\": \"OK\",\n        \"text\": \"All pages have clickable (interactive) phone numbers\"\n      },\n      {\n        \"aplCode\": \"pages-with-invalid-html\",\n        \"status\": \"OK\",\n        \"text\": \"All pages have valid HTML\"\n      },\n      {\n        \"aplCode\": \"pages-without-form-labels\",\n        \"status\": \"OK\",\n        \"text\": \"All pages have form labels\"\n      },\n      {\n        \"aplCode\": 
\"pages-without-lang\",\n        \"status\": \"OK\",\n        \"text\": \"All pages have lang attribute\"\n      },\n      {\n        \"aplCode\": \"dns-ipv4\",\n        \"status\": \"OK\",\n        \"text\": \"DNS IPv4 OK: domain crawler.siteone.io resolved to 86.49.167.242 (DNS server: 10.255.255.254)\"\n      },\n      {\n        \"aplCode\": \"export-to-text\",\n        \"status\": \"INFO\",\n        \"text\": \"Text report saved to '/home/janreges/siteone-crawler/tmp/crawler.siteone.io.output.20260316-155513.txt' and took 0 ms\"\n      },\n      {\n        \"aplCode\": \"export-to-json\",\n        \"status\": \"INFO\",\n        \"text\": \"JSON report saved to '/home/janreges/siteone-crawler/tmp/crawler.siteone.io.output.20260316-155513.json' and took 0 ms\"\n      },\n      {\n        \"aplCode\": \"export-to-html\",\n        \"status\": \"INFO\",\n        \"text\": \"HTML report saved to '/home/janreges/siteone-crawler/tmp/crawler.siteone.io.report.20260316-155513.html' and took 0 ms\"\n      }\n    ]\n  },\n  \"tables\": {\n    \"404\": {\n      \"aplCode\": \"404\",\n      \"columns\": {\n        \"sourceUqId\": {\n          \"aplCode\": \"sourceUqId\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Found at URL\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 61\n        },\n        \"statusCode\": {\n          \"aplCode\": \"statusCode\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Status\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          
\"width\": 6\n        },\n        \"url\": {\n          \"aplCode\": \"url\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"URL 404\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 61\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"sourceUqId\": \"https://crawler.siteone.io/features/performance-metrics/\",\n          \"statusCode\": \"404\",\n          \"url\": \"https://crawler.siteone.io/docs/features/content-type-analysis\"\n        }\n      ],\n      \"title\": \"404 URLs\"\n    },\n    \"accessibility\": {\n      \"aplCode\": \"accessibility\",\n      \"columns\": {\n        \"analysisName\": {\n          \"aplCode\": \"analysisName\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Analysis name\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"critical\": {\n          \"aplCode\": \"critical\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Critical\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 8\n        },\n        \"notice\": {\n          \"aplCode\": \"notice\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          
\"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Notice\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 6\n        },\n        \"ok\": {\n          \"aplCode\": \"ok\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"OK\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 5\n        },\n        \"warning\": {\n          \"aplCode\": \"warning\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Warning\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 7\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"analysisName\": \"Missing aria labels\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"2\",\n          \"warning\": \"119\"\n        },\n        {\n          \"analysisName\": \"Missing roles\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"0\",\n          \"warning\": \"35\"\n        },\n        {\n          \"analysisName\": \"Missing image alt attributes\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"6\",\n          \"warning\": \"1\"\n        },\n        {\n          \"analysisName\": \"Missing html lang attribute\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"1\",\n          \"warning\": \"0\"\n    
    }\n      ],\n      \"title\": \"Accessibility\"\n    },\n    \"analysis-stats\": {\n      \"aplCode\": \"analysis-stats\",\n      \"columns\": {\n        \"classAndMethod\": {\n          \"aplCode\": \"classAndMethod\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Class::method\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"execCount\": {\n          \"aplCode\": \"execCount\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Exec count\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"execTime\": {\n          \"aplCode\": \"execTime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Exec time\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 9\n        }\n      },\n      \"position\": \"after-url-table\",\n      \"rows\": [\n        {\n          \"classAndMethod\": \"SslTlsAnalyzer::getTLSandSSLCertificateInfo\",\n          \"execCount\": \"1\",\n          \"execTime\": \"0.26621116\",\n          \"execTimeFormatted\": \"266 ms\"\n        },\n        {\n          \"classAndMethod\": \"BestPracticeAnalyzer::checkHeadingStructure\",\n          \"execCount\": \"55\",\n          \"execTime\": \"0.04904129400000001\",\n        
  \"execTimeFormatted\": \"49 ms\"\n        },\n        {\n          \"classAndMethod\": \"AccessibilityAnalyzer::checkMissingAriaLabels\",\n          \"execCount\": \"51\",\n          \"execTime\": \"0.04710557000000001\",\n          \"execTimeFormatted\": \"47 ms\"\n        },\n        {\n          \"classAndMethod\": \"AccessibilityAnalyzer::checkMissingLabels\",\n          \"execCount\": \"51\",\n          \"execTime\": \"0.043233411\",\n          \"execTimeFormatted\": \"43 ms\"\n        },\n        {\n          \"classAndMethod\": \"AccessibilityAnalyzer::checkMissingRoles\",\n          \"execCount\": \"51\",\n          \"execTime\": \"0.040707958000000016\",\n          \"execTimeFormatted\": \"40 ms\"\n        },\n        {\n          \"classAndMethod\": \"BestPracticeAnalyzer::checkMaxDOMDepth\",\n          \"execCount\": \"55\",\n          \"execTime\": \"0.03819707099999999\",\n          \"execTimeFormatted\": \"38 ms\"\n        },\n        {\n          \"classAndMethod\": \"AccessibilityAnalyzer::checkMissingLang\",\n          \"execCount\": \"51\",\n          \"execTime\": \"0.03680993700000001\",\n          \"execTimeFormatted\": \"36 ms\"\n        },\n        {\n          \"classAndMethod\": \"BestPracticeAnalyzer::checkNonClickablePhoneNumbers\",\n          \"execCount\": \"55\",\n          \"execTime\": \"0.025210428\",\n          \"execTimeFormatted\": \"25 ms\"\n        },\n        {\n          \"classAndMethod\": \"BestPracticeAnalyzer::checkInlineSvg\",\n          \"execCount\": \"55\",\n          \"execTime\": \"0.012483105000000001\",\n          \"execTimeFormatted\": \"12 ms\"\n        },\n        {\n          \"classAndMethod\": \"BestPracticeAnalyzer::checkMissingQuotesOnAttributes\",\n          \"execCount\": \"55\",\n          \"execTime\": \"0.003967633999999999\",\n          \"execTimeFormatted\": \"3 ms\"\n        },\n        {\n          \"classAndMethod\": \"SeoAndOpenGraphAnalyzer::analyzeHeadings\",\n          \"execCount\": 
\"1\",\n          \"execTime\": \"0.002403547\",\n          \"execTimeFormatted\": \"2 ms\"\n        },\n        {\n          \"classAndMethod\": \"SecurityAnalyzer::checkHtmlSecurity\",\n          \"execCount\": \"54\",\n          \"execTime\": \"0.0019036469999999996\",\n          \"execTimeFormatted\": \"1 ms\"\n        },\n        {\n          \"classAndMethod\": \"AccessibilityAnalyzer::checkImageAltAttributes\",\n          \"execCount\": \"51\",\n          \"execTime\": \"0.0015152590000000004\",\n          \"execTimeFormatted\": \"1 ms\"\n        },\n        {\n          \"classAndMethod\": \"SecurityAnalyzer::checkHeaders\",\n          \"execCount\": \"54\",\n          \"execTime\": \"0.000987299\",\n          \"execTimeFormatted\": \"0 ms\"\n        },\n        {\n          \"classAndMethod\": \"SeoAndOpenGraphAnalyzer::analyzeSeo\",\n          \"execCount\": \"1\",\n          \"execTime\": \"0.000208111\",\n          \"execTimeFormatted\": \"0 ms\"\n        },\n        {\n          \"classAndMethod\": \"SeoAndOpenGraphAnalyzer::analyzeOpenGraph\",\n          \"execCount\": \"1\",\n          \"execTime\": \"0.000155641\",\n          \"execTimeFormatted\": \"0 ms\"\n        },\n        {\n          \"classAndMethod\": \"BestPracticeAnalyzer::checkTitleUniqueness\",\n          \"execCount\": \"1\",\n          \"execTime\": \"0.000021892\",\n          \"execTimeFormatted\": \"0 ms\"\n        },\n        {\n          \"classAndMethod\": \"BestPracticeAnalyzer::checkMetaDescriptionUniqueness\",\n          \"execCount\": \"1\",\n          \"execTime\": \"0.000020891\",\n          \"execTimeFormatted\": \"0 ms\"\n        },\n        {\n          \"classAndMethod\": \"BestPracticeAnalyzer::checkBrotliSupport\",\n          \"execCount\": \"1\",\n          \"execTime\": \"0.000001541\",\n          \"execTimeFormatted\": \"0 ms\"\n        },\n        {\n          \"classAndMethod\": \"BestPracticeAnalyzer::checkWebpSupport\",\n          \"execCount\": \"1\",\n        
  \"execTime\": \"0.00000104\",\n          \"execTimeFormatted\": \"0 ms\"\n        },\n        {\n          \"classAndMethod\": \"BestPracticeAnalyzer::checkAvifSupport\",\n          \"execCount\": \"1\",\n          \"execTime\": \"0.000000961\",\n          \"execTimeFormatted\": \"0 ms\"\n        }\n      ],\n      \"title\": \"Analysis stats\"\n    },\n    \"best-practices\": {\n      \"aplCode\": \"best-practices\",\n      \"columns\": {\n        \"analysisName\": {\n          \"aplCode\": \"analysisName\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Analysis name\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"critical\": {\n          \"aplCode\": \"critical\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Critical\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 8\n        },\n        \"notice\": {\n          \"aplCode\": \"notice\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Notice\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 6\n        },\n        \"ok\": {\n          \"aplCode\": \"ok\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          
\"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"OK\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 5\n        },\n        \"warning\": {\n          \"aplCode\": \"warning\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Warning\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 7\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"analysisName\": \"Invalid inline SVGs\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"34\",\n          \"warning\": \"0\"\n        },\n        {\n          \"analysisName\": \"DOM depth\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"55\",\n          \"warning\": \"0\"\n        },\n        {\n          \"analysisName\": \"Duplicate inline SVGs\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"34\",\n          \"warning\": \"0\"\n        },\n        {\n          \"analysisName\": \"Large inline SVGs\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"34\",\n          \"warning\": \"0\"\n        },\n        {\n          \"analysisName\": \"Heading structure\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"56\",\n          \"warning\": \"54\"\n        },\n        {\n          \"analysisName\": \"Title uniqueness\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"51\",\n          \"warning\": \"0\"\n        },\n        {\n          \"analysisName\": \"Description 
uniqueness\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"50\",\n          \"warning\": \"0\"\n        },\n        {\n          \"analysisName\": \"Brotli support\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"0\",\n          \"warning\": \"51\"\n        },\n        {\n          \"analysisName\": \"WebP support\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"2\",\n          \"warning\": \"0\"\n        },\n        {\n          \"analysisName\": \"AVIF support\",\n          \"critical\": \"0\",\n          \"notice\": \"0\",\n          \"ok\": \"2\",\n          \"warning\": \"0\"\n        }\n      ],\n      \"title\": \"Best practices\"\n    },\n    \"caching-per-content-type\": {\n      \"aplCode\": \"caching-per-content-type\",\n      \"columns\": {\n        \"avgLifetime\": {\n          \"aplCode\": \"avgLifetime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"AVG lifetime\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"cacheType\": {\n          \"aplCode\": \"cacheType\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Cache type\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 12\n        },\n        \"contentType\": {\n          \"aplCode\": \"contentType\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          
\"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Content type\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 12\n        },\n        \"count\": {\n          \"aplCode\": \"count\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"URLs\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 5\n        },\n        \"maxLifetime\": {\n          \"aplCode\": \"maxLifetime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"MAX lifetime\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"minLifetime\": {\n          \"aplCode\": \"minLifetime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"MIN lifetime\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"avgLifetime\": \"3600\",\n          \"cacheType\": \"Cache-Control + ETag + Last-Modified\",\n          \"contentType\": \"HTML\",\n          \"count\": \"51\",\n          \"maxLifetime\": \"3600\",\n          \"minLifetime\": \"3600\"\n        },\n        {\n          
\"avgLifetime\": \"31536000\",\n          \"cacheType\": \"Cache-Control + ETag + Last-Modified\",\n          \"contentType\": \"Image\",\n          \"count\": \"9\",\n          \"maxLifetime\": \"31536000\",\n          \"minLifetime\": \"31536000\"\n        },\n        {\n          \"avgLifetime\": \"31536000\",\n          \"cacheType\": \"Cache-Control + ETag + Last-Modified\",\n          \"contentType\": \"JS\",\n          \"count\": \"5\",\n          \"maxLifetime\": \"31536000\",\n          \"minLifetime\": \"31536000\"\n        },\n        {\n          \"avgLifetime\": \"\",\n          \"cacheType\": \"ETag\",\n          \"contentType\": \"HTML\",\n          \"count\": \"4\",\n          \"maxLifetime\": \"\",\n          \"minLifetime\": \"\"\n        },\n        {\n          \"avgLifetime\": \"31536000\",\n          \"cacheType\": \"Cache-Control + ETag + Last-Modified\",\n          \"contentType\": \"CSS\",\n          \"count\": \"3\",\n          \"maxLifetime\": \"31536000\",\n          \"minLifetime\": \"31536000\"\n        },\n        {\n          \"avgLifetime\": \"3600\",\n          \"cacheType\": \"Cache-Control + ETag + Last-Modified\",\n          \"contentType\": \"Document\",\n          \"count\": \"1\",\n          \"maxLifetime\": \"3600\",\n          \"minLifetime\": \"3600\"\n        }\n      ],\n      \"title\": \"HTTP Caching by content type (only from crawlable domains)\"\n    },\n    \"caching-per-domain\": {\n      \"aplCode\": \"caching-per-domain\",\n      \"columns\": {\n        \"avgLifetime\": {\n          \"aplCode\": \"avgLifetime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"AVG lifetime\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        
\"cacheType\": {\n          \"aplCode\": \"cacheType\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Cache type\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 12\n        },\n        \"count\": {\n          \"aplCode\": \"count\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"URLs\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 5\n        },\n        \"domain\": {\n          \"aplCode\": \"domain\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Domain\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 20\n        },\n        \"maxLifetime\": {\n          \"aplCode\": \"maxLifetime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"MAX lifetime\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"minLifetime\": {\n          \"aplCode\": \"minLifetime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          
\"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"MIN lifetime\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"avgLifetime\": \"7772452\",\n          \"cacheType\": \"Cache-Control + ETag + Last-Modified\",\n          \"count\": \"69\",\n          \"domain\": \"crawler.siteone.io\",\n          \"maxLifetime\": \"31536000\",\n          \"minLifetime\": \"3600\"\n        },\n        {\n          \"avgLifetime\": \"\",\n          \"cacheType\": \"ETag\",\n          \"count\": \"4\",\n          \"domain\": \"crawler.siteone.io\",\n          \"maxLifetime\": \"\",\n          \"minLifetime\": \"\"\n        }\n      ],\n      \"title\": \"HTTP Caching by domain\"\n    },\n    \"caching-per-domain-and-content-type\": {\n      \"aplCode\": \"caching-per-domain-and-content-type\",\n      \"columns\": {\n        \"avgLifetime\": {\n          \"aplCode\": \"avgLifetime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"AVG lifetime\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"cacheType\": {\n          \"aplCode\": \"cacheType\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Cache type\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 12\n        },\n        
\"contentType\": {\n          \"aplCode\": \"contentType\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Content type\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 12\n        },\n        \"count\": {\n          \"aplCode\": \"count\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"URLs\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 5\n        },\n        \"domain\": {\n          \"aplCode\": \"domain\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Domain\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 20\n        },\n        \"maxLifetime\": {\n          \"aplCode\": \"maxLifetime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"MAX lifetime\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"minLifetime\": {\n          \"aplCode\": \"minLifetime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          
\"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"MIN lifetime\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"avgLifetime\": \"3600\",\n          \"cacheType\": \"Cache-Control + ETag + Last-Modified\",\n          \"contentType\": \"HTML\",\n          \"count\": \"51\",\n          \"domain\": \"crawler.siteone.io\",\n          \"maxLifetime\": \"3600\",\n          \"minLifetime\": \"3600\"\n        },\n        {\n          \"avgLifetime\": \"31536000\",\n          \"cacheType\": \"Cache-Control + ETag + Last-Modified\",\n          \"contentType\": \"Image\",\n          \"count\": \"9\",\n          \"domain\": \"crawler.siteone.io\",\n          \"maxLifetime\": \"31536000\",\n          \"minLifetime\": \"31536000\"\n        },\n        {\n          \"avgLifetime\": \"31536000\",\n          \"cacheType\": \"Cache-Control + ETag + Last-Modified\",\n          \"contentType\": \"JS\",\n          \"count\": \"5\",\n          \"domain\": \"crawler.siteone.io\",\n          \"maxLifetime\": \"31536000\",\n          \"minLifetime\": \"31536000\"\n        },\n        {\n          \"avgLifetime\": \"\",\n          \"cacheType\": \"ETag\",\n          \"contentType\": \"HTML\",\n          \"count\": \"4\",\n          \"domain\": \"crawler.siteone.io\",\n          \"maxLifetime\": \"\",\n          \"minLifetime\": \"\"\n        },\n        {\n          \"avgLifetime\": \"31536000\",\n          \"cacheType\": \"Cache-Control + ETag + Last-Modified\",\n          \"contentType\": \"CSS\",\n          \"count\": \"3\",\n          \"domain\": \"crawler.siteone.io\",\n          \"maxLifetime\": \"31536000\",\n          \"minLifetime\": \"31536000\"\n        },\n        {\n          \"avgLifetime\": \"3600\",\n          
\"cacheType\": \"Cache-Control + ETag + Last-Modified\",\n          \"contentType\": \"Document\",\n          \"count\": \"1\",\n          \"domain\": \"crawler.siteone.io\",\n          \"maxLifetime\": \"3600\",\n          \"minLifetime\": \"3600\"\n        }\n      ],\n      \"title\": \"HTTP Caching by domain and content type\"\n    },\n    \"certificate-info\": {\n      \"aplCode\": \"certificate-info\",\n      \"columns\": {\n        \"info\": {\n          \"aplCode\": \"info\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Info\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"value\": {\n          \"aplCode\": \"value\",\n          \"escapeOutputHtml\": false,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Text\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 108\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"info\": \"Issuer\",\n          \"value\": \"C = BE, O = GlobalSign nv-sa, CN = GlobalSign GCC R6 AlphaSSL CA 2025\"\n        },\n        {\n          \"info\": \"Subject\",\n          \"value\": \"CN = *.siteone.io\"\n        },\n        {\n          \"info\": \"Valid from\",\n          \"value\": \"Feb  9 15:43:30 2026 GMT (VALID already 35 day(s))\"\n        },\n        {\n          \"info\": \"Valid to\",\n          \"value\": \"Mar 13 15:43:29 2027 GMT (VALID still for 362 day(s))\"\n        },\n        {\n          \"info\": \"Supported protocols\",\n          \"value\": 
\"TLSv1.2\"\n        },\n        {\n          \"info\": \"RAW certificate output\",\n          \"value\": \"Certificate:\\n    Data:\\n        Version: 3 (0x2)\\n        Serial Number:\\n            3a:5e:e1:92:b3:18:b1:0c:8a:ff:d6:d6\\n        Signature Algorithm: sha256WithRSAEncryption\\n        Issuer: C = BE, O = GlobalSign nv-sa, CN = GlobalSign GCC R6 AlphaSSL CA 2025\\n        Validity\\n            Not Before: Feb  9 15:43:30 2026 GMT\\n            Not After : Mar 13 15:43:29 2027 GMT\\n        Subject: CN = *.siteone.io\\n        Subject Public Key Info:\\n            Public Key Algorithm: id-ecPublicKey\\n                Public-Key: (256 bit)\\n                pub:\\n                    04:6f:74:08:f9:5a:a7:0c:ff:69:30:7f:15:12:90:\\n                    9e:91:8e:80:3e:12:2b:cf:26:69:03:42:42:88:bc:\\n                    b5:3d:4d:73:76:90:bb:39:30:cd:fd:82:79:c3:88:\\n                    1f:d9:40:06:74:04:37:a5:48:86:2b:d7:ef:cb:b6:\\n                    f1:9f:39:7f:9a\\n                ASN1 OID: prime256v1\\n                NIST CURVE: P-256\\n        X509v3 extensions:\\n            X509v3 Key Usage: critical\\n                Digital Signature\\n            X509v3 Basic Constraints: critical\\n                CA:FALSE\\n            Authority Information Access: \\n                CA Issuers - URI:http://secure.globalsign.com/cacert/gsgccr6alphasslca2025.crt\\n                OCSP - URI:http://ocsp.globalsign.com/gsgccr6alphasslca2025\\n            X509v3 Certificate Policies: \\n                Policy: 2.23.140.1.2.1\\n                Policy: 1.3.6.1.4.1.4146.10.1.3\\n                  CPS: https://www.globalsign.com/repository/\\n            X509v3 CRL Distribution Points: \\n                Full Name:\\n                  URI:http://crl.globalsign.com/gsgccr6alphasslca2025.crl\\n            X509v3 Subject Alternative Name: \\n                DNS:*.siteone.io, DNS:siteone.io\\n            X509v3 Extended Key Usage: \\n                TLS Web Server 
Authentication, TLS Web Client Authentication\\n            X509v3 Authority Key Identifier: \\n                C5:B4:93:8F:6F:2B:DC:1E:48:BF:B7:10:30:85:CE:D1:B2:BB:48:2D\\n            X509v3 Subject Key Identifier: \\n                C7:E1:4D:93:BD:A2:18:AA:F4:FF:3B:F0:8D:7F:7F:8D:2E:C0:2F:C4\\n            CT Precertificate SCTs: \\n                Signed Certificate Timestamp:\\n                    Version   : v1 (0x0)\\n                    Log ID    : 1C:9F:68:2C:E9:FA:F0:45:69:50:F8:1B:96:8A:87:DD:\\n                                DB:32:10:D8:4C:E6:C8:B2:E3:82:52:4A:C4:CF:59:9F\\n                    Timestamp : Feb  9 15:43:33.998 2026 GMT\\n                    Extensions: none\\n                    Signature : ecdsa-with-SHA256\\n                                30:45:02:21:00:B9:2C:BE:99:33:6A:9A:E2:6B:0F:45:\\n                                21:1D:61:57:A1:44:E0:2F:1A:97:8F:6E:B2:20:90:EA:\\n                                29:C6:8E:2B:B0:02:20:68:88:48:D5:DB:91:05:B6:BD:\\n                                C3:8A:BB:B4:4B:06:AF:86:6A:C0:14:47:9C:F4:49:51:\\n                                04:A0:7E:C7:24:43:84\\n                Signed Certificate Timestamp:\\n                    Version   : v1 (0x0)\\n                    Log ID    : 8E:CA:47:0B:AC:DE:6A:F3:A2:06:B0:A4:7A:84:B7:46:\\n                                FE:1F:C6:BF:95:3E:25:E6:9B:4E:E4:02:48:F3:C6:E8\\n                    Timestamp : Feb  9 15:43:33.773 2026 GMT\\n                    Extensions: 00:00:05:00:02:87:F7:42\\n                    Signature : ecdsa-with-SHA256\\n                                30:46:02:21:00:F8:9D:85:F9:39:A8:45:BA:B7:E5:66:\\n                                0E:F4:30:96:25:E1:DC:68:8A:27:FD:50:09:CB:B1:E8:\\n                                7B:62:AC:81:43:02:21:00:D0:8C:58:9B:E9:33:B2:69:\\n                                98:2F:67:63:92:09:A8:BA:28:86:91:1D:B8:1A:AA:76:\\n                                24:11:25:57:95:AC:E4:6C\\n                Signed Certificate 
Timestamp:\\n                    Version   : v1 (0x0)\\n                    Log ID    : 4C:63:DC:98:E5:9C:1D:AB:88:F6:1E:8A:3D:DE:AE:8F:\\n                                AB:44:A3:37:7B:5F:9B:94:C3:FB:A1:9C:FC:C1:BE:26\\n                    Timestamp : Feb  9 15:43:33.289 2026 GMT\\n                    Extensions: none\\n                    Signature : ecdsa-with-SHA256\\n                                30:44:02:20:69:87:55:72:C8:42:25:16:96:0D:84:9F:\\n                                59:E8:76:38:6C:4E:F3:7B:B3:F9:A6:B9:01:13:C6:61:\\n                                85:D3:2F:E8:02:20:0A:AE:F2:AE:5E:AA:7D:A9:92:96:\\n                                06:DE:1A:A5:39:26:CE:EB:74:F9:C5:12:7F:DC:9F:5B:\\n                                A9:AD:4A:0C:33:81\\n    Signature Algorithm: sha256WithRSAEncryption\\n    Signature Value:\\n        54:38:06:65:1b:89:af:97:d5:b5:ab:88:03:49:99:7c:0f:fa:\\n        99:d2:cf:6d:01:a3:eb:67:9f:f6:d2:0f:f2:18:41:4f:78:93:\\n        6a:b8:c8:65:4c:09:b4:30:fb:51:45:4e:26:a2:0c:cf:d9:71:\\n        e4:5c:ab:29:20:87:ad:87:49:03:22:28:0b:aa:67:89:0a:99:\\n        e1:05:27:ae:c2:80:7d:d1:5e:16:95:a6:9c:3e:d3:4c:4f:d0:\\n        70:4a:a4:d6:b5:a4:7f:ba:be:d1:91:37:d3:41:63:c7:7d:34:\\n        de:d8:a1:12:1d:af:0b:2d:c3:49:be:6b:7c:46:cc:b2:f1:5d:\\n        e3:f8:d2:ed:0e:49:bf:50:e7:c8:c8:06:4b:2f:8f:ea:d4:f7:\\n        1b:42:10:b1:c6:d7:74:42:01:6e:57:ec:46:b9:73:e7:72:4d:\\n        f9:66:a9:e3:2a:2e:f1:75:7c:34:19:e0:4d:83:8d:e2:df:a6:\\n        49:54:84:3a:70:f0:55:ea:94:de:8c:4f:e7:cc:27:1d:2a:39:\\n        dd:45:ed:a7:67:f9:fe:42:d1:8e:dd:99:67:41:1a:13:e8:0f:\\n        be:66:ec:28:51:44:6f:f9:4b:81:45:5f:20:99:98:a2:88:40:\\n        64:e7:86:dd:4f:56:91:5e:d3:b7:91:69:e7:d2:7c:16:53:e8:\\n        91:24:48:5b\\n\"\n        },\n        {\n          \"info\": \"RAW protocols output\",\n          \"value\": \"\\n=== ssl2 ===\\ns_client: Unknown option: -ssl2\\ns_client: Use -help for summary.\\n\\n=== ssl3 ===\\ns_client: Unknown option: 
-ssl3\\ns_client: Use -help for summary.\\n\\n=== tls1 ===\\n40A73279927E0000:error:0A00042E:SSL routines:ssl3_read_bytes:tlsv1 alert protocol version:ssl/record/rec_layer_s3.c:1605:SSL alert number 70\\nCONNECTED(00000003)\\n---\\nno peer certificate available\\n---\\nNo client certificate CA names sent\\n---\\nSSL handshake has read 7 bytes and written 131 bytes\\nVerification: OK\\n---\\nNew, (NONE), Cipher is (NONE)\\nSecure Renegotiation IS NOT supported\\nCompression: NONE\\nExpansion: NONE\\nNo ALPN negotiated\\nSSL-Session:\\n    Protocol  : TLSv1\\n    Cipher    : 0000\\n    Session-ID: \\n    Session-ID-ctx: \\n    Master-Key: \\n    PSK identity: None\\n    PSK identity hint: None\\n    SRP username: None\\n    Start Time: 1773672922\\n    Timeout   : 7200 (sec)\\n    Verify return code: 0 (ok)\\n    Extended master secret: no\\n---\\n\\n=== tls1_1 ===\\n4097DCBCC9700000:error:0A00042E:SSL routines:ssl3_read_bytes:tlsv1 alert protocol version:ssl/record/rec_layer_s3.c:1605:SSL alert number 70\\nCONNECTED(00000003)\\n---\\nno peer certificate available\\n---\\nNo client certificate CA names sent\\n---\\nSSL handshake has read 7 bytes and written 131 bytes\\nVerification: OK\\n---\\nNew, (NONE), Cipher is (NONE)\\nSecure Renegotiation IS NOT supported\\nCompression: NONE\\nExpansion: NONE\\nNo ALPN negotiated\\nSSL-Session:\\n    Protocol  : TLSv1.1\\n    Cipher    : 0000\\n    Session-ID: \\n    Session-ID-ctx: \\n    Master-Key: \\n    PSK identity: None\\n    PSK identity hint: None\\n    SRP username: None\\n    Start Time: 1773672922\\n    Timeout   : 7200 (sec)\\n    Verify return code: 0 (ok)\\n    Extended master secret: no\\n---\\n\\n=== tls1_2 ===\\ndepth=2 OU = GlobalSign Root CA - R6, O = GlobalSign, CN = GlobalSign\\nverify return:1\\ndepth=1 C = BE, O = GlobalSign nv-sa, CN = GlobalSign GCC R6 AlphaSSL CA 2025\\nverify return:1\\ndepth=0 CN = *.siteone.io\\nverify return:1\\nCONNECTED(00000003)\\n---\\nCertificate chain\\n 0 s:CN = 
*.siteone.io\\n   i:C = BE, O = GlobalSign nv-sa, CN = GlobalSign GCC R6 AlphaSSL CA 2025\\n   a:PKEY: id-ecPublicKey, 256 (bit); sigalg: RSA-SHA256\\n   v:NotBefore: Feb  9 15:43:30 2026 GMT; NotAfter: Mar 13 15:43:29 2027 GMT\\n 1 s:C = BE, O = GlobalSign nv-sa, CN = GlobalSign GCC R6 AlphaSSL CA 2025\\n   i:OU = GlobalSign Root CA - R6, O = GlobalSign, CN = GlobalSign\\n   a:PKEY: rsaEncryption, 2048 (bit); sigalg: RSA-SHA256\\n   v:NotBefore: May 21 02:36:52 2025 GMT; NotAfter: May 21 00:00:00 2027 GMT\\n---\\nServer certificate\\n-----BEGIN CERTIFICATE-----\\nMIIFlDCCBHygAwIBAgIMOl7hkrMYsQyK/9bWMA0GCSqGSIb3DQEBCwUAMFUxCzAJ\\nBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9iYWxTaWduIG52LXNhMSswKQYDVQQDEyJH\\nbG9iYWxTaWduIEdDQyBSNiBBbHBoYVNTTCBDQSAyMDI1MB4XDTI2MDIwOTE1NDMz\\nMFoXDTI3MDMxMzE1NDMyOVowFzEVMBMGA1UEAwwMKi5zaXRlb25lLmlvMFkwEwYH\\nKoZIzj0CAQYIKoZIzj0DAQcDQgAEb3QI+VqnDP9pMH8VEpCekY6APhIrzyZpA0JC\\niLy1PU1zdpC7OTDN/YJ5w4gf2UAGdAQ3pUiGK9fvy7bxnzl/mqOCA2swggNnMA4G\\nA1UdDwEB/wQEAwIHgDAMBgNVHRMBAf8EAjAAMIGZBggrBgEFBQcBAQSBjDCBiTBJ\\nBggrBgEFBQcwAoY9aHR0cDovL3NlY3VyZS5nbG9iYWxzaWduLmNvbS9jYWNlcnQv\\nZ3NnY2NyNmFscGhhc3NsY2EyMDI1LmNydDA8BggrBgEFBQcwAYYwaHR0cDovL29j\\nc3AuZ2xvYmFsc2lnbi5jb20vZ3NnY2NyNmFscGhhc3NsY2EyMDI1MFcGA1UdIARQ\\nME4wCAYGZ4EMAQIBMEIGCisGAQQBoDIKAQMwNDAyBggrBgEFBQcCARYmaHR0cHM6\\nLy93d3cuZ2xvYmFsc2lnbi5jb20vcmVwb3NpdG9yeS8wRAYDVR0fBD0wOzA5oDeg\\nNYYzaHR0cDovL2NybC5nbG9iYWxzaWduLmNvbS9nc2djY3I2YWxwaGFzc2xjYTIw\\nMjUuY3JsMCMGA1UdEQQcMBqCDCouc2l0ZW9uZS5pb4IKc2l0ZW9uZS5pbzAdBgNV\\nHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwHwYDVR0jBBgwFoAUxbSTj28r3B5I\\nv7cQMIXO0bK7SC0wHQYDVR0OBBYEFMfhTZO9ohiq9P878I1/f40uwC/EMIIBhgYK\\nKwYBBAHWeQIEAgSCAXYEggFyAXAAdgAcn2gs6frwRWlQ+BuWiofd2zIQ2EzmyLLj\\nglJKxM9ZnwAAAZxDEohuAAAEAwBHMEUCIQC5LL6ZM2qa4msPRSEdYVehROAvGpeP\\nbrIgkOopxo4rsAIgaIhI1duRBba9w4q7tEsGr4ZqwBRHnPRJUQSgfsckQ4QAfwCO\\nykcLrN5q86IGsKR6hLdG/h/Gv5U+JeabTuQCSPPG6AAAAZxDEoeNAAgAAAUAAof3\\nQgQDAEgwRgIhAPidhfk5qEW6t+VmDvQwliXh3GiKJ/1QCcux6HtirIFDAiEA0IxY\\nm+kzsmmYL2djkgmouiiGkR
24Gqp2JBElV5Ws5GwAdQBMY9yY5Zwdq4j2Hoo93q6P\\nq0SjN3tfm5TD+6Gc/MG+JgAAAZxDEoWpAAAEAwBGMEQCIGmHVXLIQiUWlg2En1no\\ndjhsTvN7s/mmuQETxmGF0y/oAiAKrvKuXqp9qZKWBt4apTkmzut0+cUSf9yfW6mt\\nSgwzgTANBgkqhkiG9w0BAQsFAAOCAQEAVDgGZRuJr5fVtauIA0mZfA/6mdLPbQGj\\n62ef9tIP8hhBT3iTarjIZUwJtDD7UUVOJqIMz9lx5FyrKSCHrYdJAyIoC6pniQqZ\\n4QUnrsKAfdFeFpWmnD7TTE/QcEqk1rWkf7q+0ZE300Fjx3003tihEh2vCy3DSb5r\\nfEbMsvFd4/jS7Q5Jv1DnyMgGSy+P6tT3G0IQscbXdEIBblfsRrlz53JN+Wap4you\\n8XV8NBngTYON4t+mSVSEOnDwVeqU3oxP58wnHSo53UXtp2f5/kLRjt2ZZ0EaE+gP\\nvmbsKFFEb/lLgUVfIJmYoohAZOeG3U9WkV7Tt5Fp59J8FlPokSRIWw==\\n-----END CERTIFICATE-----\\nsubject=CN = *.siteone.io\\nissuer=C = BE, O = GlobalSign nv-sa, CN = GlobalSign GCC R6 AlphaSSL CA 2025\\n---\\nNo client certificate CA names sent\\nPeer signing digest: SHA256\\nPeer signature type: ECDSA\\nServer Temp Key: X25519, 253 bits\\n---\\nSSL handshake has read 3352 bytes and written 308 bytes\\nVerification: OK\\n---\\nNew, TLSv1.2, Cipher is ECDHE-ECDSA-AES256-GCM-SHA384\\nServer public key is 256 bit\\nSecure Renegotiation IS supported\\nCompression: NONE\\nExpansion: NONE\\nNo ALPN negotiated\\nSSL-Session:\\n    Protocol  : TLSv1.2\\n    Cipher    : ECDHE-ECDSA-AES256-GCM-SHA384\\n    Session-ID: 19B120BD7161E7019CEDF16CB4797E62F7446CD3EDEFFEDB608BF413921B4A7B\\n    Session-ID-ctx: \\n    Master-Key: 328F39D262554060A1EA39DF60ADF39266874AD839492BC4015B4C5549D01B077F950819D7E595B0A467ED00490F61A9\\n    PSK identity: None\\n    PSK identity hint: None\\n    SRP username: None\\n    TLS session ticket lifetime hint: 600 (seconds)\\n    TLS session ticket:\\n    0000 - 1b c2 18 67 f0 52 b2 61-ea 4f a2 db 95 f6 e4 91   ...g.R.a.O......\\n    0010 - dd ec bc 83 82 0a 46 a5-6d a2 2c 4b bf 49 90 5c   ......F.m.,K.I.\\\\\\n    0020 - 3c 3a b5 c6 01 db a3 31-24 02 83 c6 7e b1 94 91   <:.....1$...~...\\n    0030 - 35 94 26 12 fc 24 bb 55-74 0e b9 cd a8 9d 55 8c   5.&..$.Ut.....U.\\n    0040 - ac 2b d1 28 f3 4d 5f c3-f9 84 a5 24 99 ce 1f 32   .+.(.M_....$...2\\n    0050 
- 25 bb 63 49 33 55 3b 5e-74 fd 4e 76 f7 94 e8 52   %.cI3U;^t.Nv...R\\n    0060 - f5 a1 ad 53 04 2e 13 c0-60 4d 6e fb 70 e4 a0 07   ...S....`Mn.p...\\n    0070 - 4e 16 ab 18 f1 9e 43 92-9f a5 0c ad c8 6c 42 50   N.....C......lBP\\n    0080 - da 1f cf a5 47 e2 38 3a-03 3f d6 a9 5b d5 e0 79   ....G.8:.?..[..y\\n    0090 - 7e 49 1f 9c 5c 9d b0 f9-70 63 ff 19 9e 05 ff 35   ~I..\\\\...pc.....5\\n    00a0 - 6b a4 e0 88 60 92 65 cf-88 19 db 4f d8 92 10 5f   k...`.e....O..._\\n    00b0 - f9 e0 3b d7 d1 c0 91 9d-ee 4e f9 31 1d a2 cd 0b   ..;......N.1....\\n    00c0 - a2 dd ab 49 63 e5 38 40-a0 00 72 54 fe f3 e8 c8   ...Ic.8@..rT....\\n\\n    Start Time: 1773672922\\n    Timeout   : 7200 (sec)\\n    Verify return code: 0 (ok)\\n    Extended master secret: yes\\n---\\nDONE\\n\\n=== tls1_3 ===\\n401761A9FC7D0000:error:0A00042E:SSL routines:ssl3_read_bytes:tlsv1 alert protocol version:ssl/record/rec_layer_s3.c:1605:SSL alert number 70\\nCONNECTED(00000003)\\n---\\nno peer certificate available\\n---\\nNo client certificate CA names sent\\n---\\nSSL handshake has read 7 bytes and written 252 bytes\\nVerification: OK\\n---\\nNew, (NONE), Cipher is (NONE)\\nSecure Renegotiation IS NOT supported\\nCompression: NONE\\nExpansion: NONE\\nNo ALPN negotiated\\nEarly data was not sent\\nVerify return code: 0 (ok)\\n---\\n\"\n        }\n      ],\n      \"title\": \"SSL/TLS info\"\n    },\n    \"content-processors-stats\": {\n      \"aplCode\": \"content-processors-stats\",\n      \"columns\": {\n        \"classAndMethod\": {\n          \"aplCode\": \"classAndMethod\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Class::method\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"execCount\": {\n        
  \"aplCode\": \"execCount\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Exec count\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"execTime\": {\n          \"aplCode\": \"execTime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Exec time\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 9\n        }\n      },\n      \"position\": \"after-url-table\",\n      \"rows\": [\n        {\n          \"classAndMethod\": \"HtmlProcessor::findUrls\",\n          \"execCount\": \"55\",\n          \"execTime\": \"0.04634062399999998\",\n          \"execTimeFormatted\": \"46 ms\"\n        },\n        {\n          \"classAndMethod\": \"NextJsProcessor::applyContentChangesBeforeUrlParsing\",\n          \"execCount\": \"63\",\n          \"execTime\": \"0.010084196\",\n          \"execTimeFormatted\": \"10 ms\"\n        },\n        {\n          \"classAndMethod\": \"JavaScriptProcessor::findUrls\",\n          \"execCount\": \"60\",\n          \"execTime\": \"0.008742057000000001\",\n          \"execTimeFormatted\": \"8 ms\"\n        },\n        {\n          \"classAndMethod\": \"AstroProcessor::findUrls\",\n          \"execCount\": \"60\",\n          \"execTime\": \"0.001524671000000001\",\n          \"execTimeFormatted\": \"1 ms\"\n        },\n        {\n          \"classAndMethod\": \"CssProcessor::findUrls\",\n          \"execCount\": \"58\",\n          \"execTime\": \"0.001067747\",\n          \"execTimeFormatted\": \"1 
ms\"\n        },\n        {\n          \"classAndMethod\": \"AstroProcessor::applyContentChangesBeforeUrlParsing\",\n          \"execCount\": \"60\",\n          \"execTime\": \"0.000086117\",\n          \"execTimeFormatted\": \"0 ms\"\n        },\n        {\n          \"classAndMethod\": \"NextJsProcessor::findUrls\",\n          \"execCount\": \"63\",\n          \"execTime\": \"0.000026580999999999995\",\n          \"execTimeFormatted\": \"0 ms\"\n        },\n        {\n          \"classAndMethod\": \"JavaScriptProcessor::applyContentChangesBeforeUrlParsing\",\n          \"execCount\": \"60\",\n          \"execTime\": \"0.000020106999999999992\",\n          \"execTimeFormatted\": \"0 ms\"\n        },\n        {\n          \"classAndMethod\": \"CssProcessor::applyContentChangesBeforeUrlParsing\",\n          \"execCount\": \"58\",\n          \"execTime\": \"0.0000033529999999999995\",\n          \"execTimeFormatted\": \"0 ms\"\n        },\n        {\n          \"classAndMethod\": \"SvelteProcessor::applyContentChangesBeforeUrlParsing\",\n          \"execCount\": \"55\",\n          \"execTime\": \"0.0000025889999999999997\",\n          \"execTimeFormatted\": \"0 ms\"\n        },\n        {\n          \"classAndMethod\": \"HtmlProcessor::applyContentChangesBeforeUrlParsing\",\n          \"execCount\": \"55\",\n          \"execTime\": \"0.000002129\",\n          \"execTimeFormatted\": \"0 ms\"\n        },\n        {\n          \"classAndMethod\": \"SvelteProcessor::findUrls\",\n          \"execCount\": \"55\",\n          \"execTime\": \"0.0000020479999999999997\",\n          \"execTimeFormatted\": \"0 ms\"\n        }\n      ],\n      \"title\": \"Content processor stats\"\n    },\n    \"content-types\": {\n      \"aplCode\": \"content-types\",\n      \"columns\": {\n        \"avgTime\": {\n          \"aplCode\": \"avgTime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          
\"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Avg time\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 8\n        },\n        \"contentType\": {\n          \"aplCode\": \"contentType\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Content type\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 12\n        },\n        \"count\": {\n          \"aplCode\": \"count\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"URLs\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 5\n        },\n        \"status20x\": {\n          \"aplCode\": \"status20x\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Status 20x\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"status40x\": {\n          \"aplCode\": \"status40x\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Status 40x\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": 
null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"status42x\": {\n          \"aplCode\": \"status42x\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Status 42x\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"totalSize\": {\n          \"aplCode\": \"totalSize\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Total size\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"totalTime\": {\n          \"aplCode\": \"totalTime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Total time\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"avgTime\": \"0.0083\",\n          \"contentType\": \"HTML\",\n          \"count\": \"55\",\n          \"status20x\": \"51\",\n          \"status40x\": \"1\",\n          \"status42x\": \"3\",\n          \"totalSize\": \"2992356\",\n          \"totalTime\": \"0.4539\"\n        },\n        {\n          \"avgTime\": \"0.0866\",\n          \"contentType\": \"Image\",\n          \"count\": \"9\",\n          \"status20x\": \"9\",\n          \"status40x\": 
\"0\",\n          \"status42x\": \"0\",\n          \"totalSize\": \"18431137\",\n          \"totalTime\": \"0.7797\"\n        },\n        {\n          \"avgTime\": \"0.0119\",\n          \"contentType\": \"JS\",\n          \"count\": \"5\",\n          \"status20x\": \"5\",\n          \"status40x\": \"0\",\n          \"status42x\": \"0\",\n          \"totalSize\": \"9611\",\n          \"totalTime\": \"0.0596\"\n        },\n        {\n          \"avgTime\": \"0.0039\",\n          \"contentType\": \"CSS\",\n          \"count\": \"3\",\n          \"status20x\": \"3\",\n          \"status40x\": \"0\",\n          \"status42x\": \"0\",\n          \"totalSize\": \"80950\",\n          \"totalTime\": \"0.0118\"\n        },\n        {\n          \"avgTime\": \"0.0031\",\n          \"contentType\": \"Document\",\n          \"count\": \"1\",\n          \"status20x\": \"1\",\n          \"status40x\": \"0\",\n          \"status42x\": \"0\",\n          \"totalSize\": \"152\",\n          \"totalTime\": \"0.0031\"\n        }\n      ],\n      \"title\": \"Content types\"\n    },\n    \"content-types-raw\": {\n      \"aplCode\": \"content-types-raw\",\n      \"columns\": {\n        \"avgTime\": {\n          \"aplCode\": \"avgTime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Avg time\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 8\n        },\n        \"contentType\": {\n          \"aplCode\": \"contentType\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Content type\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": 
null,\n          \"truncateIfLonger\": false,\n          \"width\": 26\n        },\n        \"count\": {\n          \"aplCode\": \"count\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"URLs\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 5\n        },\n        \"status20x\": {\n          \"aplCode\": \"status20x\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Status 20x\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"status40x\": {\n          \"aplCode\": \"status40x\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Status 40x\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"status42x\": {\n          \"aplCode\": \"status42x\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Status 42x\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"totalSize\": {\n          \"aplCode\": \"totalSize\",\n          \"escapeOutputHtml\": true,\n         
 \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Total size\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"totalTime\": {\n          \"aplCode\": \"totalTime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Total time\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"avgTime\": \"0.0083\",\n          \"contentType\": \"text/html\",\n          \"count\": \"55\",\n          \"status20x\": \"51\",\n          \"status40x\": \"1\",\n          \"status42x\": \"3\",\n          \"totalSize\": \"2992356\",\n          \"totalTime\": \"0.4539\"\n        },\n        {\n          \"avgTime\": \"0.0119\",\n          \"contentType\": \"application/javascript\",\n          \"count\": \"5\",\n          \"status20x\": \"5\",\n          \"status40x\": \"0\",\n          \"status42x\": \"0\",\n          \"totalSize\": \"9611\",\n          \"totalTime\": \"0.0596\"\n        },\n        {\n          \"avgTime\": \"0.0039\",\n          \"contentType\": \"text/css\",\n          \"count\": \"3\",\n          \"status20x\": \"3\",\n          \"status40x\": \"0\",\n          \"status42x\": \"0\",\n          \"totalSize\": \"80950\",\n          \"totalTime\": \"0.0118\"\n        },\n        {\n          \"avgTime\": \"0.2140\",\n          \"contentType\": \"image/gif\",\n          \"count\": \"3\",\n          \"status20x\": \"3\",\n          \"status40x\": \"0\",\n          
\"status42x\": \"0\",\n          \"totalSize\": \"16441548\",\n          \"totalTime\": \"0.6421\"\n        },\n        {\n          \"avgTime\": \"0.0363\",\n          \"contentType\": \"image/avif\",\n          \"count\": \"2\",\n          \"status20x\": \"2\",\n          \"status40x\": \"0\",\n          \"status42x\": \"0\",\n          \"totalSize\": \"1939289\",\n          \"totalTime\": \"0.0726\"\n        },\n        {\n          \"avgTime\": \"0.0032\",\n          \"contentType\": \"image/webp\",\n          \"count\": \"2\",\n          \"status20x\": \"2\",\n          \"status40x\": \"0\",\n          \"status42x\": \"0\",\n          \"totalSize\": \"48954\",\n          \"totalTime\": \"0.0064\"\n        },\n        {\n          \"avgTime\": \"0.0293\",\n          \"contentType\": \"image/svg+xml\",\n          \"count\": \"2\",\n          \"status20x\": \"2\",\n          \"status40x\": \"0\",\n          \"status42x\": \"0\",\n          \"totalSize\": \"1346\",\n          \"totalTime\": \"0.0585\"\n        },\n        {\n          \"avgTime\": \"0.0031\",\n          \"contentType\": \"text/plain\",\n          \"count\": \"1\",\n          \"status20x\": \"1\",\n          \"status40x\": \"0\",\n          \"status42x\": \"0\",\n          \"totalSize\": \"152\",\n          \"totalTime\": \"0.0031\"\n        }\n      ],\n      \"title\": \"Content types (MIME types)\"\n    },\n    \"dns\": {\n      \"aplCode\": \"dns\",\n      \"columns\": {\n        \"info\": {\n          \"aplCode\": \"info\",\n          \"escapeOutputHtml\": false,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"DNS resolving tree\",\n          \"nonBreakingSpaces\": true,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 70\n        }\n      },\n      \"position\": \"after-url-table\",\n      \"rows\": [\n        {\n 
         \"info\": \"crawler.siteone.io\"\n        },\n        {\n          \"info\": \"  IPv4: 86.49.167.242\"\n        },\n        {\n          \"info\": \"\"\n        },\n        {\n          \"info\": \"DNS server: 10.255.255.254\"\n        }\n      ],\n      \"title\": \"DNS info\"\n    },\n    \"external-urls\": {\n      \"aplCode\": \"external-urls\",\n      \"columns\": {\n        \"count\": {\n          \"aplCode\": \"count\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Pages\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 5\n        },\n        \"foundOn\": {\n          \"aplCode\": \"foundOn\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Found on URL (max 5)\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"url\": {\n          \"aplCode\": \"url\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"External URL\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://adamwathan.me/\"\n        },\n        {\n        
  \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/support-us/\",\n          \"url\": \"https://alternativeto.net/software/siteone-crawler--deep-website-analyzer/about/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://chat.openai.com/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://cz.linkedin.com/in/janbezdek\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://daisyui.com/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://discord.gg/Uh66HaZJ\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/contact-and-community/\",\n          \"url\": \"https://discord.gg/fdm7KE8Z\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://en.wikipedia.org/wiki/Larry_Page\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://en.wikipedia.org/wiki/Sergey_Brin\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://en.wikipedia.org/wiki/Steve_Jobs\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://en.wikipedia.org/wiki/Tilman_Hausherr\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n   
       \"url\": \"https://github.com/janreges/siteone-crawler\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/features/ease-of-use/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/issues/new\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/desktop-application/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-arm64-1.0.8.AppImage\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/desktop-application/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-arm64-1.0.8.deb\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/desktop-application/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-arm64-1.0.8.snap\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-x64-1.0.8.AppImage\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": 
\"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-x64-1.0.8.deb\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-x64-1.0.8.snap\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-mac-arm64-1.0.8.dmg\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-mac-x64-1.0.8.dmg\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-win-x64-1.0.8-portable.exe\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-win-x64-1.0.8-setup.exe\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-win-x64-1.0.8.msi\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-markdown-examples/blob/main/react.dev/index.md\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/faq/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler/\"\n       
 },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/contact-and-community/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler/discussions\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler/issues\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/features/ease-of-use/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler/issues/new\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/ready-to-use-packages/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler/releases\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/ready-to-use-packages/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler/releases/download/v1.0.8/siteone-crawler-v1.0.8-win-x64.zip\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://github.com/matyhtf\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/ideas-and-roadmap/\",\n          \"url\": \"https://github.com/swoole/swoole-src\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://github.com/swoole/swoole-src/releases\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/manual-installation/\",\n          \"url\": 
\"https://github.com/swoole/swoole-src/releases/download/v4.8.13/swoole-cli-v4.8.13-cygwin-x64.zip\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://github.com/swoole/swoole-src/releases/download/v4.8.13/swoole-cli-v4.8.13-linux-arm64.tar.xz\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://github.com/swoole/swoole-src/releases/download/v4.8.13/swoole-cli-v4.8.13-macos-arm64.tar.xz\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://github.com/swoole/swoole-src/releases/download/v4.8.13/swoole-cli-v4.8.13-macos-x64.tar.xz\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://home.snafu.de/tilman/xenulink.html\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://learn.microsoft.com/en-us/windows/wsl/about\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/ready-to-use-packages/\",\n          \"url\": \"https://learn.microsoft.com/en-us/windows/wsl/install\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://nette.org/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/advanced-topics/contribution-and-development/\",\n          \"url\": \"https://opensource.guide/\"\n  
      },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/ideas-and-roadmap/\",\n          \"url\": \"https://openswoole.com/docs/modules/swoole-table\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/advanced-topics/contribution-and-development/\",\n          \"url\": \"https://phpbestpractices.org/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://phpstan.org/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/advanced-topics/contribution-and-development/\",\n          \"url\": \"https://phptherightway.com/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://platform-api.sharethis.com/js/sharethis.js\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/ideas-and-roadmap/\",\n          \"url\": \"https://reactphp.org/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://starlight.astro.build/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://svelte.dev/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://tailwindcss.com/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://twitter.com/BillGates\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": 
\"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://twitter.com/DavidGrudl\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://twitter.com/OndrejMirtes\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://twitter.com/elonmusk\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://twitter.com/machal\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://twitter.com/rich_harris\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://twitter.com/ryancarniato\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://twitter.com/saadeghi?lang=cs\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://twitter.com/sama\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://twitter.com/siteone_crawler\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://twitter.com/spazef0rze\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://twitter.com/swithinbank\"\n        },\n        {\n          \"count\": \"1\",\n          
\"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://twitter.com/zdendac\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://ubuntu.com/wsl\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://www.amd.com/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://www.cdn77.com/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/ideas-and-roadmap/\",\n          \"url\": \"https://www.cygwin.com/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://www.electronjs.org/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://www.jetbrains.com/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://www.lenovo.com/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://www.linkedin.com/in/linustorvalds\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://www.linuxfordevices.com/tutorials/linux/install-debian-on-windows-wsl\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          
\"url\": \"https://www.michalspacek.cz/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/advanced-topics/contribution-and-development/\",\n          \"url\": \"https://www.php-fig.org/psr/psr-12/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/configuration/command-line-options/\",\n          \"url\": \"https://www.php.net/manual/en/timezones.php\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/contact-and-community/\",\n          \"url\": \"https://www.reddit.com/r/siteone_crawler/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/ideas-and-roadmap/\",\n          \"url\": \"https://www.rust-lang.org/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://www.siteone.io/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://www.solidjs.com/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://www.spse-po.sk/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://www.swoole.com/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/thanks/\",\n          \"url\": \"https://www.vzhurudolu.cz/\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/configuration/command-line-options/\",\n          \"url\": \"https://www.w3schools.com/xml/xpath_syntax.asp\"\n        },\n        {\n   
       \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/\",\n          \"url\": \"https://www.youtube.com/@SiteOne-Crawler\"\n        },\n        {\n          \"count\": \"1\",\n          \"foundOn\": \"https://crawler.siteone.io/introduction/faq/\",\n          \"url\": \"https://x.com/janreges\"\n        }\n      ],\n      \"title\": \"External URLs\"\n    },\n    \"fastest-urls\": {\n      \"aplCode\": \"fastest-urls\",\n      \"columns\": {\n        \"requestTime\": {\n          \"aplCode\": \"requestTime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Time\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 6\n        },\n        \"statusCode\": {\n          \"aplCode\": \"statusCode\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Status\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 6\n        },\n        \"url\": {\n          \"aplCode\": \"url\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Fast URL\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 118\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"requestTime\": \"0.0041\",\n          \"statusCode\": \"200\",\n          \"url\": 
\"https://crawler.siteone.io/introduction/support-us/\"\n        },\n        {\n          \"requestTime\": \"0.0042\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/heading-analysis/\"\n        },\n        {\n          \"requestTime\": \"0.0044\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/accessibility-analysis/\"\n        },\n        {\n          \"requestTime\": \"0.0044\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/availability/\"\n        },\n        {\n          \"requestTime\": \"0.0044\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/introduction/overview/\"\n        },\n        {\n          \"requestTime\": \"0.0045\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/caching-analysis/\"\n        },\n        {\n          \"requestTime\": \"0.0045\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/mailer/\"\n        },\n        {\n          \"requestTime\": \"0.0045\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/security-analysis/\"\n        },\n        {\n          \"requestTime\": \"0.0045\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/exports-and-reports/\"\n        },\n        {\n          \"requestTime\": \"0.0046\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/introduction/thanks/\"\n        },\n        {\n          \"requestTime\": \"0.0046\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/sitemap-generator/\"\n        },\n        {\n          \"requestTime\": \"0.0046\",\n          \"statusCode\": \"200\",\n          \"url\": 
\"https://crawler.siteone.io/installation-and-requirements/system-requirements/\"\n        },\n        {\n          \"requestTime\": \"0.0046\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/dns-analysis/\"\n        },\n        {\n          \"requestTime\": \"0.0046\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/redirect-and-404-analysis/\"\n        },\n        {\n          \"requestTime\": \"0.0047\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/installation-and-requirements/desktop-application/\"\n        },\n        {\n          \"requestTime\": \"0.0048\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/ssl-tls-analysis/\"\n        },\n        {\n          \"requestTime\": \"0.0048\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/audit-report/\"\n        },\n        {\n          \"requestTime\": \"0.0048\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/dev-devops-assistant/\"\n        },\n        {\n          \"requestTime\": \"0.0048\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/website-to-markdown-converter/\"\n        },\n        {\n          \"requestTime\": \"0.0048\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/introduction/key-features/\"\n        }\n      ],\n      \"title\": \"TOP fastest URLs\"\n    },\n    \"headers\": {\n      \"aplCode\": \"headers\",\n      \"columns\": {\n        \"header\": {\n          \"aplCode\": \"header\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Header\",\n          
\"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"maxValue\": {\n          \"aplCode\": \"maxValue\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Max value\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"minValue\": {\n          \"aplCode\": \"minValue\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Min value\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 10\n        },\n        \"occurrences\": {\n          \"aplCode\": \"occurrences\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Occurs\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 6\n        },\n        \"uniqueValues\": {\n          \"aplCode\": \"uniqueValues\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Unique\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 6\n        },\n        \"valuesPreview\": {\n          
\"aplCode\": \"valuesPreview\",\n          \"escapeOutputHtml\": false,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Values preview\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 48\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"header\": \"Accept-Ranges\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n          \"occurrences\": \"11\",\n          \"uniqueValues\": \"1\",\n          \"valuesPreview\": \"bytes\"\n        },\n        {\n          \"header\": \"Cache-Control\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n          \"occurrences\": \"69\",\n          \"uniqueValues\": \"2\",\n          \"valuesPreview\": \"max-age=3600 (52) / max-age=31536000 (17)\"\n        },\n        {\n          \"header\": \"Content-Length\",\n          \"maxValue\": \"8 MB\",\n          \"minValue\": \"152 B\",\n          \"occurrences\": \"15\",\n          \"uniqueValues\": \"-\",\n          \"valuesPreview\": \"[ignored generic values]\"\n        },\n        {\n          \"header\": \"Content-Security-Policy\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n          \"occurrences\": \"52\",\n          \"uniqueValues\": \"1\",\n          \"valuesPreview\": \"default-src 'self' 'unsafe-inline' 'unsafe-eval' data: https://www.youtube.com h\\u001b[0;31m…\\u001b[0ms://*.sharethis.com https://*.ytimg.com\"\n        },\n        {\n          \"header\": \"Content-Type\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n          \"occurrences\": \"73\",\n          \"uniqueValues\": \"8\",\n          \"valuesPreview\": \"text/html (55) / application/javascript (5) / image/gif (3) / text/css (3) / ima\\u001b[0;31m…\\u001b[0ml (2) / 
image/avif (2) / text/plain (1)\"\n        },\n        {\n          \"header\": \"Date\",\n          \"maxValue\": \"2026-03-16\",\n          \"minValue\": \"2026-03-16\",\n          \"occurrences\": \"73\",\n          \"uniqueValues\": \"-\",\n          \"valuesPreview\": \"[ignored generic values]\"\n        },\n        {\n          \"header\": \"Etag\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n          \"occurrences\": \"73\",\n          \"uniqueValues\": \"-\",\n          \"valuesPreview\": \"[ignored generic values]\"\n        },\n        {\n          \"header\": \"Expires\",\n          \"maxValue\": \"2027-03-16\",\n          \"minValue\": \"2026-03-16\",\n          \"occurrences\": \"69\",\n          \"uniqueValues\": \"-\",\n          \"valuesPreview\": \"[ignored generic values]\"\n        },\n        {\n          \"header\": \"Feature-Policy\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n          \"occurrences\": \"73\",\n          \"uniqueValues\": \"1\",\n          \"valuesPreview\": \"accelerometer 'none'; camera 'none'; geolocation 'self'; gyroscope 'none'; magne\\u001b[0;31m…\\u001b[0mmidi 'none'; payment 'none'; usb 'none'\"\n        },\n        {\n          \"header\": \"Last-Modified\",\n          \"maxValue\": \"2025-06-08\",\n          \"minValue\": \"2025-05-06\",\n          \"occurrences\": \"69\",\n          \"uniqueValues\": \"-\",\n          \"valuesPreview\": \"[ignored generic values]\"\n        },\n        {\n          \"header\": \"Permissions-Policy\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n          \"occurrences\": \"73\",\n          \"uniqueValues\": \"1\",\n          \"valuesPreview\": \"accelerometer=(), camera=(), geolocation=(self), gyroscope=(), magnetometer=(),\\u001b[0;31m…\\u001b[0mrophone=(), midi=(), payment=(), usb=()\"\n        },\n        {\n          \"header\": \"Referrer-Policy\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n         
 \"occurrences\": \"73\",\n          \"uniqueValues\": \"1\",\n          \"valuesPreview\": \"no-referrer-when-downgrade\"\n        },\n        {\n          \"header\": \"Server\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n          \"occurrences\": \"73\",\n          \"uniqueValues\": \"1\",\n          \"valuesPreview\": \"-\"\n        },\n        {\n          \"header\": \"Strict-Transport-Security\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n          \"occurrences\": \"73\",\n          \"uniqueValues\": \"1\",\n          \"valuesPreview\": \"max-age=15552000\"\n        },\n        {\n          \"header\": \"Vary\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n          \"occurrences\": \"58\",\n          \"uniqueValues\": \"1\",\n          \"valuesPreview\": \"Accept-Encoding\"\n        },\n        {\n          \"header\": \"X-Content-Type-Options\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n          \"occurrences\": \"73\",\n          \"uniqueValues\": \"2\",\n          \"valuesPreview\": \"nosniff (56) / nosniff, nosniff (17)\"\n        },\n        {\n          \"header\": \"X-Frame-Options\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n          \"occurrences\": \"73\",\n          \"uniqueValues\": \"1\",\n          \"valuesPreview\": \"SAMEORIGIN\"\n        },\n        {\n          \"header\": \"X-XSS-Protection\",\n          \"maxValue\": \"\",\n          \"minValue\": \"\",\n          \"occurrences\": \"73\",\n          \"uniqueValues\": \"1\",\n          \"valuesPreview\": \"1; mode=block\"\n        }\n      ],\n      \"title\": \"HTTP headers\"\n    },\n    \"headers-values\": {\n      \"aplCode\": \"headers-values\",\n      \"columns\": {\n        \"header\": {\n          \"aplCode\": \"header\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": 
false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Header\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"occurrences\": {\n          \"aplCode\": \"occurrences\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Occurs\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 6\n        },\n        \"value\": {\n          \"aplCode\": \"value\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Value\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 82\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"header\": \"Accept-Ranges\",\n          \"occurrences\": \"11\",\n          \"value\": \"bytes\"\n        },\n        {\n          \"header\": \"Cache-Control\",\n          \"occurrences\": \"52\",\n          \"value\": \"max-age=3600\"\n        },\n        {\n          \"header\": \"Cache-Control\",\n          \"occurrences\": \"17\",\n          \"value\": \"max-age=31536000\"\n        },\n        {\n          \"header\": \"Content-Security-Policy\",\n          \"occurrences\": \"52\",\n          \"value\": \"default-src 'self' 'unsafe-inline' 'unsafe-eval' data: https://www.youtube.com https://*.ggpht.com https://*.gstatic.com https://*.google.com https://*.googleapis.com https://static.doubleclick.net https://*.sharethis.com https://*.ytimg.com\"\n        },\n    
    {\n          \"header\": \"Content-Type\",\n          \"occurrences\": \"55\",\n          \"value\": \"text/html\"\n        },\n        {\n          \"header\": \"Content-Type\",\n          \"occurrences\": \"5\",\n          \"value\": \"application/javascript\"\n        },\n        {\n          \"header\": \"Content-Type\",\n          \"occurrences\": \"3\",\n          \"value\": \"image/gif\"\n        },\n        {\n          \"header\": \"Content-Type\",\n          \"occurrences\": \"3\",\n          \"value\": \"text/css\"\n        },\n        {\n          \"header\": \"Content-Type\",\n          \"occurrences\": \"2\",\n          \"value\": \"image/webp\"\n        },\n        {\n          \"header\": \"Content-Type\",\n          \"occurrences\": \"2\",\n          \"value\": \"image/svg+xml\"\n        },\n        {\n          \"header\": \"Content-Type\",\n          \"occurrences\": \"2\",\n          \"value\": \"image/avif\"\n        },\n        {\n          \"header\": \"Content-Type\",\n          \"occurrences\": \"1\",\n          \"value\": \"text/plain\"\n        },\n        {\n          \"header\": \"Feature-Policy\",\n          \"occurrences\": \"73\",\n          \"value\": \"accelerometer 'none'; camera 'none'; geolocation 'self'; gyroscope 'none'; magnetometer 'none'; microphone 'none'; midi 'none'; payment 'none'; usb 'none'\"\n        },\n        {\n          \"header\": \"Permissions-Policy\",\n          \"occurrences\": \"73\",\n          \"value\": \"accelerometer=(), camera=(), geolocation=(self), gyroscope=(), magnetometer=(), microphone=(), midi=(), payment=(), usb=()\"\n        },\n        {\n          \"header\": \"Referrer-Policy\",\n          \"occurrences\": \"73\",\n          \"value\": \"no-referrer-when-downgrade\"\n        },\n        {\n          \"header\": \"Server\",\n          \"occurrences\": \"73\",\n          \"value\": \"-\"\n        },\n        {\n          \"header\": \"Strict-Transport-Security\",\n          
\"occurrences\": \"73\",\n          \"value\": \"max-age=15552000\"\n        },\n        {\n          \"header\": \"Vary\",\n          \"occurrences\": \"58\",\n          \"value\": \"Accept-Encoding\"\n        },\n        {\n          \"header\": \"X-Content-Type-Options\",\n          \"occurrences\": \"56\",\n          \"value\": \"nosniff\"\n        },\n        {\n          \"header\": \"X-Content-Type-Options\",\n          \"occurrences\": \"17\",\n          \"value\": \"nosniff, nosniff\"\n        },\n        {\n          \"header\": \"X-Frame-Options\",\n          \"occurrences\": \"73\",\n          \"value\": \"SAMEORIGIN\"\n        },\n        {\n          \"header\": \"X-XSS-Protection\",\n          \"occurrences\": \"73\",\n          \"value\": \"1; mode=block\"\n        }\n      ],\n      \"title\": \"HTTP header values\"\n    },\n    \"non-unique-descriptions\": {\n      \"aplCode\": \"non-unique-descriptions\",\n      \"columns\": {\n        \"count\": {\n          \"aplCode\": \"count\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Count\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 5\n        },\n        \"description\": {\n          \"aplCode\": \"description\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Description\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 128\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"count\": \"2\",\n          
\"description\": \"\"\n        }\n      ],\n      \"title\": \"TOP non-unique descriptions\"\n    },\n    \"non-unique-titles\": {\n      \"aplCode\": \"non-unique-titles\",\n      \"columns\": {\n        \"count\": {\n          \"aplCode\": \"count\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Count\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 5\n        },\n        \"title\": {\n          \"aplCode\": \"title\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Title\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 128\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [],\n      \"title\": \"TOP non-unique titles\"\n    },\n    \"open-graph\": {\n      \"aplCode\": \"open-graph\",\n      \"columns\": {\n        \"ogDescription\": {\n          \"aplCode\": \"ogDescription\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"OG Description\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 10\n        },\n        \"ogImage\": {\n          \"aplCode\": \"ogImage\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          
\"getDataValueCallback\": null,\n          \"name\": \"OG Image\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 18\n        },\n        \"ogTitle\": {\n          \"aplCode\": \"ogTitle\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"OG Title\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 10\n        },\n        \"twitterDescription\": {\n          \"aplCode\": \"twitterDescription\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Twitter Description\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 10\n        },\n        \"twitterImage\": {\n          \"aplCode\": \"twitterImage\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Twitter Image\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 18\n        },\n        \"twitterTitle\": {\n          \"aplCode\": \"twitterTitle\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Twitter Title\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n 
         \"truncateIfLonger\": true,\n          \"width\": 10\n        },\n        \"urlPathAndQuery\": {\n          \"aplCode\": \"urlPathAndQuery\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"URL\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 50\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"ogDescription\": \"A very useful and free website analyzer you'll ♥ as a Dev/DevOps, QA engineer, SEO or Security specialist, website owner or consultant. It performs in-depth analyzes of your website, generates an offline or markdown version of the website, provides a detailed HTML audit report and works on all popular platforms - Windows, macOS and Linux (x64 and arm64 too).\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"SiteOne Crawler - free website analyzer and exporter (cloner)\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/\"\n        },\n        {\n          \"ogDescription\": \"SiteOne Crawler implements an efficient HTTP response caching system to reduce network traffic and speed up repeated crawls.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Caching\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/advanced-topics/caching/\"\n        },\n        {\n          \"ogDescription\": \"Guidelines for contributing to SiteOne Crawler, including development setup, coding standards, and submission process for pull 
requests.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Contribution and Development\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/advanced-topics/contribution-and-development/\"\n        },\n        {\n          \"ogDescription\": \"Understand the core crawling mechanism of SiteOne Crawler, including URL handling, content processing, and navigational decisions.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Crawler Behavior\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/advanced-topics/crawler-behavior/\"\n        },\n        {\n          \"ogDescription\": \"SiteOne Crawler is designed to be extensible, allowing developers to add custom analyzers, content processors, and exporters to enhance its functionality.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Extending\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/advanced-topics/extending/\"\n        },\n        {\n          \"ogDescription\": \"Solutions to common issues encountered when using SiteOne Crawler, including performance problems, memory limitations, and crawling challenges.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Troubleshooting\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/advanced-topics/troubleshooting/\"\n        },\n        {\n          \"ogDescription\": \"This section describes all the available command-line options of the 
SiteOne Crawler tool.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Command-line options\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/configuration/command-line-options/\"\n        },\n        {\n          \"ogDescription\": \"This section describes examples and typical scenarios of using the SiteOne Crawler tool.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Examples\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/configuration/examples/\"\n        },\n        {\n          \"ogDescription\": \"SiteOne Crawler provides basic accessibility analysis, which can help developers to improve the accessibility of their websites.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Accessibility Analysis\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/accessibility-analysis/\"\n        },\n        {\n          \"ogDescription\": \"Learn about the comprehensive HTML audit report generated by SiteOne Crawler, which provides detailed analysis and insights about your website in an organized, interactive format.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Audit Report\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/audit-report/\"\n        },\n        {\n          \"ogDescription\": \"Maximum availability for a wide range of users and platforms is our priority. 
For this reason, you can download and install SiteOne Crawler on the following platforms: Windows (x64), macOS (Intel/Apple Silicon), Linux (x64/ARM).\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Availability\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/availability/\"\n        },\n        {\n          \"ogDescription\": \"SiteOne Crawler evaluates websites against a set of best practices to ensure optimal performance, SEO, and user experience.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Best Practices Analysis\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/best-practices-analysis/\"\n        },\n        {\n          \"ogDescription\": \"SiteOne Crawler analyzes HTTP cache headers across your website to identify optimization opportunities for faster loading times and better user experience.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Caching Analysis\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/caching-analysis/\"\n        },\n        {\n          \"ogDescription\": \"The Content Type analysis provides detailed insights into the types of content served by a website, focusing on their distribution, size, load times, and HTTP status codes.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Content Type Analysis\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": 
\"/features/content-type-analysis/\"\n        },\n        {\n          \"ogDescription\": \"SiteOne Crawler is designed to crawl every aspect of your website, including all files, styles, scripts, images, and more. Learn how it can help you find and inspect everything that can be crawled on your website.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Deep Website Crawling\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/deep-website-crawling/\"\n        },\n        {\n          \"ogDescription\": \"SiteOne Crawler offers several functionalities that can be useful for DevOps, such as testing public and local projects, password-protected websites, and stress tests.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Dev/DevOps assistant\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/dev-devops-assistant/\"\n        },\n        {\n          \"ogDescription\": \"The DNS Analysis feature examines domain name resolution, providing insights into DNS configuration, IPv4/IPv6 support, and potential DNS-related issues.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"DNS Analysis\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/dns-analysis/\"\n        },\n        {\n          \"ogDescription\": \"SiteOne Crawler is designed for maximum availability, ease of use, and functionality for the command-line and desktop application.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Ease of Use\",\n          
\"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/ease-of-use/\"\n        },\n        {\n          \"ogDescription\": \"SiteOne Crawler offers export and reporting features, enabling users to generate comprehensive reports in various formats and for different use cases.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Exports and Reports\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/exports-and-reports/\"\n        },\n        {\n          \"ogDescription\": \"Detailed analysis of HTTP response headers across your website, helping you identify security issues, performance opportunities, and best practices in header configuration.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"HTTP Headers Analysis\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/headers-analysis/\"\n        },\n        {\n          \"ogDescription\": \"The Heading Analysis evaluates the structure and organization of headings (H1, H2, H3, etc.) 
on a webpage to ensure they follow best practices for both accessibility and SEO.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Heading Analysis\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/heading-analysis/\"\n        },\n        {\n          \"ogDescription\": \"Track and visualize improvements in your website metrics over time, providing a clear measure of progress in areas like performance, SEO, accessibility, and security.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Improvement Meter\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/improvement-meter/\"\n        },\n        {\n          \"ogDescription\": \"The Mailer feature allows automated email delivery of HTML audit reports after a crawl is completed. This is particularly useful for sharing results with team members, proactively, e.g. 
as part of the CI/CD process.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Mailer\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/mailer/\"\n        },\n        {\n          \"ogDescription\": \"Export the entire website to offline form (clone, mirror), where it is possible to browse the site through local HTML files (without webserver) including all document, images, styles, scripts, fonts, etc.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Offline Website Generator (clone, mirror)\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/offline-website-generator/\"\n        },\n        {\n          \"ogDescription\": \"Upload an HTML report to our or your infrastructure and get a secure sharable unique URL for the report.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Audit Report Sharing (upload)\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/online-html-report-upload/\"\n        },\n        {\n          \"ogDescription\": \"The Performance Analysis offers detailed insights into the loading times of individual URLs. 
This is crucial for developers and site administrators to identify performance bottlenecks and optimize web pages.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Performance Analysis\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/performance-analysis/\"\n        },\n        {\n          \"ogDescription\": \"Identify the fastest and slowest pages on your website with detailed performance analysis, helping you optimize critical pages and fix bottlenecks.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Performance Metrics\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/performance-metrics/\"\n        },\n        {\n          \"ogDescription\": \"The redirects and 404 analysis reports on problematic URLs, focusing on 404 errors and redirects (301/302). 
This helps identify broken links and unnecessary redirects, which can negatively impact user experience and SEO.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Redirect and 404 Analysis\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/redirect-and-404-analysis/\"\n        },\n        {\n          \"ogDescription\": \"Evaluate the security of your website by analyzing HTTP headers, TLS/SSL configurations and cookie security.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Security Analysis\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/security-analysis/\"\n        },\n        {\n          \"ogDescription\": \"Analyze your website for SEO best practices and social media optimization with detailed insights into metadata, heading structure, duplicate content, and OpenGraph tag implementation.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"SEO and OpenGraph Analysis\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/seo-and-opengraph-analysis/\"\n        },\n        {\n          \"ogDescription\": \"Automatically generate comprehensive XML and TXT sitemaps of your website to improve search engine visibility and indexing efficiency.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Sitemap Generator\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/sitemap-generator/\"\n        },\n       
 {\n          \"ogDescription\": \"Analyze how resources are distributed across different domains in your website, helping you identify opportunities for optimization and potential issues with third-party dependencies.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Source Domains Analysis\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/source-domains-analysis/\"\n        },\n        {\n          \"ogDescription\": \"Comprehensive analysis of your website SSL/TLS implementation, examining certificate configuration, protocol support, and security best practices.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"SSL/TLS Analysis\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/ssl-tls-analysis/\"\n        },\n        {\n          \"ogDescription\": \"The \\\"Stress Testing\\\" feature in SiteOne Crawler allows you to evaluate how a website performs under various load conditions by customizing the number of workers and maximum requests per second.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Stress Testing\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/stress-testing/\"\n        },\n        {\n          \"ogDescription\": \"SiteOne Crawler provides in-depth technical analysis of your website, examining HTTP headers, server configurations, and technical implementations to identify issues and optimization opportunities.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Technical Analysis\",\n 
         \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/technical-analysis/\"\n        },\n        {\n          \"ogDescription\": \"Export or convert an entire website with all subpages to browsable markdown.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Website to Markdown Converter\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/features/website-to-markdown-converter/\"\n        },\n        {\n          \"ogDescription\": \"This section describes the advanced usage of the SiteOne Crawler tool.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Advanced usage\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/getting-started/advanced-usage/\"\n        },\n        {\n          \"ogDescription\": \"Basic usage of the Crawler - how to crawl a website, generate offline version or send HTML report to your e-mail.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Basic Usage\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/getting-started/basic-usage/\"\n        },\n        {\n          \"ogDescription\": \"This guide will help you get started with the SiteOne Crawler tool in a few minutes.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Quick Start Guide\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": 
\"/getting-started/quick-start-guide/\"\n        },\n        {\n          \"ogDescription\": \"Install and use the user-friendly desktop application of SiteOne Crawler, available for Windows, macOS, and Linux with a full graphical interface for easier website analysis and reporting.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Desktop Application (GUI)\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/installation-and-requirements/desktop-application/\"\n        },\n        {\n          \"ogDescription\": \"Learn how to manually install SiteOne Crawler from source code on various operating systems, including detailed steps for meeting dependencies and compilation requirements.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"CLI: Manual Installation\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/installation-and-requirements/manual-installation/\"\n        },\n        {\n          \"ogDescription\": \"Download and install SiteOne Crawler quickly using pre-built packages for Windows, macOS, and Linux, eliminating the need for complex setup procedures or dependencies.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"CLI: Ready-to-use Packages\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/installation-and-requirements/ready-to-use-packages/\"\n        },\n        {\n          \"ogDescription\": \"The crawler can handle even one core of any common Intel/AMD CPU of the last 10 years and hundreds of MB of RAM. 
ARM CPUs are also supported.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"System Requirements\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/installation-and-requirements/system-requirements/\"\n        },\n        {\n          \"ogDescription\": \"\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Contact and Community\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/introduction/contact-and-community/\"\n        },\n        {\n          \"ogDescription\": \"Answers to frequently asked questions about SiteOne Crawler, including details about its purpose, target users, unique features, and practical usage scenarios.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"FAQ\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/introduction/faq/\"\n        },\n        {\n          \"ogDescription\": \"Explore the future development plans for SiteOne Crawler, including upcoming features, community-driven priorities, and the vision for enhancing website analysis, export capabilities, and quality assessment tools.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Ideas and Roadmap\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/introduction/ideas-and-roadmap/\"\n        },\n        {\n          \"ogDescription\": \"SiteOne Crawler is a free tool for website analysis and optimization. 
It provides a wide range of features, including deep website crawling, SEO analysis, accessibility analysis, and more. Available for all platforms.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Key Features\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/introduction/key-features/\"\n        },\n        {\n          \"ogDescription\": \"Learn about the personal journey and professional background of SiteOne Crawler creator Ján Regeš, and understand the inspiration and experiences that led to the development of this tool.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Motivation\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/introduction/motivation/\"\n        },\n        {\n          \"ogDescription\": \"The main purpose of SiteOne Crawler is to help owners, consultants, developers, QA engineers and DevOps find weaknesses and help improve the quality of their websites in various areas - SEO, security, performance, accessibility, socnets-sharing, best practices or content issues.\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Overview\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/introduction/overview/\"\n        },\n        {\n          \"ogDescription\": \"\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Support Us\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/introduction/support-us/\"\n        },\n     
   {\n          \"ogDescription\": \"This tool wouldn't have been created if it weren't for great people in my near and far surroundings. I would like to thank them all...\",\n          \"ogImage\": \"https://crawler.siteone.io/siteone-crawler-collage.png\",\n          \"ogTitle\": \"Thanks\",\n          \"twitterDescription\": \"\",\n          \"twitterImage\": \"\",\n          \"twitterTitle\": \"\",\n          \"urlPathAndQuery\": \"/introduction/thanks/\"\n        }\n      ],\n      \"title\": \"OpenGraph metadata\"\n    },\n    \"redirects\": {\n      \"aplCode\": \"redirects\",\n      \"columns\": {\n        \"sourceUqId\": {\n          \"aplCode\": \"sourceUqId\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Found at URL\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 39\n        },\n        \"statusCode\": {\n          \"aplCode\": \"statusCode\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Status\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 6\n        },\n        \"targetUrl\": {\n          \"aplCode\": \"targetUrl\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Target URL\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 39\n        },\n        \"url\": {\n   
       \"aplCode\": \"url\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Redirected URL\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 39\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [],\n      \"title\": \"Redirected URLs\"\n    },\n    \"security\": {\n      \"aplCode\": \"security\",\n      \"columns\": {\n        \"critical\": {\n          \"aplCode\": \"critical\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Critical\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 8\n        },\n        \"header\": {\n          \"aplCode\": \"header\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Header\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 26\n        },\n        \"notice\": {\n          \"aplCode\": \"notice\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Notice\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 6\n        },\n        \"ok\": {\n          \"aplCode\": 
\"ok\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"OK\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 5\n        },\n        \"recommendation\": {\n          \"aplCode\": \"recommendation\",\n          \"escapeOutputHtml\": false,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Recommendation\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 68\n        },\n        \"warning\": {\n          \"aplCode\": \"warning\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Warning\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 7\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"critical\": \"3\",\n          \"header\": \"Content-Security-Policy\",\n          \"highestSeverity\": \"4\",\n          \"notice\": \"0\",\n          \"ok\": \"51\",\n          \"recommendation\": \"Content-Security-Policy header is not set. 
It restricts resources the page can load and prevents XSS attacks.\",\n          \"warning\": \"0\"\n        },\n        {\n          \"critical\": \"0\",\n          \"header\": \"X-Frame-Options\",\n          \"highestSeverity\": \"2\",\n          \"notice\": \"54\",\n          \"ok\": \"0\",\n          \"recommendation\": \"X-Frame-Options header is set to SAMEORIGIN which allows this origin to embed the resource in a frame.\",\n          \"warning\": \"0\"\n        },\n        {\n          \"critical\": \"0\",\n          \"header\": \"X-XSS-Protection\",\n          \"highestSeverity\": \"2\",\n          \"notice\": \"54\",\n          \"ok\": \"0\",\n          \"recommendation\": \"X-XSS-Protection header is set but deprecated. Consider removing it and using Content-Security-Policy instead.\",\n          \"warning\": \"0\"\n        },\n        {\n          \"critical\": \"0\",\n          \"header\": \"Strict-Transport-Security\",\n          \"highestSeverity\": \"1\",\n          \"notice\": \"0\",\n          \"ok\": \"54\",\n          \"recommendation\": \"\",\n          \"warning\": \"0\"\n        },\n        {\n          \"critical\": \"0\",\n          \"header\": \"X-Content-Type-Options\",\n          \"highestSeverity\": \"1\",\n          \"notice\": \"0\",\n          \"ok\": \"54\",\n          \"recommendation\": \"\",\n          \"warning\": \"0\"\n        },\n        {\n          \"critical\": \"0\",\n          \"header\": \"Referrer-Policy\",\n          \"highestSeverity\": \"1\",\n          \"notice\": \"0\",\n          \"ok\": \"54\",\n          \"recommendation\": \"\",\n          \"warning\": \"0\"\n        },\n        {\n          \"critical\": \"0\",\n          \"header\": \"Feature-Policy\",\n          \"highestSeverity\": \"1\",\n          \"notice\": \"0\",\n          \"ok\": \"54\",\n          \"recommendation\": \"\",\n          \"warning\": \"0\"\n        },\n        {\n          \"critical\": \"0\",\n          \"header\": 
\"Permissions-Policy\",\n          \"highestSeverity\": \"1\",\n          \"notice\": \"0\",\n          \"ok\": \"54\",\n          \"recommendation\": \"\",\n          \"warning\": \"0\"\n        },\n        {\n          \"critical\": \"0\",\n          \"header\": \"Server\",\n          \"highestSeverity\": \"1\",\n          \"notice\": \"0\",\n          \"ok\": \"54\",\n          \"recommendation\": \"Server header is not set or empty. This is recommended.\",\n          \"warning\": \"0\"\n        }\n      ],\n      \"title\": \"Security\"\n    },\n    \"seo\": {\n      \"aplCode\": \"seo\",\n      \"columns\": {\n        \"description\": {\n          \"aplCode\": \"description\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Description\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 12\n        },\n        \"h1\": {\n          \"aplCode\": \"h1\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"H1\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 12\n        },\n        \"indexing\": {\n          \"aplCode\": \"indexing\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Indexing\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": {},\n          \"truncateIfLonger\": false,\n          \"width\": 20\n        },\n        \"keywords\": {\n          
\"aplCode\": \"keywords\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Keywords\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 12\n        },\n        \"title\": {\n          \"aplCode\": \"title\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Title\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 12\n        },\n        \"urlPathAndQuery\": {\n          \"aplCode\": \"urlPathAndQuery\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"URL\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 50\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"A very useful and free website analyzer you'll ♥ as a Dev/DevOps, QA engineer, SEO or Security specialist, website owner or consultant. 
It performs in-depth analyzes of your website, generates an offline or markdown version of the website, provides a detailed HTML audit report and works on all popular platforms - Windows, macOS and Linux (x64 and arm64 too).\",\n          \"h1\": \"SiteOne Crawler\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"SiteOne Crawler - free website analyzer, offline exporter, sitemap generator and Swiss Army Knife, you will love\",\n          \"urlPathAndQuery\": \"/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"SiteOne Crawler implements an efficient HTTP response caching system to reduce network traffic and speed up repeated crawls.\",\n          \"h1\": \"Caching\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Caching • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/advanced-topics/caching/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Guidelines for contributing to SiteOne Crawler, including development setup, coding standards, and submission process for pull requests.\",\n          \"h1\": \"Contribution and Development\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Contribution and Development • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/advanced-topics/contribution-and-development/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Understand the core crawling mechanism of SiteOne Crawler, including URL handling, content processing, and navigational decisions.\",\n          \"h1\": \"Crawler Behavior\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Crawler Behavior • SiteOne Crawler\",\n         
 \"urlPathAndQuery\": \"/advanced-topics/crawler-behavior/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"SiteOne Crawler is designed to be extensible, allowing developers to add custom analyzers, content processors, and exporters to enhance its functionality.\",\n          \"h1\": \"Extending\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Extending • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/advanced-topics/extending/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Solutions to common issues encountered when using SiteOne Crawler, including performance problems, memory limitations, and crawling challenges.\",\n          \"h1\": \"Troubleshooting\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Troubleshooting • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/advanced-topics/troubleshooting/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"This section describes all the available command-line options of the SiteOne Crawler tool.\",\n          \"h1\": \"Command-line options\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Command-line options • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/configuration/command-line-options/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"This section describes examples and typical scenarios of using the SiteOne Crawler tool.\",\n          \"h1\": \"Examples\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Examples • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/configuration/examples/\"\n        
},\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"SiteOne Crawler provides basic accessibility analysis, which can help developers to improve the accessibility of their websites.\",\n          \"h1\": \"Accessibility Analysis\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Accessibility Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/accessibility-analysis/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Learn about the comprehensive HTML audit report generated by SiteOne Crawler, which provides detailed analysis and insights about your website in an organized, interactive format.\",\n          \"h1\": \"Audit Report\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Audit Report • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/audit-report/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Maximum availability for a wide range of users and platforms is our priority. 
For this reason, you can download and install SiteOne Crawler on the following platforms: Windows (x64), macOS (Intel/Apple Silicon), Linux (x64/ARM).\",\n          \"h1\": \"Availability\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Availability • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/availability/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"SiteOne Crawler evaluates websites against a set of best practices to ensure optimal performance, SEO, and user experience.\",\n          \"h1\": \"Best Practices Analysis\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Best Practices Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/best-practices-analysis/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"SiteOne Crawler analyzes HTTP cache headers across your website to identify optimization opportunities for faster loading times and better user experience.\",\n          \"h1\": \"Caching Analysis\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Caching Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/caching-analysis/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"The Content Type analysis provides detailed insights into the types of content served by a website, focusing on their distribution, size, load times, and HTTP status codes.\",\n          \"h1\": \"Content Type Analysis\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Content Type Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/content-type-analysis/\"\n        
},\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"SiteOne Crawler is designed to crawl every aspect of your website, including all files, styles, scripts, images, and more. Learn how it can help you find and inspect everything that can be crawled on your website.\",\n          \"h1\": \"Deep Website Crawling\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Deep Website Crawling • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/deep-website-crawling/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"SiteOne Crawler offers several functionalities that can be useful for DevOps, such as testing public and local projects, password-protected websites, and stress tests.\",\n          \"h1\": \"Dev/DevOps assistant\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Dev/DevOps assistant • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/dev-devops-assistant/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"The DNS Analysis feature examines domain name resolution, providing insights into DNS configuration, IPv4/IPv6 support, and potential DNS-related issues.\",\n          \"h1\": \"DNS Analysis\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"DNS Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/dns-analysis/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"SiteOne Crawler is designed for maximum availability, ease of use, and functionality for the command-line and desktop application.\",\n          \"h1\": \"Ease of Use\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": 
\"1\",\n          \"title\": \"Ease of Use • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/ease-of-use/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"SiteOne Crawler offers export and reporting features, enabling users to generate comprehensive reports in various formats and for different use cases.\",\n          \"h1\": \"Exports and Reports\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Exports and Reports • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/exports-and-reports/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Detailed analysis of HTTP response headers across your website, helping you identify security issues, performance opportunities, and best practices in header configuration.\",\n          \"h1\": \"HTTP Headers Analysis\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"HTTP Headers Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/headers-analysis/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"The Heading Analysis evaluates the structure and organization of headings (H1, H2, H3, etc.) 
on a webpage to ensure they follow best practices for both accessibility and SEO.\",\n          \"h1\": \"Heading Analysis\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Heading Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/heading-analysis/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Track and visualize improvements in your website metrics over time, providing a clear measure of progress in areas like performance, SEO, accessibility, and security.\",\n          \"h1\": \"Improvement Meter\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Improvement Meter • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/improvement-meter/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"The Mailer feature allows automated email delivery of HTML audit reports after a crawl is completed. This is particularly useful for sharing results with team members, proactively, e.g. 
as part of the CI/CD process.\",\n          \"h1\": \"Mailer\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Mailer • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/mailer/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Export the entire website to offline form (clone, mirror), where it is possible to browse the site through local HTML files (without webserver) including all document, images, styles, scripts, fonts, etc.\",\n          \"h1\": \"Offline Website Generator (clone, mirror)\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Offline Website Generator (clone, mirror) • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/offline-website-generator/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Upload an HTML report to our or your infrastructure and get a secure sharable unique URL for the report.\",\n          \"h1\": \"Audit Report Sharing (upload)\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Audit Report Sharing (upload) • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/online-html-report-upload/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"The Performance Analysis offers detailed insights into the loading times of individual URLs. 
This is crucial for developers and site administrators to identify performance bottlenecks and optimize web pages.\",\n          \"h1\": \"Performance Analysis\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Performance Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/performance-analysis/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Identify the fastest and slowest pages on your website with detailed performance analysis, helping you optimize critical pages and fix bottlenecks.\",\n          \"h1\": \"Performance Metrics\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Performance Metrics • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/performance-metrics/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"The redirects and 404 analysis reports on problematic URLs, focusing on 404 errors and redirects (301/302). 
This helps identify broken links and unnecessary redirects, which can negatively impact user experience and SEO.\",\n          \"h1\": \"Redirect and 404 Analysis\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Redirect and 404 Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/redirect-and-404-analysis/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Evaluate the security of your website by analyzing HTTP headers, TLS/SSL configurations and cookie security.\",\n          \"h1\": \"Security Analysis\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Security Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/security-analysis/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Analyze your website for SEO best practices and social media optimization with detailed insights into metadata, heading structure, duplicate content, and OpenGraph tag implementation.\",\n          \"h1\": \"SEO and OpenGraph Analysis\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"SEO and OpenGraph Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/seo-and-opengraph-analysis/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Automatically generate comprehensive XML and TXT sitemaps of your website to improve search engine visibility and indexing efficiency.\",\n          \"h1\": \"Sitemap Generator\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Sitemap Generator • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/sitemap-generator/\"\n        },\n        {\n          
\"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Analyze how resources are distributed across different domains in your website, helping you identify opportunities for optimization and potential issues with third-party dependencies.\",\n          \"h1\": \"Source Domains Analysis\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Source Domains Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/source-domains-analysis/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Comprehensive analysis of your website SSL/TLS implementation, examining certificate configuration, protocol support, and security best practices.\",\n          \"h1\": \"SSL/TLS Analysis\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"SSL/TLS Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/ssl-tls-analysis/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"The \\\"Stress Testing\\\" feature in SiteOne Crawler allows you to evaluate how a website performs under various load conditions by customizing the number of workers and maximum requests per second.\",\n          \"h1\": \"Stress Testing\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Stress Testing • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/stress-testing/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"SiteOne Crawler provides in-depth technical analysis of your website, examining HTTP headers, server configurations, and technical implementations to identify issues and optimization opportunities.\",\n          \"h1\": \"Technical Analysis\",\n          \"indexing\": \"\",\n          
\"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Technical Analysis • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/technical-analysis/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Export or convert an entire website with all subpages to browsable markdown.\",\n          \"h1\": \"Website to Markdown Converter\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Website to Markdown Converter • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/features/website-to-markdown-converter/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"This section describes the advanced usage of the SiteOne Crawler tool.\",\n          \"h1\": \"Advanced usage\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Advanced usage • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/getting-started/advanced-usage/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Basic usage of the Crawler - how to crawl a website, generate offline version or send HTML report to your e-mail.\",\n          \"h1\": \"Basic Usage\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Basic Usage • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/getting-started/basic-usage/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"This guide will help you get started with the SiteOne Crawler tool in a few minutes.\",\n          \"h1\": \"Quick Start Guide\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Quick Start Guide • SiteOne Crawler\",\n          \"urlPathAndQuery\": 
\"/getting-started/quick-start-guide/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Install and use the user-friendly desktop application of SiteOne Crawler, available for Windows, macOS, and Linux with a full graphical interface for easier website analysis and reporting.\",\n          \"h1\": \"Desktop Application (GUI)\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Desktop Application (GUI) • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/installation-and-requirements/desktop-application/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Learn how to manually install SiteOne Crawler from source code on various operating systems, including detailed steps for meeting dependencies and compilation requirements.\",\n          \"h1\": \"CLI: Manual Installation\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"CLI: Manual Installation • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/installation-and-requirements/manual-installation/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Download and install SiteOne Crawler quickly using pre-built packages for Windows, macOS, and Linux, eliminating the need for complex setup procedures or dependencies.\",\n          \"h1\": \"CLI: Ready-to-use Packages\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"CLI: Ready-to-use Packages • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/installation-and-requirements/ready-to-use-packages/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"The crawler can handle even one core of any common Intel/AMD CPU of the last 10 years and hundreds of MB of 
RAM. ARM CPUs are also supported.\",\n          \"h1\": \"System Requirements\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"System Requirements • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/installation-and-requirements/system-requirements/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"\",\n          \"h1\": \"Contact and Community\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Contact and Community • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/introduction/contact-and-community/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Answers to frequently asked questions about SiteOne Crawler, including details about its purpose, target users, unique features, and practical usage scenarios.\",\n          \"h1\": \"FAQ\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"FAQ • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/introduction/faq/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Explore the future development plans for SiteOne Crawler, including upcoming features, community-driven priorities, and the vision for enhancing website analysis, export capabilities, and quality assessment tools.\",\n          \"h1\": \"Ideas and Roadmap\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Ideas and Roadmap • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/introduction/ideas-and-roadmap/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"SiteOne Crawler is a free tool for website analysis and optimization. 
It provides a wide range of features, including deep website crawling, SEO analysis, accessibility analysis, and more. Available for all platforms.\",\n          \"h1\": \"Key Features\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Key Features • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/introduction/key-features/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"Learn about the personal journey and professional background of SiteOne Crawler creator Ján Regeš, and understand the inspiration and experiences that led to the development of this tool.\",\n          \"h1\": \"Motivation\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Motivation • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/introduction/motivation/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"The main purpose of SiteOne Crawler is to help owners, consultants, developers, QA engineers and DevOps find weaknesses and help improve the quality of their websites in various areas - SEO, security, performance, accessibility, socnets-sharing, best practices or content issues.\",\n          \"h1\": \"Overview\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Overview • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/introduction/overview/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n          \"description\": \"\",\n          \"h1\": \"Support Us\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Support Us • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/introduction/support-us/\"\n        },\n        {\n          \"deniedByRobotsTxt\": \"false\",\n        
  \"description\": \"This tool wouldn't have been created if it weren't for great people in my near and far surroundings. I would like to thank them all...\",\n          \"h1\": \"Thanks\",\n          \"indexing\": \"\",\n          \"keywords\": \"\",\n          \"robotsIndex\": \"1\",\n          \"title\": \"Thanks • SiteOne Crawler\",\n          \"urlPathAndQuery\": \"/introduction/thanks/\"\n        }\n      ],\n      \"title\": \"SEO metadata\"\n    },\n    \"seo-headings\": {\n      \"aplCode\": \"seo-headings\",\n      \"columns\": {\n        \"headings\": {\n          \"aplCode\": \"headings\",\n          \"escapeOutputHtml\": false,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Heading structure\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": {},\n          \"truncateIfLonger\": true,\n          \"width\": 84\n        },\n        \"headingsCount\": {\n          \"aplCode\": \"headingsCount\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Count\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 5\n        },\n        \"headingsErrorsCount\": {\n          \"aplCode\": \"headingsErrorsCount\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Errors\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 6\n        },\n        \"urlPathAndQuery\": {\n          \"aplCode\": 
\"urlPathAndQuery\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"URL\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 30\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> FAQ [#_top] <h3> Who is this tool for? [#who-is-this-tool-for] <h3> What is the difference between this tool and other tools? [#what-is-the-difference-between-this-tool-and-other-tools] <h3> Can I easily share analysis results with colleagues? [#can-i-easily-share-analysis-results-with-colleagues] <h3> Is this tool difficult to use? [#is-this-tool-difficult-to-use] <h3> Is this tool safe to use? [#is-this-tool-safe-to-use] <h3> How can I prevent SiteOne Crawler from crawling my website? [#how-can-i-prevent-siteone-crawler-from-crawling-my-website] <h3> What are the key features of this tool? [#what-are-the-key-features-of-this-tool] <h3> What are the known limitations of this tool? [#what-are-the-known-limitations-of-this-tool] <h3> What are the future plans for this tool? [#what-are-the-future-plans-for-this-tool] <h3> How can I contribute to this tool? [#how-can-i-contribute-to-this-tool] <h3> How can I report a bug or request a new feature? [#how-can-i-report-a-bug-or-request-a-new-feature] <h3> How can I contact the author? [#how-can-i-contact-the-author]\",\n          \"headingsCount\": \"14\",\n          \"headingsErrorsCount\": \"13\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; FAQ [#_top]<ul><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Who is this tool for? [#who-is-this-tool-for]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; What is the difference between this tool and other tools? [#what-is-the-difference-between-this-tool-and-other-tools]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Can I easily share analysis results with colleagues? [#can-i-easily-share-analysis-results-with-colleagues]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Is this tool difficult to use? [#is-this-tool-difficult-to-use]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Is this tool safe to use? [#is-this-tool-safe-to-use]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; How can I prevent SiteOne Crawler from crawling my website? [#how-can-i-prevent-siteone-crawler-from-crawling-my-website]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; What are the key features of this tool? [#what-are-the-key-features-of-this-tool]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; What are the known limitations of this tool? 
[#what-are-the-known-limitations-of-this-tool]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; What are the future plans for this tool? [#what-are-the-future-plans-for-this-tool]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; How can I contribute to this tool? [#how-can-i-contribute-to-this-tool]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; How can I report a bug or request a new feature? [#how-can-i-report-a-bug-or-request-a-new-feature]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; How can I contact the author? [#how-can-i-contact-the-author]</span></span></li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/introduction/faq/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Examples [#_top] <h3> Analysis of the entire website with default settings [#analysis-of-the-entire-website-with-default-settings] <h3> Analysis and upload HTML report to the online service [#analysis-and-upload-html-report-to-the-online-service] <h3> Analysis and sending of e-mail with the HTML report [#analysis-and-sending-of-e-mail-with-the-html-report] <h3> Simulate a tablet and crawl only the first 100 URLs [#simulate-a-tablet-and-crawl-only-the-first-100-urls] <h3> Internal password-protected web behind the proxy [#internal-password-protected-web-behind-the-proxy] <h3> SEO oriented analysis and output (ignore assets) [#seo-oriented-analysis-and-output-ignore-assets] <h3> Stress test with 10 workers and 100 reqs/sec [#stress-test-with-10-workers-and-100-reqssec] <h3> Analysis and export of a large website ~ 1 mio URLs 
[#analysis-and-export-of-a-large-website--1-mio-urls] <h3> Generate an offline version of the website [#generate-an-offline-version-of-the-website] <h3> Generate sitemaps for the website [#generate-sitemaps-for-the-website] <h3> Help with all available options [#help-with-all-available-options]\",\n          \"headingsCount\": \"13\",\n          \"headingsErrorsCount\": \"12\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Examples [#_top]<ul><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Analysis of the entire website with default settings [#analysis-of-the-entire-website-with-default-settings]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Analysis and upload HTML report to the online service [#analysis-and-upload-html-report-to-the-online-service]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Analysis and sending of e-mail with the HTML report [#analysis-and-sending-of-e-mail-with-the-html-report]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Simulate a tablet and crawl only the first 100 URLs [#simulate-a-tablet-and-crawl-only-the-first-100-urls]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. 
Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Internal password-protected web behind the proxy [#internal-password-protected-web-behind-the-proxy]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; SEO oriented analysis and output (ignore assets) [#seo-oriented-analysis-and-output-ignore-assets]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Stress test with 10 workers and 100 reqs/sec [#stress-test-with-10-workers-and-100-reqssec]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Analysis and export of a large website ~ 1 mio URLs [#analysis-and-export-of-a-large-website--1-mio-urls]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Generate an offline version of the website [#generate-an-offline-version-of-the-website]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Generate sitemaps for the website [#generate-sitemaps-for-the-website]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. 
Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; Help with all available options [#help-with-all-available-options]</span></span></li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/configuration/examples/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Quick Start Guide [#_top] <h3> How to use Desktop Application [#how-to-use-desktop-application] <h3> How to use Command-line Interface [#how-to-use-command-line-interface]\",\n          \"headingsCount\": \"4\",\n          \"headingsErrorsCount\": \"3\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Quick Start Guide [#_top]<ul><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; How to use Desktop Application [#how-to-use-desktop-application]</span></span></li><li><span class=\\\"help\\\" title=\\\"Heading level 3 is not correct. 
Should be 2.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h3&gt; How to use Command-line Interface [#how-to-use-command-line-interface]</span></span></li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/getting-started/quick-start-guide/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Overview [#_top] <h2> Purpose of the SiteOne Crawler [#purpose-of-the-siteone-crawler] <h2> 3 videos, more than a thousand words [#3-videos-more-than-a-thousand-words] <h2> Typical Use Cases [#typical-use-cases] <h3> Website owner or consultant [#website-owner-or-consultant] <h3> Developer [#developer] <h3> DevOps [#devops] <h3> QA engineer [#qa-engineer] <h2> Core Principles [#core-principles]\",\n          \"headingsCount\": \"10\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Overview [#_top]<ul><li>&lt;h2&gt; Purpose of the SiteOne Crawler [#purpose-of-the-siteone-crawler]</li><li>&lt;h2&gt; 3 videos, more than a thousand words [#3-videos-more-than-a-thousand-words]</li><li>&lt;h2&gt; Typical Use Cases [#typical-use-cases]<ul><li>&lt;h3&gt; Website owner or consultant [#website-owner-or-consultant]</li><li>&lt;h3&gt; Developer [#developer]</li><li>&lt;h3&gt; DevOps [#devops]</li><li>&lt;h3&gt; QA engineer [#qa-engineer]</li></ul></li><li>&lt;h2&gt; Core Principles [#core-principles]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/introduction/overview/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Key Features [#_top] <h2> Thinking about features [#thinking-about-features] <h2> Future plans [#future-plans] <h2> List of features [#list-of-features]\",\n          \"headingsCount\": \"5\",\n          \"headingsErrorsCount\": 
\"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Key Features [#_top]<ul><li>&lt;h2&gt; Thinking about features [#thinking-about-features]</li><li>&lt;h2&gt; Future plans [#future-plans]</li><li>&lt;h2&gt; List of features [#list-of-features]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/introduction/key-features/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Ease of Use [#_top] <h2> 💡What would you improve? [#what-would-you-improve]\",\n          \"headingsCount\": \"3\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Ease of Use [#_top]<ul><li>&lt;h2&gt; 💡What would you improve? [#what-would-you-improve]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/ease-of-use/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Offline Website Generator (clone, mirror) [#_top] <h2> Features [#features] <h2> 💡Further development ideas [#further-development-ideas]\",\n          \"headingsCount\": \"4\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Offline Website Generator (clone, mirror) [#_top]<ul><li>&lt;h2&gt; Features [#features]</li><li>&lt;h2&gt; 💡Further development ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/offline-website-generator/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Command-line options [#_top] <h2> Basic Settings [#basic-settings] <h2> Output Settings [#output-settings] <h2> Upload options [#upload-options] <h2> Resource Filtering [#resource-filtering] <h2> Advanced Crawler Settings [#advanced-crawler-settings] <h2> Expert Settings [#expert-settings] <h2> File Export Settings [#file-export-settings] <h2> Mailer Options [#mailer-options] <h2> Offline Exporter Options [#offline-exporter-options] <h2> Markdown exporter options [#markdown-exporter-options] <h2> Sitemap Options [#sitemap-options] <h2> Fastest URL Analyzer [#fastest-url-analyzer] <h2> SEO and OpenGraph Analyzer [#seo-and-opengraph-analyzer] <h2> Slowest URL Analyzer [#slowest-url-analyzer]\",\n          \"headingsCount\": \"16\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Command-line options [#_top]<ul><li>&lt;h2&gt; Basic Settings [#basic-settings]</li><li>&lt;h2&gt; Output Settings [#output-settings]</li><li>&lt;h2&gt; Upload options [#upload-options]</li><li>&lt;h2&gt; Resource Filtering [#resource-filtering]</li><li>&lt;h2&gt; Advanced Crawler Settings [#advanced-crawler-settings]</li><li>&lt;h2&gt; Expert Settings [#expert-settings]</li><li>&lt;h2&gt; File Export Settings [#file-export-settings]</li><li>&lt;h2&gt; Mailer Options [#mailer-options]</li><li>&lt;h2&gt; Offline Exporter Options [#offline-exporter-options]</li><li>&lt;h2&gt; Markdown exporter options [#markdown-exporter-options]</li><li>&lt;h2&gt; Sitemap Options [#sitemap-options]</li><li>&lt;h2&gt; Fastest URL Analyzer [#fastest-url-analyzer]</li><li>&lt;h2&gt; SEO and OpenGraph Analyzer [#seo-and-opengraph-analyzer]</li><li>&lt;h2&gt; Slowest URL Analyzer [#slowest-url-analyzer]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/configuration/command-line-options/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Basic Usage [#_top] <h2> First steps [#first-steps] <h3> Crawl a website and print the results to the console [#crawl-a-website-and-print-the-results-to-the-console] <h3> Crawl with HTML report to e-mail [#crawl-with-html-report-to-e-mail] <h3> Generate offline version of the website [#generate-offline-version-of-the-website] <h2> Lot of other uses [#lot-of-other-uses]\",\n          \"headingsCount\": \"7\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Basic Usage [#_top]<ul><li>&lt;h2&gt; First steps [#first-steps]<ul><li>&lt;h3&gt; Crawl a website and print the results to the console [#crawl-a-website-and-print-the-results-to-the-console]</li><li>&lt;h3&gt; Crawl with HTML report to e-mail [#crawl-with-html-report-to-e-mail]</li><li>&lt;h3&gt; Generate offline version of the website [#generate-offline-version-of-the-website]</li></ul></li><li>&lt;h2&gt; Lot of other uses [#lot-of-other-uses]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/getting-started/basic-usage/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Contact and Community [#_top] <h2> Introduction [#introduction] <h2> Contact and Community links [#contact-and-community-links] <h3> Documentation [#documentation] <h3> Primary channels [#primary-channels] <h3> Secondary channels [#secondary-channels]\",\n          \"headingsCount\": \"7\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Contact and Community [#_top]<ul><li>&lt;h2&gt; Introduction [#introduction]</li><li>&lt;h2&gt; Contact and Community links [#contact-and-community-links]<ul><li>&lt;h3&gt; Documentation [#documentation]</li><li>&lt;h3&gt; Primary channels [#primary-channels]</li><li>&lt;h3&gt; Secondary channels [#secondary-channels]</li></ul></li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/introduction/contact-and-community/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Security Analysis [#_top] <h2> Sample Results [#sample-results] <h2> 💡What would you improve? 
[#what-would-you-improve]\",\n          \"headingsCount\": \"4\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Security Analysis [#_top]<ul><li>&lt;h2&gt; Sample Results [#sample-results]</li><li>&lt;h2&gt; 💡What would you improve? [#what-would-you-improve]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/security-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Dev/DevOps assistant [#_top] <h2> 💡Further development ideas [#further-development-ideas]\",\n          \"headingsCount\": \"3\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Dev/DevOps assistant [#_top]<ul><li>&lt;h2&gt; 💡Further development ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/dev-devops-assistant/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Redirect and 404 Analysis [#_top] <h2> Key Findings [#key-findings] <h2> Sample Results [#sample-results] <h2> 💡What would you improve? [#what-would-you-improve]\",\n          \"headingsCount\": \"5\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Redirect and 404 Analysis [#_top]<ul><li>&lt;h2&gt; Key Findings [#key-findings]</li><li>&lt;h2&gt; Sample Results [#sample-results]</li><li>&lt;h2&gt; 💡What would you improve? [#what-would-you-improve]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/redirect-and-404-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Performance Analysis [#_top] <h2> Key Findings [#key-findings] <h2> Sample Results [#sample-results] <h3> Slowest URLs [#slowest-urls] <h3> Fastest URLs [#fastest-urls] <h2> Configuration Options [#configuration-options] <h2> Usage [#usage] <h2> 💡Further development ideas [#further-development-ideas]\",\n          \"headingsCount\": \"9\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Performance Analysis [#_top]<ul><li>&lt;h2&gt; Key Findings [#key-findings]</li><li>&lt;h2&gt; Sample Results [#sample-results]<ul><li>&lt;h3&gt; Slowest URLs [#slowest-urls]</li><li>&lt;h3&gt; Fastest URLs [#fastest-urls]</li></ul></li><li>&lt;h2&gt; Configuration Options [#configuration-options]</li><li>&lt;h2&gt; Usage [#usage]</li><li>&lt;h2&gt; 💡Further development ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/performance-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> SEO and OpenGraph Analysis [#_top] <h2> Key Findings [#key-findings] <h2> 💡What would you improve? 
[#what-would-you-improve]\",\n          \"headingsCount\": \"4\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; SEO and OpenGraph Analysis [#_top]<ul><li>&lt;h2&gt; Key Findings [#key-findings]</li><li>&lt;h2&gt; 💡What would you improve? [#what-would-you-improve]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/seo-and-opengraph-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Desktop Application (GUI) [#_top] <h2> Where to download [#where-to-download] <h3> All platforms and older versions [#all-platforms-and-older-versions] <h2> How to use Desktop Application [#how-to-use-desktop-application]\",\n          \"headingsCount\": \"5\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Desktop Application (GUI) [#_top]<ul><li>&lt;h2&gt; Where to download [#where-to-download]<ul><li>&lt;h3&gt; All platforms and older versions [#all-platforms-and-older-versions]</li></ul></li><li>&lt;h2&gt; How to use Desktop Application [#how-to-use-desktop-application]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/installation-and-requirements/desktop-application/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Website to Markdown Converter [#_top] <h2> Features [#features] <h2> Command-line Options [#command-line-options] <h2> 💡Further development ideas [#further-development-ideas]\",\n          \"headingsCount\": \"5\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Website to Markdown Converter [#_top]<ul><li>&lt;h2&gt; Features [#features]</li><li>&lt;h2&gt; Command-line Options [#command-line-options]</li><li>&lt;h2&gt; 💡Further development ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/website-to-markdown-converter/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> CLI: Ready-to-use Packages [#_top] <h2> Where to download packages [#where-to-download-packages] <h2> Windows 7/8/10/11 (x64) [#windows-781011-x64] <h2> macOS (x64, Intel) [#macos-x64-intel] <h2> macOS (arm64, Apple Silicon, M1/M2/M3) [#macos-arm64-apple-silicon-m1m2m3] <h2> Linux (x64) or WSL on Windows [#linux-x64-or-wsl-on-windows] <h2> Linux (arm64) [#linux-arm64]\",\n          \"headingsCount\": \"8\",\n          \"headingsErrorsCount\": \"1\",\n          
\"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; CLI: Ready-to-use Packages [#_top]<ul><li>&lt;h2&gt; Where to download packages [#where-to-download-packages]</li><li>&lt;h2&gt; Windows 7/8/10/11 (x64) [#windows-781011-x64]</li><li>&lt;h2&gt; macOS (x64, Intel) [#macos-x64-intel]</li><li>&lt;h2&gt; macOS (arm64, Apple Silicon, M1/M2/M3) [#macos-arm64-apple-silicon-m1m2m3]</li><li>&lt;h2&gt; Linux (x64) or WSL on Windows [#linux-x64-or-wsl-on-windows]</li><li>&lt;h2&gt; Linux (arm64) [#linux-arm64]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/installation-and-requirements/ready-to-use-packages/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> System Requirements [#_top] <h2> Hardware Requirements [#hardware-requirements] <h3> CPU [#cpu] <h3> Memory (RAM) [#memory-ram] <h3> Disk [#disk] <h3> Network/Internet [#networkinternet] <h2> Software Requirements [#software-requirements]\",\n          \"headingsCount\": \"8\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; System Requirements [#_top]<ul><li>&lt;h2&gt; Hardware Requirements [#hardware-requirements]<ul><li>&lt;h3&gt; CPU [#cpu]</li><li>&lt;h3&gt; Memory (RAM) [#memory-ram]</li><li>&lt;h3&gt; Disk [#disk]</li><li>&lt;h3&gt; Network/Internet [#networkinternet]</li></ul></li><li>&lt;h2&gt; Software Requirements [#software-requirements]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/installation-and-requirements/system-requirements/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Ideas and Roadmap [#_top]\",\n          \"headingsCount\": \"2\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Ideas and Roadmap [#_top]</li></ul>\",\n          \"urlPathAndQuery\": \"/introduction/ideas-and-roadmap/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Technical Analysis [#_top] <h2> Key Technical Analysis Features [#key-technical-analysis-features] <h3> HTTP Headers Analysis [#http-headers-analysis] <h3> SSL/TLS Analysis [#ssltls-analysis] <h3> Server Configuration [#server-configuration] <h3> Technical Implementation [#technical-implementation] <h2> Sample Results [#sample-results] <h3> HTTP Headers Statistics [#http-headers-statistics] <h3> HTTP Header Values [#http-header-values] <h3> DNS Information [#dns-information] <h3> Analysis Statistics [#analysis-statistics] <h2> Interpreting Results [#interpreting-results] <h2> Use Cases [#use-cases] <h3> Security Hardening [#security-hardening] <h3> Performance Optimization [#performance-optimization] <h3> Standards Compliance [#standards-compliance] 
<h2> 💡Further Development Ideas [#further-development-ideas]\",\n          \"headingsCount\": \"18\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Technical Analysis [#_top]<ul><li>&lt;h2&gt; Key Technical Analysis Features [#key-technical-analysis-features]<ul><li>&lt;h3&gt; HTTP Headers Analysis [#http-headers-analysis]</li><li>&lt;h3&gt; SSL/TLS Analysis [#ssltls-analysis]</li><li>&lt;h3&gt; Server Configuration [#server-configuration]</li><li>&lt;h3&gt; Technical Implementation [#technical-implementation]</li></ul></li><li>&lt;h2&gt; Sample Results [#sample-results]<ul><li>&lt;h3&gt; HTTP Headers Statistics [#http-headers-statistics]</li><li>&lt;h3&gt; HTTP Header Values [#http-header-values]</li><li>&lt;h3&gt; DNS Information [#dns-information]</li><li>&lt;h3&gt; Analysis Statistics [#analysis-statistics]</li></ul></li><li>&lt;h2&gt; Interpreting Results [#interpreting-results]</li><li>&lt;h2&gt; Use Cases [#use-cases]<ul><li>&lt;h3&gt; Security Hardening [#security-hardening]</li><li>&lt;h3&gt; Performance Optimization [#performance-optimization]</li><li>&lt;h3&gt; Standards Compliance [#standards-compliance]</li></ul></li><li>&lt;h2&gt; 💡Further Development Ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/technical-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Audit Report [#_top] <h2> Video about the audit report [#video-about-the-audit-report] <h2> Upload feature to easy report sharing [#upload-feature-to-easy-report-sharing] <h2> Real examples of the audit report [#real-examples-of-the-audit-report] <h2> High-quality continuous display of the result [#high-quality-continuous-display-of-the-result] <h2> 
💡What would you improve? [#what-would-you-improve]\",\n          \"headingsCount\": \"7\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Audit Report [#_top]<ul><li>&lt;h2&gt; Video about the audit report [#video-about-the-audit-report]</li><li>&lt;h2&gt; Upload feature to easy report sharing [#upload-feature-to-easy-report-sharing]</li><li>&lt;h2&gt; Real examples of the audit report [#real-examples-of-the-audit-report]</li><li>&lt;h2&gt; High-quality continuous display of the result [#high-quality-continuous-display-of-the-result]</li><li>&lt;h2&gt; 💡What would you improve? [#what-would-you-improve]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/audit-report/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> CLI: Manual Installation [#_top] <h2> Choose your platform [#choose-your-platform] <h2> Windows 7/8/10/11 (x64) [#windows-781011-x64] <h2> macOS (x64, Intel) [#macos-x64-intel] <h2> macOS (arm64, Apple Silicon, M1/M2/M3) [#macos-arm64-apple-silicon-m1m2m3] <h2> Linux (x64) or WSL on Windows [#linux-x64-or-wsl-on-windows] <h2> Linux (arm64) [#linux-arm64]\",\n          \"headingsCount\": \"8\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; CLI: Manual Installation [#_top]<ul><li>&lt;h2&gt; Choose your platform [#choose-your-platform]</li><li>&lt;h2&gt; Windows 7/8/10/11 (x64) [#windows-781011-x64]</li><li>&lt;h2&gt; macOS (x64, Intel) [#macos-x64-intel]</li><li>&lt;h2&gt; macOS (arm64, Apple Silicon, M1/M2/M3) [#macos-arm64-apple-silicon-m1m2m3]</li><li>&lt;h2&gt; Linux (x64) or WSL on Windows [#linux-x64-or-wsl-on-windows]</li><li>&lt;h2&gt; Linux (arm64) [#linux-arm64]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/installation-and-requirements/manual-installation/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Heading Analysis [#_top] <h2> 💡What would you improve? [#what-would-you-improve]\",\n          \"headingsCount\": \"3\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Heading Analysis [#_top]<ul><li>&lt;h2&gt; 💡What would you improve? [#what-would-you-improve]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/heading-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Accessibility Analysis [#_top] <h2> Included checks [#included-checks] <h2> Sample Results [#sample-results] <h2> 💡Further development ideas [#further-development-ideas]\",\n          \"headingsCount\": \"5\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Accessibility Analysis [#_top]<ul><li>&lt;h2&gt; Included checks [#included-checks]</li><li>&lt;h2&gt; Sample Results [#sample-results]</li><li>&lt;h2&gt; 💡Further development ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/accessibility-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Audit Report Sharing (upload) [#_top] <h2> Features of the online audit report [#features-of-the-online-audit-report] <h2> Security mechanisms [#security-mechanisms] <h2> How to set up your own upload service [#how-to-set-up-your-own-upload-service] <h2> Command-line options [#command-line-options] <h2> 💡Further development ideas [#further-development-ideas]\",\n          \"headingsCount\": \"7\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Audit Report Sharing (upload) [#_top]<ul><li>&lt;h2&gt; Features of the online audit report [#features-of-the-online-audit-report]</li><li>&lt;h2&gt; Security mechanisms [#security-mechanisms]</li><li>&lt;h2&gt; How to set up your own upload service [#how-to-set-up-your-own-upload-service]</li><li>&lt;h2&gt; Command-line options [#command-line-options]</li><li>&lt;h2&gt; 💡Further development ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/online-html-report-upload/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Deep Website Crawling [#_top] <h2> 💡Further development ideas [#further-development-ideas]\",\n          \"headingsCount\": \"3\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Deep Website Crawling [#_top]<ul><li>&lt;h2&gt; 💡Further development ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/deep-website-crawling/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Extending [#_top] <h2> Architecture Overview [#architecture-overview] <h2> Creating Custom Analyzers [#creating-custom-analyzers] <h3> Analyzer Interface [#analyzer-interface] <h3> Key Analyzer Methods [#key-analyzer-methods] <h3> Adding the Analyzer [#adding-the-analyzer] <h2> Creating Custom Content Processors [#creating-custom-content-processors] <h3> Content Processor Interface [#content-processor-interface] <h3> Adding the Content Processor [#adding-the-content-processor] <h2> Creating Custom Exporters [#creating-custom-exporters] <h3> Exporter Interface [#exporter-interface] <h3> Adding the Exporter [#adding-the-exporter] <h2> Best Practices for Extensions [#best-practices-for-extensions] <h2> Example: Simple SEO Title Analyzer [#example-simple-seo-title-analyzer] <h2> 💡Further Development Ideas [#further-development-ideas]\",\n          \"headingsCount\": \"16\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Extending [#_top]<ul><li>&lt;h2&gt; Architecture Overview [#architecture-overview]</li><li>&lt;h2&gt; Creating Custom Analyzers [#creating-custom-analyzers]<ul><li>&lt;h3&gt; Analyzer Interface [#analyzer-interface]</li><li>&lt;h3&gt; Key Analyzer Methods [#key-analyzer-methods]</li><li>&lt;h3&gt; Adding the Analyzer [#adding-the-analyzer]</li></ul></li><li>&lt;h2&gt; Creating Custom Content Processors [#creating-custom-content-processors]<ul><li>&lt;h3&gt; Content Processor Interface [#content-processor-interface]</li><li>&lt;h3&gt; Adding the Content Processor [#adding-the-content-processor]</li></ul></li><li>&lt;h2&gt; Creating Custom Exporters [#creating-custom-exporters]<ul><li>&lt;h3&gt; Exporter Interface [#exporter-interface]</li><li>&lt;h3&gt; Adding the Exporter [#adding-the-exporter]</li></ul></li><li>&lt;h2&gt; Best Practices for Extensions [#best-practices-for-extensions]</li><li>&lt;h2&gt; Example: Simple SEO Title Analyzer [#example-simple-seo-title-analyzer]</li><li>&lt;h2&gt; 💡Further Development Ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/advanced-topics/extending/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Stress Testing [#_top] <h2> Key Features [#key-features] <h2> 💡What would you improve? [#what-would-you-improve]\",\n          \"headingsCount\": \"4\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Stress Testing [#_top]<ul><li>&lt;h2&gt; Key Features [#key-features]</li><li>&lt;h2&gt; 💡What would you improve? 
[#what-would-you-improve]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/stress-testing/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> DNS Analysis [#_top] <h2> Key Features [#key-features] <h2> DNS Information Display [#dns-information-display] <h2> Summary Information [#summary-information] <h2> How It Works [#how-it-works] <h2> Practical Benefits [#practical-benefits] <h2> Considerations and Limitations [#considerations-and-limitations] <h2> 💡Further Development Ideas [#further-development-ideas]\",\n          \"headingsCount\": \"9\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; DNS Analysis [#_top]<ul><li>&lt;h2&gt; Key Features [#key-features]</li><li>&lt;h2&gt; DNS Information Display [#dns-information-display]</li><li>&lt;h2&gt; Summary Information [#summary-information]</li><li>&lt;h2&gt; How It Works [#how-it-works]</li><li>&lt;h2&gt; Practical Benefits [#practical-benefits]</li><li>&lt;h2&gt; Considerations and Limitations [#considerations-and-limitations]</li><li>&lt;h2&gt; 💡Further Development Ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/dns-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Performance Metrics [#_top] <h2> Key Capabilities [#key-capabilities] <h2> Fastest Pages Analysis [#fastest-pages-analysis] <h2> Slowest Pages Analysis [#slowest-pages-analysis] <h2> Customizable Performance Thresholds [#customizable-performance-thresholds] <h2> Implementation Details [#implementation-details] <h2> Practical Applications [#practical-applications] <h3> Performance Optimization [#performance-optimization] <h3> User 
Experience Improvement [#user-experience-improvement] <h3> Technical Troubleshooting [#technical-troubleshooting] <h3> Content Strategy [#content-strategy] <h2> Complementary Analysis [#complementary-analysis] <h2> 💡Further Development Ideas [#further-development-ideas]\",\n          \"headingsCount\": \"14\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Performance Metrics [#_top]<ul><li>&lt;h2&gt; Key Capabilities [#key-capabilities]</li><li>&lt;h2&gt; Fastest Pages Analysis [#fastest-pages-analysis]</li><li>&lt;h2&gt; Slowest Pages Analysis [#slowest-pages-analysis]</li><li>&lt;h2&gt; Customizable Performance Thresholds [#customizable-performance-thresholds]</li><li>&lt;h2&gt; Implementation Details [#implementation-details]</li><li>&lt;h2&gt; Practical Applications [#practical-applications]<ul><li>&lt;h3&gt; Performance Optimization [#performance-optimization]</li><li>&lt;h3&gt; User Experience Improvement [#user-experience-improvement]</li><li>&lt;h3&gt; Technical Troubleshooting [#technical-troubleshooting]</li><li>&lt;h3&gt; Content Strategy [#content-strategy]</li></ul></li><li>&lt;h2&gt; Complementary Analysis [#complementary-analysis]</li><li>&lt;h2&gt; 💡Further Development Ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/performance-metrics/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Troubleshooting [#_top] <h2> Common Issues and Solutions [#common-issues-and-solutions] <h3> Memory Issues [#memory-issues] <h3> Performance Issues [#performance-issues] <h3> Crawling Problems [#crawling-problems] <h3> Export and Report Issues [#export-and-report-issues] <h2> Debugging Techniques [#debugging-techniques] <h3> 
Enable Debug Mode [#enable-debug-mode] <h3> Logging to File [#logging-to-file] <h3> Progressive Testing [#progressive-testing] <h2> Specific Scenarios [#specific-scenarios] <h3> Handling Modern JavaScript Frameworks [#handling-modern-javascript-frameworks] <h3> Working with Large E-commerce Sites [#working-with-large-e-commerce-sites] <h3> Handling Sites with Login Requirements [#handling-sites-with-login-requirements] <h2> Error Messages Explained [#error-messages-explained] <h2> Getting More Help [#getting-more-help] <h2> 💡Further Troubleshooting Tips [#further-troubleshooting-tips]\",\n          \"headingsCount\": \"18\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Troubleshooting [#_top]<ul><li>&lt;h2&gt; Common Issues and Solutions [#common-issues-and-solutions]<ul><li>&lt;h3&gt; Memory Issues [#memory-issues]</li><li>&lt;h3&gt; Performance Issues [#performance-issues]</li><li>&lt;h3&gt; Crawling Problems [#crawling-problems]</li><li>&lt;h3&gt; Export and Report Issues [#export-and-report-issues]</li></ul></li><li>&lt;h2&gt; Debugging Techniques [#debugging-techniques]<ul><li>&lt;h3&gt; Enable Debug Mode [#enable-debug-mode]</li><li>&lt;h3&gt; Logging to File [#logging-to-file]</li><li>&lt;h3&gt; Progressive Testing [#progressive-testing]</li></ul></li><li>&lt;h2&gt; Specific Scenarios [#specific-scenarios]<ul><li>&lt;h3&gt; Handling Modern JavaScript Frameworks [#handling-modern-javascript-frameworks]</li><li>&lt;h3&gt; Working with Large E-commerce Sites [#working-with-large-e-commerce-sites]</li><li>&lt;h3&gt; Handling Sites with Login Requirements [#handling-sites-with-login-requirements]</li></ul></li><li>&lt;h2&gt; Error Messages Explained [#error-messages-explained]</li><li>&lt;h2&gt; Getting More Help 
[#getting-more-help]</li><li>&lt;h2&gt; 💡Further Troubleshooting Tips [#further-troubleshooting-tips]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/advanced-topics/troubleshooting/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Content Type Analysis [#_top] <h2> Key Metrics [#key-metrics] <h2> Sample Results [#sample-results] <h3> Content Types Summary [#content-types-summary] <h3> MIME Types Breakdown [#mime-types-breakdown] <h2> 💡Further development ideas [#further-development-ideas]\",\n          \"headingsCount\": \"7\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Content Type Analysis [#_top]<ul><li>&lt;h2&gt; Key Metrics [#key-metrics]</li><li>&lt;h2&gt; Sample Results [#sample-results]<ul><li>&lt;h3&gt; Content Types Summary [#content-types-summary]</li><li>&lt;h3&gt; MIME Types Breakdown [#mime-types-breakdown]</li></ul></li><li>&lt;h2&gt; 💡Further development ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/content-type-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Improvement Meter [#_top] <h2> 💡What would you improve? [#what-would-you-improve]\",\n          \"headingsCount\": \"3\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Improvement Meter [#_top]<ul><li>&lt;h2&gt; 💡What would you improve? 
[#what-would-you-improve]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/improvement-meter/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Source Domains Analysis [#_top] <h2> Key Features [#key-features] <h2> Analysis Table [#analysis-table] <h2> Practical Applications [#practical-applications] <h3> Performance Optimization [#performance-optimization] <h3> Security Assessment [#security-assessment] <h3> Infrastructure Planning [#infrastructure-planning] <h3> Compliance and Privacy [#compliance-and-privacy] <h2> Implementation Details [#implementation-details] <h2> Best Practices Based on Analysis [#best-practices-based-on-analysis] <h2> 💡Further Development Ideas [#further-development-ideas]\",\n          \"headingsCount\": \"12\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Source Domains Analysis [#_top]<ul><li>&lt;h2&gt; Key Features [#key-features]</li><li>&lt;h2&gt; Analysis Table [#analysis-table]</li><li>&lt;h2&gt; Practical Applications [#practical-applications]<ul><li>&lt;h3&gt; Performance Optimization [#performance-optimization]</li><li>&lt;h3&gt; Security Assessment [#security-assessment]</li><li>&lt;h3&gt; Infrastructure Planning [#infrastructure-planning]</li><li>&lt;h3&gt; Compliance and Privacy [#compliance-and-privacy]</li></ul></li><li>&lt;h2&gt; Implementation Details [#implementation-details]</li><li>&lt;h2&gt; Best Practices Based on Analysis [#best-practices-based-on-analysis]</li><li>&lt;h2&gt; 💡Further Development Ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/source-domains-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page 
[#starlight__on-this-page] <h1> Crawler Behavior [#_top] <h2> Basic Crawling Process [#basic-crawling-process] <h2> URL Handling and Discovery [#url-handling-and-discovery] <h2> URL Filtering [#url-filtering] <h3> Domain Restrictions [#domain-restrictions] <h3> Content Type Filtering [#content-type-filtering] <h3> Depth Limitation [#depth-limitation] <h3> Pattern Matching [#pattern-matching] <h3> Robots.txt Compliance [#robotstxt-compliance] <h2> Handling Special Cases [#handling-special-cases] <h3> Query Parameters [#query-parameters] <h3> Redirects and Non-200 Responses [#redirects-and-non-200-responses] <h3> JavaScript Frameworks [#javascript-frameworks] <h2> Performance Considerations [#performance-considerations] <h2> 💡Further Development Ideas [#further-development-ideas]\",\n          \"headingsCount\": \"16\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Crawler Behavior [#_top]<ul><li>&lt;h2&gt; Basic Crawling Process [#basic-crawling-process]</li><li>&lt;h2&gt; URL Handling and Discovery [#url-handling-and-discovery]</li><li>&lt;h2&gt; URL Filtering [#url-filtering]<ul><li>&lt;h3&gt; Domain Restrictions [#domain-restrictions]</li><li>&lt;h3&gt; Content Type Filtering [#content-type-filtering]</li><li>&lt;h3&gt; Depth Limitation [#depth-limitation]</li><li>&lt;h3&gt; Pattern Matching [#pattern-matching]</li><li>&lt;h3&gt; Robots.txt Compliance [#robotstxt-compliance]</li></ul></li><li>&lt;h2&gt; Handling Special Cases [#handling-special-cases]<ul><li>&lt;h3&gt; Query Parameters [#query-parameters]</li><li>&lt;h3&gt; Redirects and Non-200 Responses [#redirects-and-non-200-responses]</li><li>&lt;h3&gt; JavaScript Frameworks [#javascript-frameworks]</li></ul></li><li>&lt;h2&gt; Performance Considerations 
[#performance-considerations]</li><li>&lt;h2&gt; 💡Further Development Ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/advanced-topics/crawler-behavior/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Mailer [#_top] <h2> Configuration Options [#configuration-options] <h2> 💡What would you improve? [#what-would-you-improve]\",\n          \"headingsCount\": \"4\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Mailer [#_top]<ul><li>&lt;h2&gt; Configuration Options [#configuration-options]</li><li>&lt;h2&gt; 💡What would you improve? [#what-would-you-improve]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/mailer/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Exports and Reports [#_top] <h2> Report formats [#report-formats] <h2> Export features [#export-features] <h2> 💡What would you improve? [#what-would-you-improve]\",\n          \"headingsCount\": \"5\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Exports and Reports [#_top]<ul><li>&lt;h2&gt; Report formats [#report-formats]</li><li>&lt;h2&gt; Export features [#export-features]</li><li>&lt;h2&gt; 💡What would you improve? 
[#what-would-you-improve]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/exports-and-reports/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Motivation [#_top] <h2> About the main author [#about-the-main-author] <h2> Motivation to create this project [#motivation-to-create-this-project]\",\n          \"headingsCount\": \"4\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Motivation [#_top]<ul><li>&lt;h2&gt; About the main author [#about-the-main-author]</li><li>&lt;h2&gt; Motivation to create this project [#motivation-to-create-this-project]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/introduction/motivation/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Best Practices Analysis [#_top] <h2> Key Findings [#key-findings] <h2> Sample Results [#sample-results] <h2> 💡Further development ideas [#further-development-ideas]\",\n          \"headingsCount\": \"5\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Best Practices Analysis [#_top]<ul><li>&lt;h2&gt; Key Findings [#key-findings]</li><li>&lt;h2&gt; Sample Results [#sample-results]</li><li>&lt;h2&gt; 💡Further development ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/best-practices-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Advanced usage [#_top] <h2> Features [#features] <h2> Configuration [#configuration] <h2> Full example [#full-example]\",\n          \"headingsCount\": \"5\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Advanced usage [#_top]<ul><li>&lt;h2&gt; Features [#features]</li><li>&lt;h2&gt; Configuration [#configuration]</li><li>&lt;h2&gt; Full example [#full-example]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/getting-started/advanced-usage/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Availability [#_top] <h2> 💡Further development ideas [#further-development-ideas]\",\n          \"headingsCount\": \"3\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Availability [#_top]<ul><li>&lt;h2&gt; 💡Further development ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/availability/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Thanks [#_top]\",\n          \"headingsCount\": \"2\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Thanks [#_top]</li></ul>\",\n          \"urlPathAndQuery\": \"/introduction/thanks/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Sitemap Generator [#_top] <h2> Key Features [#key-features] <h2> How It Works [#how-it-works] <h2> 💡What would you improve? [#what-would-you-improve]\",\n          \"headingsCount\": \"5\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Sitemap Generator [#_top]<ul><li>&lt;h2&gt; Key Features [#key-features]</li><li>&lt;h2&gt; How It Works [#how-it-works]</li><li>&lt;h2&gt; 💡What would you improve? 
[#what-would-you-improve]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/sitemap-generator/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Caching Analysis [#_top] <h2> Key Features [#key-features] <h2> Analysis Tables [#analysis-tables] <h3> HTTP Caching by Content Type [#http-caching-by-content-type] <h3> HTTP Caching by Domain [#http-caching-by-domain] <h3> HTTP Caching by Domain and Content Type [#http-caching-by-domain-and-content-type] <h2> Detected Cache Types [#detected-cache-types] <h2> How It Works [#how-it-works] <h2> Optimization Opportunities [#optimization-opportunities] <h2> Best Practices [#best-practices] <h2> 💡Further Development Ideas [#further-development-ideas]\",\n          \"headingsCount\": \"12\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Caching Analysis [#_top]<ul><li>&lt;h2&gt; Key Features [#key-features]</li><li>&lt;h2&gt; Analysis Tables [#analysis-tables]<ul><li>&lt;h3&gt; HTTP Caching by Content Type [#http-caching-by-content-type]</li><li>&lt;h3&gt; HTTP Caching by Domain [#http-caching-by-domain]</li><li>&lt;h3&gt; HTTP Caching by Domain and Content Type [#http-caching-by-domain-and-content-type]</li></ul></li><li>&lt;h2&gt; Detected Cache Types [#detected-cache-types]</li><li>&lt;h2&gt; How It Works [#how-it-works]</li><li>&lt;h2&gt; Optimization Opportunities [#optimization-opportunities]</li><li>&lt;h2&gt; Best Practices [#best-practices]</li><li>&lt;h2&gt; 💡Further Development Ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/caching-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> HTTP Headers 
Analysis [#_top] <h2> Key Features [#key-features] <h2> Analysis Tables [#analysis-tables] <h3> HTTP Headers Overview [#http-headers-overview] <h3> HTTP Header Values [#http-header-values] <h2> Security Headers Focus [#security-headers-focus] <h2> Key HTTP Headers Analyzed [#key-http-headers-analyzed] <h3> Security Headers [#security-headers] <h3> Caching Headers [#caching-headers] <h3> Content Headers [#content-headers] <h2> Practical Benefits [#practical-benefits] <h2> Implementation Recommendations [#implementation-recommendations] <h2> 💡Further Development Ideas [#further-development-ideas]\",\n          \"headingsCount\": \"14\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; HTTP Headers Analysis [#_top]<ul><li>&lt;h2&gt; Key Features [#key-features]</li><li>&lt;h2&gt; Analysis Tables [#analysis-tables]<ul><li>&lt;h3&gt; HTTP Headers Overview [#http-headers-overview]</li><li>&lt;h3&gt; HTTP Header Values [#http-header-values]</li></ul></li><li>&lt;h2&gt; Security Headers Focus [#security-headers-focus]</li><li>&lt;h2&gt; Key HTTP Headers Analyzed [#key-http-headers-analyzed]<ul><li>&lt;h3&gt; Security Headers [#security-headers]</li><li>&lt;h3&gt; Caching Headers [#caching-headers]</li><li>&lt;h3&gt; Content Headers [#content-headers]</li></ul></li><li>&lt;h2&gt; Practical Benefits [#practical-benefits]</li><li>&lt;h2&gt; Implementation Recommendations [#implementation-recommendations]</li><li>&lt;h2&gt; 💡Further Development Ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/headers-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> SSL/TLS Analysis [#_top] <h2> Key Features [#key-features] <h2> Certificate 
Information Table [#certificate-information-table] <h2> Comprehensive Security Assessment [#comprehensive-security-assessment] <h3> Certificate Details [#certificate-details] <h3> Protocol Security [#protocol-security] <h3> Implementation Features [#implementation-features] <h2> Security Recommendations [#security-recommendations] <h2> Implementation Details [#implementation-details] <h2> Real-World Benefits [#real-world-benefits] <h2> 💡Further Development Ideas [#further-development-ideas]\",\n          \"headingsCount\": \"12\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; SSL/TLS Analysis [#_top]<ul><li>&lt;h2&gt; Key Features [#key-features]</li><li>&lt;h2&gt; Certificate Information Table [#certificate-information-table]</li><li>&lt;h2&gt; Comprehensive Security Assessment [#comprehensive-security-assessment]<ul><li>&lt;h3&gt; Certificate Details [#certificate-details]</li><li>&lt;h3&gt; Protocol Security [#protocol-security]</li><li>&lt;h3&gt; Implementation Features [#implementation-features]</li></ul></li><li>&lt;h2&gt; Security Recommendations [#security-recommendations]</li><li>&lt;h2&gt; Implementation Details [#implementation-details]</li><li>&lt;h2&gt; Real-World Benefits [#real-world-benefits]</li><li>&lt;h2&gt; 💡Further Development Ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/features/ssl-tls-analysis/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Contribution and Development [#_top] <h2> Getting Started [#getting-started] <h3> Development Environment Setup [#development-environment-setup] <h3> Project Structure [#project-structure] <h2> Contributing Code [#contributing-code] <h3> Development Workflow 
[#development-workflow] <h3> Coding Standards [#coding-standards] <h3> Pull Request Process [#pull-request-process] <h2> Contributing Documentation [#contributing-documentation] <h3> Documentation Structure [#documentation-structure] <h3> Documentation Guidelines [#documentation-guidelines] <h2> Reporting Issues [#reporting-issues] <h3> Bug Reports [#bug-reports] <h3> Feature Requests [#feature-requests] <h2> Development Guidelines [#development-guidelines] <h3> Performance Considerations [#performance-considerations] <h3> Security Best Practices [#security-best-practices] <h2> Release Process [#release-process] <h2> Community Communication [#community-communication] <h2> 💡Further Development Resources [#further-development-resources]\",\n          \"headingsCount\": \"21\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Contribution and Development [#_top]<ul><li>&lt;h2&gt; Getting Started [#getting-started]<ul><li>&lt;h3&gt; Development Environment Setup [#development-environment-setup]</li><li>&lt;h3&gt; Project Structure [#project-structure]</li></ul></li><li>&lt;h2&gt; Contributing Code [#contributing-code]<ul><li>&lt;h3&gt; Development Workflow [#development-workflow]</li><li>&lt;h3&gt; Coding Standards [#coding-standards]</li><li>&lt;h3&gt; Pull Request Process [#pull-request-process]</li></ul></li><li>&lt;h2&gt; Contributing Documentation [#contributing-documentation]<ul><li>&lt;h3&gt; Documentation Structure [#documentation-structure]</li><li>&lt;h3&gt; Documentation Guidelines [#documentation-guidelines]</li></ul></li><li>&lt;h2&gt; Reporting Issues [#reporting-issues]<ul><li>&lt;h3&gt; Bug Reports [#bug-reports]</li><li>&lt;h3&gt; Feature Requests [#feature-requests]</li></ul></li><li>&lt;h2&gt; Development Guidelines 
[#development-guidelines]<ul><li>&lt;h3&gt; Performance Considerations [#performance-considerations]</li><li>&lt;h3&gt; Security Best Practices [#security-best-practices]</li></ul></li><li>&lt;h2&gt; Release Process [#release-process]</li><li>&lt;h2&gt; Community Communication [#community-communication]</li><li>&lt;h2&gt; 💡Further Development Resources [#further-development-resources]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/advanced-topics/contribution-and-development/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Support Us [#_top]\",\n          \"headingsCount\": \"2\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Support Us [#_top]</li></ul>\",\n          \"urlPathAndQuery\": \"/introduction/support-us/\"\n        },\n        {\n          \"headings\": \"<h2> On this page [#starlight__on-this-page] <h1> Caching [#_top] <h2> How Caching Works [#how-caching-works] <h2> Cache Configuration [#cache-configuration] <h3> Main Cache Settings [#main-cache-settings] <h3> Example Usage [#example-usage] <h2> Cache Implementation Details [#cache-implementation-details] <h2> When to Use Caching [#when-to-use-caching] <h2> When to Disable Caching [#when-to-disable-caching] <h2> 💡Further Development Ideas [#further-development-ideas]\",\n          \"headingsCount\": \"10\",\n          \"headingsErrorsCount\": \"1\",\n          \"headingsHtml\": \"<ul><li><span class=\\\"help\\\" title=\\\"Heading level 2 is not correct. 
Should be 1.\\\"><span style=\\\"color: #ff00ff\\\">&lt;h2&gt; On this page [#starlight__on-this-page]</span></span></li><li>&lt;h1&gt; Caching [#_top]<ul><li>&lt;h2&gt; How Caching Works [#how-caching-works]</li><li>&lt;h2&gt; Cache Configuration [#cache-configuration]<ul><li>&lt;h3&gt; Main Cache Settings [#main-cache-settings]</li><li>&lt;h3&gt; Example Usage [#example-usage]</li></ul></li><li>&lt;h2&gt; Cache Implementation Details [#cache-implementation-details]</li><li>&lt;h2&gt; When to Use Caching [#when-to-use-caching]</li><li>&lt;h2&gt; When to Disable Caching [#when-to-disable-caching]</li><li>&lt;h2&gt; 💡Further Development Ideas [#further-development-ideas]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/advanced-topics/caching/\"\n        },\n        {\n          \"headings\": \"<h1> SiteOne Crawler [#_top] <h2> How it works and examples [#how-it-works-and-examples] <h2> 3 videos, more than a thousand words [#3-videos-more-than-a-thousand-words] <h2> First Steps [#first-steps] <h3> Desktop Application <h3> Command-line Interface <h2> Key Features [#key-features]\",\n          \"headingsCount\": \"7\",\n          \"headingsErrorsCount\": \"0\",\n          \"headingsHtml\": \"<ul><li>&lt;h1&gt; SiteOne Crawler [#_top]<ul><li>&lt;h2&gt; How it works and examples [#how-it-works-and-examples]</li><li>&lt;h2&gt; 3 videos, more than a thousand words [#3-videos-more-than-a-thousand-words]</li><li>&lt;h2&gt; First Steps [#first-steps]<ul><li>&lt;h3&gt; Desktop Application</li><li>&lt;h3&gt; Command-line Interface</li></ul></li><li>&lt;h2&gt; Key Features [#key-features]</li></ul></li></ul>\",\n          \"urlPathAndQuery\": \"/\"\n        }\n      ],\n      \"title\": \"Heading structure\"\n    },\n    \"skipped\": {\n      \"aplCode\": \"skipped\",\n      \"columns\": {\n        \"reason\": {\n          \"aplCode\": \"reason\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          
\"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Reason\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 18\n        },\n        \"sourceAttr\": {\n          \"aplCode\": \"sourceAttr\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Source\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 19\n        },\n        \"sourceUqId\": {\n          \"aplCode\": \"sourceUqId\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Found at URL\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 60\n        },\n        \"url\": {\n          \"aplCode\": \"url\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Skipped URL\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 60\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"reason\": \"Robots.txt\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"/examples-exports/docs.astro.build/\"\n        },\n        {\n          \"reason\": \"Robots.txt\",\n          \"sourceAttr\": \"<a href>\",\n          
\"sourceUqId\": \"/\",\n          \"url\": \"/examples-exports/netlify.com/\"\n        },\n        {\n          \"reason\": \"Robots.txt\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"/examples-exports/nextjs.org/\"\n        },\n        {\n          \"reason\": \"Robots.txt\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"/html/2024-08-23/forever/cl8xw4r-fdag8wg-44dd.html\"\n        },\n        {\n          \"reason\": \"Robots.txt\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"/html/2024-08-23/forever/x2-vuvb0oi6qxkr-ku79.html\"\n        },\n        {\n          \"reason\": \"Robots.txt\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"/html/2024-08-24/forever/hwzxj1-qrs69-1fqlxbd.html\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://adamwathan.me/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/support-us/\",\n          \"url\": \"https://alternativeto.net/software/siteone-crawler--deep-website-analyzer/about/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://chat.openai.com/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://cz.linkedin.com/in/janbezdek\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          
\"url\": \"https://daisyui.com/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://discord.gg/Uh66HaZJ\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/contact-and-community/\",\n          \"url\": \"https://discord.gg/fdm7KE8Z\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://en.wikipedia.org/wiki/Larry_Page\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://en.wikipedia.org/wiki/Sergey_Brin\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://en.wikipedia.org/wiki/Steve_Jobs\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://en.wikipedia.org/wiki/Tilman_Hausherr\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/features/ease-of-use/\",\n          \"url\": 
\"https://github.com/janreges/siteone-crawler-gui/issues/new\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/installation-and-requirements/desktop-application/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-arm64-1.0.8.AppImage\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/installation-and-requirements/desktop-application/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-arm64-1.0.8.deb\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/installation-and-requirements/desktop-application/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-arm64-1.0.8.snap\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-x64-1.0.8.AppImage\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-x64-1.0.8.deb\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": 
\"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-x64-1.0.8.snap\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-mac-arm64-1.0.8.dmg\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-mac-x64-1.0.8.dmg\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-win-x64-1.0.8-portable.exe\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-win-x64-1.0.8-setup.exe\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-win-x64-1.0.8.msi\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler-markdown-examples/blob/main/react.dev/index.md\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/faq/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler/\"\n        
},\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/contact-and-community/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler/discussions\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler/issues\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/features/ease-of-use/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler/issues/new\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/installation-and-requirements/ready-to-use-packages/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler/releases\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/installation-and-requirements/ready-to-use-packages/\",\n          \"url\": \"https://github.com/janreges/siteone-crawler/releases/download/v1.0.8/siteone-crawler-v1.0.8-win-x64.zip\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://github.com/matyhtf\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/ideas-and-roadmap/\",\n          \"url\": \"https://github.com/swoole/swoole-src\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/installation-and-requirements/manual-installation/\",\n          \"url\": 
\"https://github.com/swoole/swoole-src/releases\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://github.com/swoole/swoole-src/releases/download/v4.8.13/swoole-cli-v4.8.13-cygwin-x64.zip\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://github.com/swoole/swoole-src/releases/download/v4.8.13/swoole-cli-v4.8.13-linux-arm64.tar.xz\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://github.com/swoole/swoole-src/releases/download/v4.8.13/swoole-cli-v4.8.13-macos-arm64.tar.xz\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://github.com/swoole/swoole-src/releases/download/v4.8.13/swoole-cli-v4.8.13-macos-x64.tar.xz\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://home.snafu.de/tilman/xenulink.html\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://learn.microsoft.com/en-us/windows/wsl/about\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": 
\"/installation-and-requirements/ready-to-use-packages/\",\n          \"url\": \"https://learn.microsoft.com/en-us/windows/wsl/install\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://nette.org/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/advanced-topics/contribution-and-development/\",\n          \"url\": \"https://opensource.guide/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/ideas-and-roadmap/\",\n          \"url\": \"https://openswoole.com/docs/modules/swoole-table\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/advanced-topics/contribution-and-development/\",\n          \"url\": \"https://phpbestpractices.org/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://phpstan.org/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/advanced-topics/contribution-and-development/\",\n          \"url\": \"https://phptherightway.com/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<script src>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://platform-api.sharethis.com/js/sharethis.js\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/ideas-and-roadmap/\",\n          \"url\": \"https://reactphp.org/\"\n        },\n        {\n          
\"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://starlight.astro.build/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://svelte.dev/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://tailwindcss.com/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://twitter.com/BillGates\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://twitter.com/DavidGrudl\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://twitter.com/OndrejMirtes\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://twitter.com/elonmusk\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://twitter.com/machal\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://twitter.com/rich_harris\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          
\"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://twitter.com/ryancarniato\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://twitter.com/saadeghi?lang=cs\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://twitter.com/sama\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://twitter.com/siteone_crawler\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://twitter.com/spazef0rze\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://twitter.com/swithinbank\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://twitter.com/zdendac\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://ubuntu.com/wsl\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://www.amd.com/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          
\"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://www.cdn77.com/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/ideas-and-roadmap/\",\n          \"url\": \"https://www.cygwin.com/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://www.electronjs.org/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://www.jetbrains.com/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://www.lenovo.com/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://www.linkedin.com/in/linustorvalds\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/installation-and-requirements/manual-installation/\",\n          \"url\": \"https://www.linuxfordevices.com/tutorials/linux/install-debian-on-windows-wsl\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://www.michalspacek.cz/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/advanced-topics/contribution-and-development/\",\n          \"url\": \"https://www.php-fig.org/psr/psr-12/\"\n        },\n        {\n          \"reason\": \"Not allowed 
host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/configuration/command-line-options/\",\n          \"url\": \"https://www.php.net/manual/en/timezones.php\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/contact-and-community/\",\n          \"url\": \"https://www.reddit.com/r/siteone_crawler/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/ideas-and-roadmap/\",\n          \"url\": \"https://www.rust-lang.org/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://www.siteone.io/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://www.solidjs.com/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://www.spse-po.sk/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://www.swoole.com/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/thanks/\",\n          \"url\": \"https://www.vzhurudolu.cz/\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/configuration/command-line-options/\",\n          \"url\": \"https://www.w3schools.com/xml/xpath_syntax.asp\"\n        },\n        {\n          
\"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/\",\n          \"url\": \"https://www.youtube.com/@SiteOne-Crawler\"\n        },\n        {\n          \"reason\": \"Not allowed host\",\n          \"sourceAttr\": \"<a href>\",\n          \"sourceUqId\": \"/introduction/faq/\",\n          \"url\": \"https://x.com/janreges\"\n        }\n      ],\n      \"title\": \"Skipped URLs\"\n    },\n    \"skipped-summary\": {\n      \"aplCode\": \"skipped-summary\",\n      \"columns\": {\n        \"count\": {\n          \"aplCode\": \"count\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Unique URLs\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 11\n        },\n        \"domain\": {\n          \"aplCode\": \"domain\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Domain\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"reason\": {\n          \"aplCode\": \"reason\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Reason\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 18\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"count\": \"29\",\n          
\"domain\": \"github.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"13\",\n          \"domain\": \"twitter.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"6\",\n          \"domain\": \"crawler.siteone.io\",\n          \"reason\": \"Robots.txt\"\n        },\n        {\n          \"count\": \"4\",\n          \"domain\": \"en.wikipedia.org\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"2\",\n          \"domain\": \"learn.microsoft.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"2\",\n          \"domain\": \"discord.gg\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.rust-lang.org\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"nette.org\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"phpstan.org\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.php-fig.org\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.solidjs.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.w3schools.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.jetbrains.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"phptherightway.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.linuxfordevices.com\",\n          
\"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"alternativeto.net\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.siteone.io\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"reactphp.org\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"opensource.guide\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.electronjs.org\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"phpbestpractices.org\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"tailwindcss.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.cygwin.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.php.net\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"adamwathan.me\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.youtube.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.reddit.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"svelte.dev\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"x.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        
{\n          \"count\": \"1\",\n          \"domain\": \"www.lenovo.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.cdn77.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.vzhurudolu.cz\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"starlight.astro.build\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.spse-po.sk\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"openswoole.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"cz.linkedin.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.michalspacek.cz\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"chat.openai.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"platform-api.sharethis.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.swoole.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.linkedin.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"ubuntu.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"www.amd.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": 
\"1\",\n          \"domain\": \"daisyui.com\",\n          \"reason\": \"Not allowed host\"\n        },\n        {\n          \"count\": \"1\",\n          \"domain\": \"home.snafu.de\",\n          \"reason\": \"Not allowed host\"\n        }\n      ],\n      \"title\": \"Skipped URLs Summary\"\n    },\n    \"slowest-urls\": {\n      \"aplCode\": \"slowest-urls\",\n      \"columns\": {\n        \"requestTime\": {\n          \"aplCode\": \"requestTime\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Time\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 6\n        },\n        \"statusCode\": {\n          \"aplCode\": \"statusCode\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Status\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": 6\n        },\n        \"url\": {\n          \"aplCode\": \"url\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": true,\n          \"getDataValueCallback\": null,\n          \"name\": \"Slow URL\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": true,\n          \"width\": 113\n        }\n      },\n      \"position\": \"before-url-table\",\n      \"rows\": [\n        {\n          \"requestTime\": \"0.0732\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/getting-started/basic-usage/\"\n        },\n        {\n         
 \"requestTime\": \"0.0548\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/configuration/command-line-options/\"\n        },\n        {\n          \"requestTime\": \"0.0400\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/introduction/contact-and-community/\"\n        },\n        {\n          \"requestTime\": \"0.0330\",\n          \"statusCode\": \"200\",\n          \"url\": \"https://crawler.siteone.io/features/ease-of-use/\"\n        }\n      ],\n      \"title\": \"TOP slowest URLs\"\n    },\n    \"source-domains\": {\n      \"aplCode\": \"source-domains\",\n      \"columns\": {\n        \"CSS\": {\n          \"aplCode\": \"CSS\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"CSS\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"Document\": {\n          \"aplCode\": \"Document\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Document\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"HTML\": {\n          \"aplCode\": \"HTML\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"HTML\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        
\"Image\": {\n          \"aplCode\": \"Image\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Image\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"JS\": {\n          \"aplCode\": \"JS\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"JS\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"domain\": {\n          \"aplCode\": \"domain\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": null,\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Domain\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        },\n        \"totals\": {\n          \"aplCode\": \"totals\",\n          \"escapeOutputHtml\": true,\n          \"forcedDataType\": null,\n          \"formatter\": {},\n          \"formatterWillChangeValueLength\": false,\n          \"getDataValueCallback\": null,\n          \"name\": \"Totals\",\n          \"nonBreakingSpaces\": false,\n          \"renderer\": null,\n          \"truncateIfLonger\": false,\n          \"width\": -1\n        }\n      },\n      \"position\": \"after-url-table\",\n      \"rows\": [\n        {\n          \"Audio\": \"\",\n          \"CSS\": \"3/79kB/11ms\",\n          \"Document\": \"1/152B/3ms\",\n          \"Font\": \"\",\n          \"HTML\": 
\"55/3MB/453ms\",\n          \"Image\": \"9/18MB/779ms\",\n          \"JS\": \"5/9kB/59ms\",\n          \"JSON\": \"\",\n          \"Other\": \"\",\n          \"Redirect\": \"\",\n          \"Video\": \"\",\n          \"XML\": \"\",\n          \"domain\": \"crawler.siteone.io\",\n          \"totalCount\": \"73\",\n          \"totals\": \"73/21MB/1.3s\"\n        }\n      ],\n      \"title\": \"Source domains\"\n    }\n  }\n}\n"
  },
  {
    "path": "docs/OUTPUT-crawler.siteone.io.txt",
    "content": "\n ####                ####             #####        \n ####                ####           #######        \n ####      ###       ####         #########        \n ####     ######     ####       ###### ####        \n  ######################       #####   ####        \n    #######    #######       #####     ####        \n    #######    #######         #       ####        \n  ######################               ####        \n ####     ######     ####              ####        \n ####       ##       ####              ####        \n ####                ####       ################## \n ####                ####       ################## \n\n==================================================\n# SiteOne Crawler, v2.0.0.20260316               #\n# Author: jan.reges@siteone.cz                   #\n==================================================\n\n\nDetected terminal width 138 < 140 chars - compact mode activated.\n\nProgress| URL                                                    | Status | Type     | Time   | Size   | Cache  | Access.  
| Best pr.\n--------------------------------------------------------------------------------------------------------------------------------------\n1/40    | /                                                      | 200    | HTML     | 4 ms   | 50 kB  | 60 min  | 3/1      | 7       \n2/66    | /introduction/key-features/                            | 200    | HTML     | 4 ms   | 54 kB  | 60 min  | 2/2      | 1/6     \n3/66    | /configuration/command-line-options/                   | 200    | HTML     | 10 ms  | 107 kB | 60 min  | 2/2      | 1/6     \n4/69    | /installation-and-requirements/ready-to-use-packages/  | 200    | HTML     | 5 ms   | 67 kB  | 60 min  | 2/2      | 1/6     \n5/69    | /_astro/siteone-crawler-logo-dark.DaIuiR1U.svg         | 200    | Image    | 2 ms   | 673 B  | 12 mon  |          |         \n6/69    | /features/dev-devops-assistant/                        | 200    | HTML     | 4 ms   | 43 kB  | 60 min  | 2/2      | 1/6     \n7/69    | /configuration/examples/                               | 200    | HTML     | 15 ms  | 79 kB  | 60 min  | 2/2      | 1/6     \n8/69    | /features/online-html-report-upload/                   | 200    | HTML     | 9 ms   | 58 kB  | 60 min  | 2/2      | 1/6     \n9/69    | /features/ease-of-use/                                 | 200    | HTML     | 4 ms   | 42 kB  | 60 min  | 2/2      | 1/6     \n10/69   | /siteone-crawler-app-demo.avif                         | 200    | Image    | 32 ms  | 591 kB | 12 mon  |          |         \n11/69   | /features/accessibility-analysis/                      | 200    | HTML     | 4 ms   | 50 kB  | 60 min  | 2/2      | 1/6     \n12/69   | /introduction/overview/                                | 200    | HTML     | 4 ms   | 59 kB  | 60 min  | 2/2      | 1/6     \n13/69   | /features/redirect-and-404-analysis/                   | 200    | HTML     | 4 ms   | 50 kB  | 60 min  | 2/2      | 1/6     \n14/69   | /features/performance-analysis/                        | 200    | HTML    
 | 4 ms   | 58 kB  | 60 min  | 2/2      | 1/6     \n15/69   | /features/website-to-markdown-converter/               | 200    | HTML     | 4 ms   | 52 kB  | 60 min  | 2/2      | 1/6     \n16/69   | /introduction/contact-and-community/                   | 200    | HTML     | 4 ms   | 53 kB  | 60 min  | 2/2      | 1/6     \n17/69   | /features/offline-website-generator/                   | 200    | HTML     | 5 ms   | 51 kB  | 60 min  | 2/2      | 1/6     \n18/69   | /_astro/page.7qqag-5g.js                               | 200    | JS       | 2 ms   | 2 kB   | 12 mon  |          |         \n19/69   | /_astro/Search.astro_astro_type_scri…_lang.DMZ5WJ-J.js | 200    | JS       | 3 ms   | 3 kB   | 12 mon  |          |         \n20/69   | /robots.txt                                            | 200    | Document | 2 ms   | 152 B  | 60 min  |          |         \n21/69   | /features/audit-report/                                | 200    | HTML     | 4 ms   | 52 kB  | 60 min  | 2/2      | 1/6     \n22/69   | /installation-and-requirements/manual-installation/    | 200    | HTML     | 5 ms   | 70 kB  | 60 min  | 2/2      | 1/6     \n23/70   | /installation-and-requirements/desktop-application/    | 200    | HTML     | 10 ms  | 50 kB  | 60 min  | 2/2      | 1/6     \n24/70   | /features/technical-analysis/                          | 200    | HTML     | 5 ms   | 73 kB  | 60 min  | 2/2      | 1/6     \n25/70   | /_astro/index.BRwACyc2.css                             | 200    | CSS      | 6 ms   | 60 kB  | 12 mon  |          |         \n26/70   | /siteone-crawler-app-demo.gif                          | 200    | Image    | 134 ms | 3 MB   | 12 mon  |          |         \n27/70   | /features/heading-analysis/                            | 200    | HTML     | 76 ms  | 42 kB  | 60 min  | 2/2      | 1/6     \n28/70   | /features/security-analysis/                           | 200    | HTML     | 4 ms   | 55 kB  | 60 min  | 2/2      | 1/6     \n29/70   | /favicon.svg                      
                     | 200    | Image    | 2 ms   | 673 B  | 12 mon  |          |         \n30/70   | /siteone-crawler-command-line-demo-w960.avif           | 200    | Image    | 50 ms  | 1 MB   | 12 mon  |          |         \n31/70   | /_astro/siteone-crawler-mascot.CPk15tXh_HGMwJ.webp     | 200    | Image    | 14 ms  | 31 kB  | 12 mon  |          |         \n32/70   | /installation-and-requirements/system-requirements/    | 200    | HTML     | 5 ms   | 57 kB  | 60 min  | 2/2      | 1/6     \n33/70   | /getting-started/basic-usage/                          | 200    | HTML     | 56 ms  | 60 kB  | 60 min  | 2/2      | 1/6     \n34/70   | /getting-started/quick-start-guide/                    | 200    | HTML     | 70 ms  | 52 kB  | 60 min  | 2/2      | 1/6     \n35/70   | /siteone-crawler-command-line-demo-full.gif            | 200    | Image    | 344 ms | 8 MB   | 12 mon  |          |         \n36/70   | /features/deep-website-crawling/                       | 200    | HTML     | 142 ms | 44 kB  | 60 min  | 2/2      | 1/6     \n37/70   | /features/seo-and-opengraph-analysis/                  | 200    | HTML     | 8 ms   | 44 kB  | 60 min  | 2/2      | 1/6     \n38/70   | /siteone-crawler-command-line-demo-w960.gif            | 200    | Image    | 191 ms | 4 MB   | 12 mon  |          |         \n39/70   | /introduction/ideas-and-roadmap/                       | 200    | HTML     | 132 ms | 52 kB  | 60 min  | 2/2      | 1/6     \n40/70   | /_astro/print.DNXP8c50.css                             | 200    | CSS      | 6 ms   | 3 kB   | 12 mon  |          |         \n41/73   | /features/performance-metrics/                         | 200    | HTML     | 4 ms   | 63 kB  | 60 min  | 2/2      | 1/6     \n42/73   | /features/content-type-analysis/                       | 200    | HTML     | 4 ms   | 54 kB  | 60 min  | 2/2      | 1/6     \n43/73   | /advanced-topics/crawler-behavior/                     | 200    | HTML     | 4 ms   | 64 kB  | 60 min  | 2/2      | 1/6     
\n44/73   | /features/sitemap-generator/                           | 200    | HTML     | 4 ms   | 45 kB  | 60 min  | 2/2      | 1/6     \n45/73   | /features/stress-testing/                              | 200    | HTML     | 4 ms   | 46 kB  | 60 min  | 2/2      | 1/6     \n46/73   | /features/best-practices-analysis/                     | 200    | HTML     | 4 ms   | 51 kB  | 60 min  | 2/2      | 1/6     \n47/73   | /introduction/support-us/                              | 200    | HTML     | 5 ms   | 41 kB  | 60 min  | 2/2      | 1/6     \n48/73   | /getting-started/advanced-usage/                       | 200    | HTML     | 5 ms   | 71 kB  | 60 min  | 2/2      | 1/6     \n49/73   | /features/caching-analysis/                            | 200    | HTML     | 4 ms   | 62 kB  | 60 min  | 2/2      | 1/6     \n50/73   | /features/exports-and-reports/                         | 200    | HTML     | 4 ms   | 46 kB  | 60 min  | 2/2      | 1/6     \n51/73   | /introduction/thanks/                                  | 200    | HTML     | 4 ms   | 46 kB  | 60 min  | 2/2      | 1/6     \n52/73   | /features/availability/                                | 200    | HTML     | 4 ms   | 43 kB  | 60 min  | 2/2      | 1/6     \n53/73   | /advanced-topics/extending/                            | 200    | HTML     | 5 ms   | 112 kB | 60 min  | 2/2      | 1/6     \n54/73   | /introduction/faq/                                     | 200    | HTML     | 4 ms   | 64 kB  | 60 min  | 2/2      | 1/6     \n55/73   | /_astro/MobileTableOfContents.astro_…_lang.C181hMzK.js | 200    | JS       | 2 ms   | 667 B  | 12 mon  |          |         \n56/73   | /introduction/motivation/                              | 200    | HTML     | 6 ms   | 50 kB  | 60 min  | 2/2      | 1/6     \n57/73   | /features/dns-analysis/                                | 200    | HTML     | 4 ms   | 56 kB  | 60 min  | 2/2      | 1/6     \n58/73   | /features/mailer/                                      | 200    | HTML     | 4 ms   
| 45 kB  | 60 min  | 2/2      | 1/6     \n59/73   | /advanced-topics/troubleshooting/                      | 200    | HTML     | 4 ms   | 71 kB  | 60 min  | 2/2      | 1/6     \n60/73   | /_astro/TableOfContents.astro_astro_…_lang.CKWWgpjV.js | 200    | JS       | 2 ms   | 2 kB   | 12 mon  |          |         \n61/73   | /advanced-topics/caching/                              | 200    | HTML     | 4 ms   | 57 kB  | 60 min  | 2/2      | 1/6     \n62/73   | /advanced-topics/contribution-and-development/         | 200    | HTML     | 5 ms   | 76 kB  | 60 min  | 2/2      | 1/6     \n63/73   | /features/headers-analysis/                            | 200    | HTML     | 4 ms   | 68 kB  | 60 min  | 2/2      | 1/6     \n64/73   | /features/ssl-tls-analysis/                            | 200    | HTML     | 4 ms   | 59 kB  | 60 min  | 2/2      | 1/6     \n65/73   | /features/improvement-meter/                           | 200    | HTML     | 4 ms   | 45 kB  | 60 min  | 2/2      | 1/6     \n66/73   | /features/source-domains-analysis/                     | 429    | HTML     | 3 ms   | 788 B  | etag    |          | 4       \n67/73   | /_astro/ec.8zarh.js                                    | 429    | HTML     | 2 ms   | 788 B  | etag    |          | 4       \n68/73   | /_astro/ec.5wl1j.css                                   | 429    | HTML     | 2 ms   | 788 B  | etag    |          | 4       \n69/73   | /_astro/ready-to-use-packages.BYCPharn_Z1ivwN5.webp    | 429    | HTML     | 2 ms   | 788 B  | etag    |          | 4       \n70/73   | /_astro/desktop-app-release-assets.D…-vv8_Z2rPu7O.webp | 429    | HTML     | 2 ms   | 788 B  | etag    |          | 4       \n71/73   | /docs/features/technical-analysis                      | 429    | HTML     | 2 ms   | 788 B  | etag    |          | 4       \n72/73   | /docs/features/best-practices-analysis                 | 429    | HTML     | 2 ms   | 788 B  | etag    |          | 4       \n73/73   | /docs/features/content-type-analysis        
           | 404    | HTML     | 3 ms   | 780 B  | etag    |          | 4       \n\nSkipped URLs Summary\n--------------------\n\nReason             | Domain                     | Unique URLs\n---------------------------------------------------------------\nNot allowed host   | github.com                 | 29         \nNot allowed host   | twitter.com                | 13         \nRobots.txt         | crawler.siteone.io         | 6          \nNot allowed host   | en.wikipedia.org           | 4          \nNot allowed host   | discord.gg                 | 2          \nNot allowed host   | learn.microsoft.com        | 2          \nNot allowed host   | home.snafu.de              | 1          \nNot allowed host   | svelte.dev                 | 1          \nNot allowed host   | www.php.net                | 1          \nNot allowed host   | phptherightway.com         | 1          \nNot allowed host   | alternativeto.net          | 1          \nNot allowed host   | reactphp.org               | 1          \nNot allowed host   | www.siteone.io             | 1          \nNot allowed host   | www.php-fig.org            | 1          \nNot allowed host   | www.w3schools.com          | 1          \nNot allowed host   | www.jetbrains.com          | 1          \nNot allowed host   | www.linkedin.com           | 1          \nNot allowed host   | adamwathan.me              | 1          \nNot allowed host   | www.cygwin.com             | 1          \nNot allowed host   | tailwindcss.com            | 1          \nNot allowed host   | chat.openai.com            | 1          \nNot allowed host   | nette.org                  | 1          \nNot allowed host   | starlight.astro.build      | 1          \nNot allowed host   | www.youtube.com            | 1          \nNot allowed host   | www.linuxfordevices.com    | 1          \nNot allowed host   | opensource.guide           | 1          \nNot allowed host   | platform-api.sharethis.com | 1          \nNot allowed host   | www.vzhurudolu.cz   
       | 1          \nNot allowed host   | cz.linkedin.com            | 1          \nNot allowed host   | www.cdn77.com              | 1          \nNot allowed host   | phpbestpractices.org       | 1          \nNot allowed host   | www.solidjs.com            | 1          \nNot allowed host   | www.rust-lang.org          | 1          \nNot allowed host   | www.amd.com                | 1          \nNot allowed host   | www.spse-po.sk             | 1          \nNot allowed host   | www.michalspacek.cz        | 1          \nNot allowed host   | ubuntu.com                 | 1          \nNot allowed host   | daisyui.com                | 1          \nNot allowed host   | openswoole.com             | 1          \nNot allowed host   | phpstan.org                | 1          \nNot allowed host   | www.swoole.com             | 1          \nNot allowed host   | www.lenovo.com             | 1          \nNot allowed host   | www.reddit.com             | 1          \nNot allowed host   | x.com                      | 1          \nNot allowed host   | www.electronjs.org         | 1          \n\n\nSkipped URLs\n------------\n\nReason             | Skipped URL                                                  | Source              | Found at URL                                                \n------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nRobots.txt         | /examples-exports/docs.astro.build/                          | <a href>            | /                                                           \nRobots.txt         | /examples-exports/netlify.com/                               | <a href>            | /                                                           \nRobots.txt         | /examples-exports/nextjs.org/                                | <a href>            | /                                                           \nRobots.txt         | 
/html/2024-08-23/forever/cl8xw4r-fdag8wg-44dd.html           | <a href>            | /                                                           \nRobots.txt         | /html/2024-08-23/forever/x2-vuvb0oi6qxkr-ku79.html           | <a href>            | /                                                           \nRobots.txt         | /html/2024-08-24/forever/hwzxj1-qrs69-1fqlxbd.html           | <a href>            | /                                                           \nNot allowed host   | https://adamwathan.me/                                       | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://alternativeto.net/software/siteo…ite-analyzer/about/ | <a href>            | /introduction/support-us/                                   \nNot allowed host   | https://chat.openai.com/                                     | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://cz.linkedin.com/in/janbezdek                         | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://daisyui.com/                                         | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://discord.gg/Uh66HaZJ                                  | <a href>            | /                                                           \nNot allowed host   | https://discord.gg/fdm7KE8Z                                  | <a href>            | /introduction/contact-and-community/                        \nNot allowed host   | https://en.wikipedia.org/wiki/Larry_Page                     | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://en.wikipedia.org/wiki/Sergey_Brin                    | <a href>            | /introduction/thanks/                                       \nNot 
allowed host   | https://en.wikipedia.org/wiki/Steve_Jobs                     | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://en.wikipedia.org/wiki/Tilman_Hausherr                | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://github.com/janreges/siteone-crawler                  | <a href>            | /                                                           \nNot allowed host   | https://github.com/janreges/siteone-crawler-gui              | <a href>            | /                                                           \nNot allowed host   | https://github.com/janreges/siteone-crawler-gui/issues/new   | <a href>            | /features/dev-devops-assistant/                             \nNot allowed host   | https://github.com/janreges/siteone-crawler-gui/releases     | <a href>            | /                                                           \nNot allowed host   | https://github.com/janreges/siteone-craw…rm64-1.0.8.AppImage | <a href>            | /installation-and-requirements/desktop-application/         \nNot allowed host   | https://github.com/janreges/siteone-craw…nux-arm64-1.0.8.deb | <a href>            | /installation-and-requirements/desktop-application/         \nNot allowed host   | https://github.com/janreges/siteone-craw…ux-arm64-1.0.8.snap | <a href>            | /installation-and-requirements/desktop-application/         \nNot allowed host   | https://github.com/janreges/siteone-craw…-x64-1.0.8.AppImage | <a href>            | /                                                           \nNot allowed host   | https://github.com/janreges/siteone-craw…linux-x64-1.0.8.deb | <a href>            | /                                                           \nNot allowed host   | https://github.com/janreges/siteone-craw…inux-x64-1.0.8.snap | <a href>            | /                                                 
          \nNot allowed host   | https://github.com/janreges/siteone-craw…mac-arm64-1.0.8.dmg | <a href>            | /                                                           \nNot allowed host   | https://github.com/janreges/siteone-craw…r-mac-x64-1.0.8.dmg | <a href>            | /                                                           \nNot allowed host   | https://github.com/janreges/siteone-craw…-1.0.8-portable.exe | <a href>            | /                                                           \nNot allowed host   | https://github.com/janreges/siteone-craw…x64-1.0.8-setup.exe | <a href>            | /                                                           \nNot allowed host   | https://github.com/janreges/siteone-craw…r-win-x64-1.0.8.msi | <a href>            | /                                                           \nNot allowed host   | https://github.com/janreges/siteone-craw…/react.dev/index.md | <a href>            | /                                                           \nNot allowed host   | https://github.com/janreges/siteone-crawler/                 | <a href>            | /introduction/faq/                                          \nNot allowed host   | https://github.com/janreges/siteone-crawler/discussions      | <a href>            | /introduction/contact-and-community/                        \nNot allowed host   | https://github.com/janreges/siteone-crawler/issues           | <a href>            | /                                                           \nNot allowed host   | https://github.com/janreges/siteone-crawler/issues/new       | <a href>            | /features/dev-devops-assistant/                             \nNot allowed host   | https://github.com/janreges/siteone-crawler/releases         | <a href>            | /installation-and-requirements/ready-to-use-packages/       \nNot allowed host   | https://github.com/janreges/siteone-craw…-v1.0.8-win-x64.zip | <a href>            | 
/installation-and-requirements/ready-to-use-packages/       \nNot allowed host   | https://github.com/matyhtf                                   | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://github.com/swoole/swoole-src                         | <a href>            | /introduction/ideas-and-roadmap/                            \nNot allowed host   | https://github.com/swoole/swoole-src/releases                | <a href>            | /installation-and-requirements/manual-installation/         \nNot allowed host   | https://github.com/swoole/swoole-src/rel…8.13-cygwin-x64.zip | <a href>            | /installation-and-requirements/manual-installation/         \nNot allowed host   | https://github.com/swoole/swoole-src/rel…-linux-arm64.tar.xz | <a href>            | /installation-and-requirements/manual-installation/         \nNot allowed host   | https://github.com/swoole/swoole-src/rel…-macos-arm64.tar.xz | <a href>            | /installation-and-requirements/manual-installation/         \nNot allowed host   | https://github.com/swoole/swoole-src/rel…13-macos-x64.tar.xz | <a href>            | /installation-and-requirements/manual-installation/         \nNot allowed host   | https://home.snafu.de/tilman/xenulink.html                   | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://learn.microsoft.com/en-us/windows/wsl/about          | <a href>            | /installation-and-requirements/manual-installation/         \nNot allowed host   | https://learn.microsoft.com/en-us/windows/wsl/install        | <a href>            | /installation-and-requirements/ready-to-use-packages/       \nNot allowed host   | https://nette.org/                                           | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://opensource.guide/                                    | <a 
href>            | /advanced-topics/contribution-and-development/              \nNot allowed host   | https://openswoole.com/docs/modules/swoole-table             | <a href>            | /introduction/ideas-and-roadmap/                            \nNot allowed host   | https://phpbestpractices.org/                                | <a href>            | /advanced-topics/contribution-and-development/              \nNot allowed host   | https://phpstan.org/                                         | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://phptherightway.com/                                  | <a href>            | /advanced-topics/contribution-and-development/              \nNot allowed host   | https://platform-api.sharethis.com/js/sharethis.js           | <script src>        | /                                                           \nNot allowed host   | https://reactphp.org/                                        | <a href>            | /introduction/ideas-and-roadmap/                            \nNot allowed host   | https://starlight.astro.build/                               | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://svelte.dev/                                          | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://tailwindcss.com/                                     | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://twitter.com/BillGates                                | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://twitter.com/DavidGrudl                               | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://twitter.com/OndrejMirtes                  
           | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://twitter.com/elonmusk                                 | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://twitter.com/machal                                   | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://twitter.com/rich_harris                              | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://twitter.com/ryancarniato                             | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://twitter.com/saadeghi?lang=cs                         | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://twitter.com/sama                                     | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://twitter.com/siteone_crawler                          | <a href>            | /                                                           \nNot allowed host   | https://twitter.com/spazef0rze                               | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://twitter.com/swithinbank                              | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://twitter.com/zdendac                                  | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://ubuntu.com/wsl                                       | <a href>            | /installation-and-requirements/manual-installation/         \nNot allowed host   | https://www.amd.com/              
                           | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://www.cdn77.com/                                       | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://www.cygwin.com/                                      | <a href>            | /introduction/ideas-and-roadmap/                            \nNot allowed host   | https://www.electronjs.org/                                  | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://www.jetbrains.com/                                   | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://www.lenovo.com/                                      | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://www.linkedin.com/in/linustorvalds                    | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://www.linuxfordevices.com/tutorial…bian-on-windows-wsl | <a href>            | /installation-and-requirements/manual-installation/         \nNot allowed host   | https://www.michalspacek.cz/                                 | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://www.php-fig.org/psr/psr-12/                          | <a href>            | /advanced-topics/contribution-and-development/              \nNot allowed host   | https://www.php.net/manual/en/timezones.php                  | <a href>            | /configuration/command-line-options/                        \nNot allowed host   | https://www.reddit.com/r/siteone_crawler/                    | <a href>            | /introduction/contact-and-community/                        \nNot allowed host   | 
https://www.rust-lang.org/                                   | <a href>            | /introduction/ideas-and-roadmap/                            \nNot allowed host   | https://www.siteone.io/                                      | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://www.solidjs.com/                                     | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://www.spse-po.sk/                                      | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://www.swoole.com/                                      | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://www.vzhurudolu.cz/                                   | <a href>            | /introduction/thanks/                                       \nNot allowed host   | https://www.w3schools.com/xml/xpath_syntax.asp               | <a href>            | /configuration/command-line-options/                        \nNot allowed host   | https://www.youtube.com/@SiteOne-Crawler                     | <a href>            | /                                                           \nNot allowed host   | https://x.com/janreges                                       | <a href>            | /introduction/faq/                                          \n\n\nExternal URLs\n-------------\n\nExternal URL                                                                                                        | Pages | Found on URL (max 5)                                                           \n---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\nhttps://adamwathan.me/                                   
                                                           | 1     | /introduction/thanks/                                                          \nhttps://alternativeto.net/software/siteone-crawler--deep-website-analyzer/about/                                    | 1     | /introduction/support-us/                                                      \nhttps://chat.openai.com/                                                                                            | 1     | /introduction/thanks/                                                          \nhttps://cz.linkedin.com/in/janbezdek                                                                                | 1     | /introduction/thanks/                                                          \nhttps://daisyui.com/                                                                                                | 1     | /introduction/thanks/                                                          \nhttps://discord.gg/Uh66HaZJ                                                                                         | 1     | /                                                                              \nhttps://discord.gg/fdm7KE8Z                                                                                         | 1     | /introduction/contact-and-community/                                           \nhttps://en.wikipedia.org/wiki/Larry_Page                                                                            | 1     | /introduction/thanks/                                                          \nhttps://en.wikipedia.org/wiki/Sergey_Brin                                                                           | 1     | /introduction/thanks/                                                          \nhttps://en.wikipedia.org/wiki/Steve_Jobs                                                                            | 1     | /introduction/thanks/                                               
           \nhttps://en.wikipedia.org/wiki/Tilman_Hausherr                                                                       | 1     | /introduction/thanks/                                                          \nhttps://github.com/janreges/siteone-crawler                                                                         | 1     | /                                                                              \nhttps://github.com/janreges/siteone-crawler-gui                                                                     | 1     | /                                                                              \nhttps://github.com/janreges/siteone-crawler-gui/issues/new                                                          | 1     | /features/dev-devops-assistant/                                                \nhttps://github.com/janreges/siteone-crawler-gui/releases                                                            | 1     | /                                                                              \nhttps://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-arm64-1.0.8.AppImage | 1     | /installation-and-requirements/desktop-application/                            \nhttps://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-arm64-1.0.8.deb      | 1     | /installation-and-requirements/desktop-application/                            \nhttps://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-arm64-1.0.8.snap     | 1     | /installation-and-requirements/desktop-application/                            \nhttps://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-x64-1.0.8.AppImage   | 1     | /                                                                              \nhttps://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-x64-1.0.8.deb        | 1     
| /                                                                              \nhttps://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-linux-x64-1.0.8.snap       | 1     | /                                                                              \nhttps://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-mac-arm64-1.0.8.dmg        | 1     | /                                                                              \nhttps://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-mac-x64-1.0.8.dmg          | 1     | /                                                                              \nhttps://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-win-x64-1.0.8-portable.exe | 1     | /                                                                              \nhttps://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-win-x64-1.0.8-setup.exe    | 1     | /                                                                              \nhttps://github.com/janreges/siteone-crawler-gui/releases/download/v1.0.8/SiteOne-Crawler-win-x64-1.0.8.msi          | 1     | /                                                                              \nhttps://github.com/janreges/siteone-crawler-markdown-examples/blob/main/react.dev/index.md                          | 1     | /                                                                              \nhttps://github.com/janreges/siteone-crawler/                                                                        | 1     | /introduction/faq/                                                             \nhttps://github.com/janreges/siteone-crawler/discussions                                                             | 1     | /introduction/contact-and-community/                                           \nhttps://github.com/janreges/siteone-crawler/issues    
                                                              | 1     | /                                                                              \nhttps://github.com/janreges/siteone-crawler/issues/new                                                              | 1     | /features/dev-devops-assistant/                                                \nhttps://github.com/janreges/siteone-crawler/releases                                                                | 1     | /installation-and-requirements/ready-to-use-packages/                          \nhttps://github.com/janreges/siteone-crawler/releases/download/v1.0.8/siteone-crawler-v1.0.8-win-x64.zip             | 1     | /installation-and-requirements/ready-to-use-packages/                          \nhttps://github.com/matyhtf                                                                                          | 1     | /introduction/thanks/                                                          \nhttps://github.com/swoole/swoole-src                                                                                | 1     | /introduction/ideas-and-roadmap/                                               \nhttps://github.com/swoole/swoole-src/releases                                                                       | 1     | /installation-and-requirements/manual-installation/                            \nhttps://github.com/swoole/swoole-src/releases/download/v4.8.13/swoole-cli-v4.8.13-cygwin-x64.zip                    | 1     | /installation-and-requirements/manual-installation/                            \nhttps://github.com/swoole/swoole-src/releases/download/v4.8.13/swoole-cli-v4.8.13-linux-arm64.tar.xz                | 1     | /installation-and-requirements/manual-installation/                            \nhttps://github.com/swoole/swoole-src/releases/download/v4.8.13/swoole-cli-v4.8.13-macos-arm64.tar.xz                | 1     | /installation-and-requirements/manual-installation/              
              \nhttps://github.com/swoole/swoole-src/releases/download/v4.8.13/swoole-cli-v4.8.13-macos-x64.tar.xz                  | 1     | /installation-and-requirements/manual-installation/                            \nhttps://home.snafu.de/tilman/xenulink.html                                                                          | 1     | /introduction/thanks/                                                          \nhttps://learn.microsoft.com/en-us/windows/wsl/about                                                                 | 1     | /installation-and-requirements/manual-installation/                            \nhttps://learn.microsoft.com/en-us/windows/wsl/install                                                               | 1     | /installation-and-requirements/ready-to-use-packages/                          \nhttps://nette.org/                                                                                                  | 1     | /introduction/thanks/                                                          \nhttps://opensource.guide/                                                                                           | 1     | /advanced-topics/contribution-and-development/                                 \nhttps://openswoole.com/docs/modules/swoole-table                                                                    | 1     | /introduction/ideas-and-roadmap/                                               \nhttps://phpbestpractices.org/                                                                                       | 1     | /advanced-topics/contribution-and-development/                                 \nhttps://phpstan.org/                                                                                                | 1     | /introduction/thanks/                                                          \nhttps://phptherightway.com/                                                                                         | 1  
   | /advanced-topics/contribution-and-development/                                 \nhttps://platform-api.sharethis.com/js/sharethis.js                                                                  | 1     | /                                                                              \nhttps://reactphp.org/                                                                                               | 1     | /introduction/ideas-and-roadmap/                                               \nhttps://starlight.astro.build/                                                                                      | 1     | /introduction/thanks/                                                          \nhttps://svelte.dev/                                                                                                 | 1     | /introduction/thanks/                                                          \nhttps://tailwindcss.com/                                                                                            | 1     | /introduction/thanks/                                                          \nhttps://twitter.com/BillGates                                                                                       | 1     | /introduction/thanks/                                                          \nhttps://twitter.com/DavidGrudl                                                                                      | 1     | /introduction/thanks/                                                          \nhttps://twitter.com/OndrejMirtes                                                                                    | 1     | /introduction/thanks/                                                          \nhttps://twitter.com/elonmusk                                                                                        | 1     | /introduction/thanks/                                                          \nhttps://twitter.com/machal                         
                                                                 | 1     | /introduction/thanks/                                                          \nhttps://twitter.com/rich_harris                                                                                     | 1     | /introduction/thanks/                                                          \nhttps://twitter.com/ryancarniato                                                                                    | 1     | /introduction/thanks/                                                          \nhttps://twitter.com/saadeghi?lang=cs                                                                                | 1     | /introduction/thanks/                                                          \nhttps://twitter.com/sama                                                                                            | 1     | /introduction/thanks/                                                          \nhttps://twitter.com/siteone_crawler                                                                                 | 1     | /                                                                              \nhttps://twitter.com/spazef0rze                                                                                      | 1     | /introduction/thanks/                                                          \nhttps://twitter.com/swithinbank                                                                                     | 1     | /introduction/thanks/                                                          \nhttps://twitter.com/zdendac                                                                                         | 1     | /introduction/thanks/                                                          \nhttps://ubuntu.com/wsl                                                                                              | 1     | /installation-and-requirements/manual-installation/           
                 \nhttps://www.amd.com/                                                                                                | 1     | /introduction/thanks/                                                          \nhttps://www.cdn77.com/                                                                                              | 1     | /introduction/thanks/                                                          \nhttps://www.cygwin.com/                                                                                             | 1     | /introduction/ideas-and-roadmap/                                               \nhttps://www.electronjs.org/                                                                                         | 1     | /introduction/thanks/                                                          \nhttps://www.jetbrains.com/                                                                                          | 1     | /introduction/thanks/                                                          \nhttps://www.lenovo.com/                                                                                             | 1     | /introduction/thanks/                                                          \nhttps://www.linkedin.com/in/linustorvalds                                                                           | 1     | /introduction/thanks/                                                          \nhttps://www.linuxfordevices.com/tutorials/linux/install-debian-on-windows-wsl                                       | 1     | /installation-and-requirements/manual-installation/                            \nhttps://www.michalspacek.cz/                                                                                        | 1     | /introduction/thanks/                                                          \nhttps://www.php-fig.org/psr/psr-12/                                                                                 | 
1     | /advanced-topics/contribution-and-development/                                 \nhttps://www.php.net/manual/en/timezones.php                                                                         | 1     | /configuration/command-line-options/                                           \nhttps://www.reddit.com/r/siteone_crawler/                                                                           | 1     | /introduction/contact-and-community/                                           \nhttps://www.rust-lang.org/                                                                                          | 1     | /introduction/ideas-and-roadmap/                                               \nhttps://www.siteone.io/                                                                                             | 1     | /introduction/thanks/                                                          \nhttps://www.solidjs.com/                                                                                            | 1     | /introduction/thanks/                                                          \nhttps://www.spse-po.sk/                                                                                             | 1     | /introduction/thanks/                                                          \nhttps://www.swoole.com/                                                                                             | 1     | /introduction/thanks/                                                          \nhttps://www.vzhurudolu.cz/                                                                                          | 1     | /introduction/thanks/                                                          \nhttps://www.w3schools.com/xml/xpath_syntax.asp                                                                      | 1     | /configuration/command-line-options/                                           \nhttps://www.youtube.com/@SiteOne-Crawler        
                                                                    | 1     | /                                                                              \nhttps://x.com/janreges                                                                                              | 1     | /introduction/faq/                                                             \n\n\nRedirected URLs\n---------------\n\nNo redirects found.\n\n\n404 URLs\n--------\n\nStatus | URL 404                                                       | Found at URL                                                 \n----------------------------------------------------------------------------------------------------------------------------------------\n404    | /docs/features/content-type-analysis                          | /features/performance-metrics/                               \n\n\nSSL/TLS info\n------------\n\nInfo                   | Text                                                                                                        \n---------------------------------------------------------------------------------------------------------------------------------------\nIssuer                 | C = BE, O = GlobalSign nv-sa, CN = GlobalSign GCC R6 AlphaSSL CA 2025                                       \nSubject                | CN = *.siteone.io                                                                                           \nValid from             | Feb  9 15:43:30 2026 GMT (VALID already 35 day(s))                                                          \nValid to               | Mar 13 15:43:29 2027 GMT (VALID still for 362 day(s))                                                       \nSupported protocols    | TLSv1.2                                                                                                     \nRAW certificate output | Certificate:\n    Data:\n        Version: 3 (0x2)\n        Serial Number:…2:7c:16:53:e8:\n        91:24:48:5b   \nRAW protocols 
output   | === ssl2 ===\ns_client: Unknown option: -ssl2\ns_client: Use -help for su…ent\nVerify return code: 0 (ok)\n---  \n\n\nTOP fastest URLs\n----------------\n\nTime   | Status | Fast URL                                                                                                              \n------------------------------------------------------------------------------------------------------------------------------------------\n4 ms   | 200    | /features/performance-analysis/                                                                                       \n4 ms   | 200    | /features/availability/                                                                                               \n4 ms   | 200    | /features/stress-testing/                                                                                             \n4 ms   | 200    | /features/redirect-and-404-analysis/                                                                                  \n4 ms   | 200    | /features/ease-of-use/                                                                                                \n4 ms   | 200    | /features/mailer/                                                                                                     \n4 ms   | 200    | /features/dns-analysis/                                                                                               \n4 ms   | 200    | /features/improvement-meter/                                                                                          \n4 ms   | 200    | /features/exports-and-reports/                                                                                        \n4 ms   | 200    | /features/ssl-tls-analysis/                                                                                           \n4 ms   | 200    | /features/best-practices-analysis/                                                                                    \n4 ms   | 200    | 
/features/sitemap-generator/                                                                                          \n4 ms   | 200    | /advanced-topics/troubleshooting/                                                                                     \n4 ms   | 200    | /introduction/key-features/                                                                                           \n4 ms   | 200    | /introduction/contact-and-community/                                                                                  \n4 ms   | 200    | /introduction/thanks/                                                                                                 \n4 ms   | 200    | /features/content-type-analysis/                                                                                      \n4 ms   | 200    | /introduction/overview/                                                                                               \n4 ms   | 200    | /features/security-analysis/                                                                                          \n4 ms   | 200    | /features/website-to-markdown-converter/                                                                              \n\n\nTOP slowest URLs\n----------------\n\nTime   | Status | Slow URL                                                                                                         \n-------------------------------------------------------------------------------------------------------------------------------------\n142 ms | 200    | /features/deep-website-crawling/                                                                                 \n132 ms | 200    | /introduction/ideas-and-roadmap/                                                                                 \n76 ms  | 200    | /features/heading-analysis/                                                                                      \n70 ms  | 200    | /getting-started/quick-start-guide/              
                                                                \n56 ms  | 200    | /getting-started/basic-usage/                                                                                    \n15 ms  | 200    | /configuration/examples/                                                                                         \n10 ms  | 200    | /configuration/command-line-options/                                                                             \n10 ms  | 200    | /installation-and-requirements/desktop-application/                                                              \n\n\nSEO metadata\n------------\n\nThis table contains large data and shows max 10 rows. To see them all, use output to HTML using `--output-html-report=tmp/myreport.html`.\n\nURL                                                | Indexing             | Title        | H1           | Description  | Keywords    \n---------------------------------------------------------------------------------------------------------------------------------------\n/                                                  | Allowed              | SiteOne…ove  | SiteOne…ler  | A very u…o). |             \n/advanced-topics/caching/                          | Allowed              | Caching…ler  | Caching      | SiteOne…ls.  |             \n/advanced-topics/contribution-and-development/     | Allowed              | Contribu…ler | Contribu…ent | Guidelin…ts. |             \n/advanced-topics/crawler-behavior/                 | Allowed              | Crawler…ler  | Crawler…ior  | Understa…ns. |             \n/advanced-topics/extending/                        | Allowed              | Extendin…ler | Extending    | SiteOne…ty.  |             \n/advanced-topics/troubleshooting/                  | Allowed              | Troubles…ler | Troubles…ing | Solution…es. |             \n/configuration/command-line-options/               | Allowed              | Command-…ler | Command-…ons | This sec…ol. 
|             \n/configuration/examples/                           | Allowed              | Examples…ler | Examples     | This sec…ol. |             \n/features/accessibility-analysis/                  | Allowed              | Accessib…ler | Accessib…sis | SiteOne…es.  |             \n/features/audit-report/                            | Allowed              | Audit Re…ler | Audit Report | Learn ab…at. |             \n\n\nOpenGraph metadata\n------------------\n\nThis table contains large data and shows max 10 rows. To see them all, use output to HTML using `--output-html-report=tmp/myreport.html`.\n\nURL                                                | OG Title   | OG Description | OG Image           | Twitter Title | Twitter Description | Twitter Image     \n--------------------------------------------------------------------------------------------------------------------------------------------------\n/                                                  | SiteOne…r) | A very…).  | /siteone-cra…e.png |            |            |                   \n/advanced-topics/caching/                          | Caching    | SiteOne…s. | /siteone-cra…e.png |            |            |                   \n/advanced-topics/contribution-and-development/     | Contrib…nt | Guideli…s. | /siteone-cra…e.png |            |            |                   \n/advanced-topics/crawler-behavior/                 | Crawler…or | Underst…s. | /siteone-cra…e.png |            |            |                   \n/advanced-topics/extending/                        | Extending  | SiteOne…y. | /siteone-cra…e.png |            |            |                   \n/advanced-topics/troubleshooting/                  | Trouble…ng | Solutio…s. | /siteone-cra…e.png |            |            |                   \n/configuration/command-line-options/               | Command…ns | This se…l. 
| /siteone-cra…e.png |            |            |                   \n/configuration/examples/                           | Examples   | This se…l. | /siteone-cra…e.png |            |            |                   \n/features/accessibility-analysis/                  | Accessi…is | SiteOne…s. | /siteone-cra…e.png |            |            |                   \n/features/audit-report/                            | Audit R…rt | Learn a…t. | /siteone-cra…e.png |            |            |                   \n\n\nHeading structure\n-----------------\n\nThis table contains large data and shows max 10 rows. To see them all, use output to HTML using `--output-html-report=tmp/myreport.html`.\n\nHeading structure                                                                    | Count | Errors | URL                           \n----------------------------------------------------------------------------------------------------------------------------------------\n<h2> On this page [#starlight__on-this-page] <h1> FAQ [#…w-can-i-contact-the-author] | 14    | 13     | /introduction/faq/            \n<h2> On this page [#starlight__on-this-page] <h1> Exampl…with-all-available-options] | 13    | 12     | /configuration/examples/      \n<h2> On this page [#starlight__on-this-page] <h1> Quick…use-command-line-interface]  | 4     | 3      | /getting-started/qui…rt-guide/\n<h2> On this page [#starlight__on-this-page] <h1> Key Fe…eatures [#list-of-features] | 5     | 1      | /introduction/key-features/   \n<h2> On this page [#starlight__on-this-page] <h1> Comman…zer [#slowest-url-analyzer] | 16    | 1      | /configuration/comma…-options/\n<h2> On this page [#starlight__on-this-page] <h1> CLI: R…inux (arm64) [#linux-arm64] | 8     | 1      | /installation-and-re…packages/\n<h2> On this page [#starlight__on-this-page] <h1> Dev/De…#further-development-ideas] | 3     | 1      | /features/dev-devops…ssistant/\n<h2> On this page [#starlight__on-this-page] <h1> 
Audit…#further-development-ideas]  | 7     | 1      | /features/online-htm…t-upload/\n<h2> On this page [#starlight__on-this-page] <h1> Ease o…? [#what-would-you-improve] | 3     | 1      | /features/ease-of-use/        \n<h2> On this page [#starlight__on-this-page] <h1> Access…#further-development-ideas] | 5     | 1      | /features/accessibil…analysis/\n\n\nHTTP headers\n------------\n\nHeader                    | Occurs | Unique | Values preview                                   | Min value  | Max value \n--------------------------------------------------------------------------------------------------------------------------\nAccept-Ranges             | 10     | 1      | bytes                                            |            |           \nCache-Control             | 65     | 2      | max-age=3600 (51) / max-age=31536000 (14)        |            |           \nContent-Length            | 18     | -      | [ignored generic values]                         | 152 B      | 8 MB      \nContent-Security-Policy   | 51     | 1      | default-src 'self' 'unsafe-inlin…s://*.ytimg.com |            |           \nContent-Type              | 73     | 8      | text/html (58) / application/jav…text/plain (1)  |            |           \nDate                      | 73     | -      | [ignored generic values]                         | 2026-03-16 | 2026-03-16\nEtag                      | 73     | -      | [ignored generic values]                         |            |           \nExpires                   | 65     | -      | [ignored generic values]                         | 2026-03-16 | 2027-03-16\nFeature-Policy            | 73     | 1      | accelerometer 'none'; camera 'no…ne'; usb 'none' |            |           \nLast-Modified             | 65     | -      | [ignored generic values]                         | 2025-05-06 | 2025-06-08\nPermissions-Policy        | 73     | 1      | accelerometer=(), camera=(), geo…ment=(), usb=() |            |           \nReferrer-Policy     
      | 73     | 1      | no-referrer-when-downgrade                       |            |           \nServer                    | 73     | 1      | -                                                |            |           \nStrict-Transport-Security | 73     | 1      | max-age=15552000                                 |            |           \nVary                      | 55     | 1      | Accept-Encoding                                  |            |           \nX-Content-Type-Options    | 73     | 2      | nosniff (59) / nosniff, nosniff (14)             |            |           \nX-Frame-Options           | 73     | 1      | SAMEORIGIN                                       |            |           \nX-XSS-Protection          | 73     | 1      | 1; mode=block                                    |            |           \n\n\nHTTP header values\n------------------\n\nHeader                    | Occurs | Value                                                                             \n-------------------------------------------------------------------------------------------------------------------------\nAccept-Ranges             | 10     | bytes                                                                             \nCache-Control             | 51     | max-age=3600                                                                      \nCache-Control             | 14     | max-age=31536000                                                                  \nContent-Security-Policy   | 51     | default-src 'self' 'unsafe-inline' 'unsafe-eval' data:…is.com https://*.ytimg.com \nContent-Type              | 58     | text/html                                                                         \nContent-Type              | 4      | application/javascript                                                            \nContent-Type              | 3      | image/gif                                                                         \nContent-Type              | 
2      | text/css                                                                          \nContent-Type              | 2      | image/svg+xml                                                                     \nContent-Type              | 2      | image/avif                                                                        \nContent-Type              | 1      | image/webp                                                                        \nContent-Type              | 1      | text/plain                                                                        \nFeature-Policy            | 73     | accelerometer 'none'; camera 'none'; geolocation 'self'…payment 'none'; usb 'none'\nPermissions-Policy        | 73     | accelerometer=(), camera=(), geolocation=(self), gyrosc…idi=(), payment=(), usb=()\nReferrer-Policy           | 73     | no-referrer-when-downgrade                                                        \nServer                    | 73     | -                                                                                 \nStrict-Transport-Security | 73     | max-age=15552000                                                                  \nVary                      | 55     | Accept-Encoding                                                                   \nX-Content-Type-Options    | 59     | nosniff                                                                           \nX-Content-Type-Options    | 14     | nosniff, nosniff                                                                  \nX-Frame-Options           | 73     | SAMEORIGIN                                                                        \nX-XSS-Protection          | 73     | 1; mode=block                                                                     \n\n\nHTTP Caching by content type (only from crawlable domains)\n----------------------------------------------------------\n\nContent type | Cache type   | URLs  | AVG lifetime | MIN lifetime | MAX 
lifetime\n----------------------------------------------------------------------------\nHTML         | Cache-Control + ETag + Last-Modified | 50    | 60 min     | 60 min     | 60 min    \nHTML         | ETag         | 8     | -          | -          | -         \nImage        | Cache-Control + ETag + Last-Modified | 8     | 12 mon     | 12 mon     | 12 mon    \nJS           | Cache-Control + ETag + Last-Modified | 4     | 12 mon     | 12 mon     | 12 mon    \nCSS          | Cache-Control + ETag + Last-Modified | 2     | 12 mon     | 12 mon     | 12 mon    \nDocument     | Cache-Control + ETag + Last-Modified | 1     | 60 min     | 60 min     | 60 min    \n\n\nHTTP Caching by domain\n----------------------\n\nDomain               | Cache type   | URLs  | AVG lifetime | MIN lifetime | MAX lifetime\n------------------------------------------------------------------------------------\ncrawler.siteone.io   | Cache-Control + ETag + Last-Modified | 65    | 78 d       | 60 min     | 12 mon    \ncrawler.siteone.io   | ETag         | 8     | -          | -          | -         \n\n\nHTTP Caching by domain and content type\n---------------------------------------\n\nDomain               | Content type | Cache type   | URLs  | AVG lifetime | MIN lifetime | MAX lifetime\n---------------------------------------------------------------------------------------------------\ncrawler.siteone.io   | HTML         | Cache-Control + ETag + Last-Modified | 50    | 60 min     | 60 min     | 60 min    \ncrawler.siteone.io   | Image        | Cache-Control + ETag + Last-Modified | 8     | 12 mon     | 12 mon     | 12 mon    \ncrawler.siteone.io   | HTML         | ETag         | 8     | -          | -          | -         \ncrawler.siteone.io   | JS           | Cache-Control + ETag + Last-Modified | 4     | 12 mon     | 12 mon     | 12 mon    \ncrawler.siteone.io   | CSS          | Cache-Control + ETag + Last-Modified | 2     | 12 mon     | 12 mon     | 12 mon    \ncrawler.siteone.io   | 
Document     | Cache-Control + ETag + Last-Modified | 1     | 60 min     | 60 min     | 60 min    \n\n\nTOP non-unique titles\n---------------------\n\nNothing to report.\n\n\nTOP non-unique descriptions\n---------------------------\n\nCount | Description                                                                                                                     \n------------------------------------------------------------------------------------------------------------------------------------------\n2     |                                                                                                                                 \n\n\nBest practices\n--------------\n\nAnalysis name                            | OK    | Notice | Warning | Critical\n--------------------------------------------------------------------------------\nInvalid inline SVGs                      | 34    | 0      | 0       | 0       \nDOM depth (> 30)                         | 58    | 0      | 0       | 0       \nLarge inline SVGs (> 5120 B)             | 34    | 0      | 0       | 0       \nHeading structure                        | 55    | 0      | 53      | 0       \nDuplicate inline SVGs (> 5 and > 1024 B) | 34    | 0      | 0       | 0       \nTitle uniqueness (> 10%)                 | 50    | 0      | 0       | 0       \nDescription uniqueness (> 10%)           | 49    | 0      | 0       | 0       \nBrotli support                           | 0     | 0      | 50      | 0       \nWebP support                             | 1     | 0      | 0       | 0       \nAVIF support                             | 2     | 0      | 0       | 0       \n\n\nAccessibility\n-------------\n\nAnalysis name                | OK    | Notice | Warning | Critical\n--------------------------------------------------------------------\nMissing html lang attribute  | 1     | 0      | 0       | 0       \nMissing aria labels          | 2     | 0      | 119     | 0       \nMissing roles                | 0     
| 0      | 35      | 0       \nMissing image alt attributes | 6     | 0      | 1       | 0       \n\n\nSource domains\n--------------\n\nDomain             | Totals       | HTML         | Image        | JS         | CSS         | Document  \n---------------------------------------------------------------------------------------------------------\ncrawler.siteone.io | 73/20MB/1.5s | 58/3MB/744ms | 8/18MB/773ms | 4/7kB/11ms | 2/64kB/12ms | 1/152B/2ms\n\n\nContent types\n-------------\n\nContent type | URLs  | Total size | Total time | Avg time | Status 20x | Status 40x | Status 42x\n--------------------------------------------------------------------------------------------------\nHTML         | 58    | 3 MB       | 744 ms     | 12 ms    | 50         | 1          | 7         \nImage        | 8     | 18 MB      | 773 ms     | 96 ms    | 8          | 0          | 0         \nJS           | 4     | 7 kB       | 11 ms      | 2 ms     | 4          | 0          | 0         \nCSS          | 2     | 64 kB      | 12 ms      | 6 ms     | 2          | 0          | 0         \nDocument     | 1     | 152 B      | 2 ms       | 2 ms     | 1          | 0          | 0         \n\n\nContent types (MIME types)\n--------------------------\n\nContent type               | URLs  | Total size | Total time | Avg time | Status 20x | Status 40x | Status 42x\n----------------------------------------------------------------------------------------------------------------\ntext/html                  | 58    | 3 MB       | 744 ms     | 12 ms    | 50         | 1          | 7         \napplication/javascript     | 4     | 7 kB       | 11 ms      | 2 ms     | 4          | 0          | 0         \nimage/gif                  | 3     | 16 MB      | 671 ms     | 223 ms   | 3          | 0          | 0         \ntext/css                   | 2     | 64 kB      | 12 ms      | 6 ms     | 2          | 0          | 0         \nimage/svg+xml              | 2     | 1 kB       | 5 ms       | 2 ms     | 2          
| 0          | 0         \nimage/avif                 | 2     | 2 MB       | 82 ms      | 41 ms    | 2          | 0          | 0         \nimage/webp                 | 1     | 31 kB      | 14 ms      | 14 ms    | 1          | 0          | 0         \ntext/plain                 | 1     | 152 B      | 2 ms       | 2 ms     | 1          | 0          | 0         \n\n\nDNS info\n--------\n\nDNS resolving tree                                                    \n------------------------------------------------------------------------\ncrawler.siteone.io                                                    \n  IPv4: 86.49.167.242                                                 \n                                                                      \nDNS server: 10.255.255.254                                            \n\n\nSecurity\n--------\n\nHeader                     | OK    | Notice | Warning | Critical | Recommendation                                                      \n-----------------------------------------------------------------------------------------------------------------------------------------\nContent-Security-Policy    | 50    | 0      | 0       | 4        | Content-Security-Policy header is not set. It…prevents XSS attacks. \nX-Frame-Options            | 0     | 54     | 0       | 0        | X-Frame-Options header is set to SAMEORIGIN wh…resource in a frame. 
\nX-XSS-Protection           | 0     | 54     | 0       | 0        | X-XSS-Protection header is set but deprecated.…urity-Policy instead.\nStrict-Transport-Security  | 54    | 0      | 0       | 0        |                                                                     \nX-Content-Type-Options     | 54    | 0      | 0       | 0        |                                                                     \nReferrer-Policy            | 54    | 0      | 0       | 0        |                                                                     \nFeature-Policy             | 54    | 0      | 0       | 0        |                                                                     \nPermissions-Policy         | 54    | 0      | 0       | 0        |                                                                     \nServer                     | 54    | 0      | 0       | 0        | Server header is not set or empty. This is recommended.             \n\n\nAnalysis stats\n--------------\n\nClass::method                                        | Exec time | Exec count\n-------------------------------------------------------------------------------\nSslTlsAnalyzer::getTLSandSSLCertificateInfo          | 259 ms    | 1         \nBestPracticeAnalyzer::checkHeadingStructure          | 47 ms     | 58        \nAccessibilityAnalyzer::checkMissingAriaLabels        | 45 ms     | 50        \nAccessibilityAnalyzer::checkMissingLabels            | 42 ms     | 50        \nAccessibilityAnalyzer::checkMissingRoles             | 39 ms     | 50        \nBestPracticeAnalyzer::checkMaxDOMDepth               | 36 ms     | 58        \nAccessibilityAnalyzer::checkMissingLang              | 36 ms     | 50        \nBestPracticeAnalyzer::checkNonClickablePhoneNumbers  | 24 ms     | 58        \nBestPracticeAnalyzer::checkInlineSvg                 | 11 ms     | 58        \nBestPracticeAnalyzer::checkMissingQuotesOnAttributes | 3 ms      | 58        \nSeoAndOpenGraphAnalyzer::analyzeHeadings           
  | 2 ms      | 1         \nSecurityAnalyzer::checkHtmlSecurity                  | 1 ms      | 54        \nAccessibilityAnalyzer::checkImageAltAttributes       | 1 ms      | 50        \nSecurityAnalyzer::checkHeaders                       | 0 ms      | 54        \nSeoAndOpenGraphAnalyzer::analyzeSeo                  | 0 ms      | 1         \nSeoAndOpenGraphAnalyzer::analyzeOpenGraph            | 0 ms      | 1         \nBestPracticeAnalyzer::checkMetaDescriptionUniqueness | 0 ms      | 1         \nBestPracticeAnalyzer::checkTitleUniqueness           | 0 ms      | 1         \nBestPracticeAnalyzer::checkBrotliSupport             | 0 ms      | 1         \nBestPracticeAnalyzer::checkWebpSupport               | 0 ms      | 1         \nBestPracticeAnalyzer::checkAvifSupport               | 0 ms      | 1         \n\n\nContent processor stats\n-----------------------\n\nClass::method                                            | Exec time | Exec count\n-----------------------------------------------------------------------------------\nHtmlProcessor::findUrls                                  | 47 ms     | 58        \nNextJsProcessor::applyContentChangesBeforeUrlParsing     | 11 ms     | 64        \nJavaScriptProcessor::findUrls                            | 8 ms      | 62        \nAstroProcessor::findUrls                                 | 1 ms      | 62        \nCssProcessor::findUrls                                   | 1 ms      | 60        \nAstroProcessor::applyContentChangesBeforeUrlParsing      | 0 ms      | 62        \nNextJsProcessor::findUrls                                | 0 ms      | 64        \nJavaScriptProcessor::applyContentChangesBeforeUrlParsing | 0 ms      | 62        \nSvelteProcessor::applyContentChangesBeforeUrlParsing     | 0 ms      | 58        \nCssProcessor::applyContentChangesBeforeUrlParsing        | 0 ms      | 60        \nHtmlProcessor::applyContentChangesBeforeUrlParsing       | 0 ms      | 58        \nSvelteProcessor::findUrls                    
            | 0 ms      | 58        \n\n\n==========================================================================================================================================\nTotal execution time 9.2 s using 3 workers and 2048M memory limit (max used 109 MB)\nTotal of 73 visited URLs with a total size of 20 MB and power of 7 reqs/s with download speed 2 MB/s\nResponse times: AVG 21 ms MIN 3 ms MAX 345 ms TOTAL 1.5 s\n==========================================================================================================================================\n\n╔═════════════════════════════════════════════════════════════════╗\n║                      WEBSITE QUALITY SCORE                      ║\n╠═════════════════════════════════════════════════════════════════╣\n║  Overall         █████████████████████░░░░   8.2/10  Good       ║\n╠═════════════════════════════════════════════════════════════════╣\n║  Performance     █████████████████████████  10.0/10  Excellent  ║\n║  SEO             ████████████████████████░   9.5/10  Excellent  ║\n║  Security        ███████████████████░░░░░░   7.5/10  Good       ║\n║  Accessibility   █████████████░░░░░░░░░░░░   5.0/10  Fair       ║\n║  Best Practices  ████████████████████████░   9.5/10  Excellent  ║\n╚═════════════════════════════════════════════════════════════════╝\n\nSummary\n-------\n\n⛔ Skipped URLs - 95 skipped URLs found.\n⛔ Security - 4 pages(s) with critical finding(s).\n⚠️ Latest SSL/TLS protocol TLSv1.3 is not supported. 
Ask your admin/provider to add TLSv1.3 support.\n⚠️ 50 page(s) do not support Brotli compression.\n⚠️ 49 page(s) with skipped heading levels.\n⚠️ 1 page(s) without image alt attributes.\n⚠️ 50 page(s) without aria labels.\n⚠️ 50 page(s) without role attributes.\n⏩ Loaded robots.txt for domain 'crawler.siteone.io': status code 200, size 152 B and took 24 ms.\n⏩ External URLs - 89 external URL(s) found.\n⏩ 404 NOTICE - 1 non-existent page(s) found.\n⏩ DNS IPv6: domain crawler.siteone.io does not support IPv6 (DNS server: 10.255.255.254).\n✅ Redirects - no redirects found.\n✅ SSL/TLS certificate is valid until Mar 13 15:43:29 2027 GMT. Issued by C = BE, O = GlobalSign nv-sa, CN = GlobalSign GCC R6 AlphaSSL CA 2025. Subject is CN = *.siteone.io.\n✅ SSL/TLS certificate issued by 'C = BE, O = GlobalSign nv-sa, CN = GlobalSign GCC R6 AlphaSSL CA 2025'.\n✅ Performance OK - all non-media URLs are faster than 3 seconds.\n✅ HTTP headers - found 18 unique headers.\n✅ All 50 unique title(s) are within the allowed 10% duplicity. Highest duplicity title has 2%.\n✅ All 49 description(s) are within the allowed 10% duplicity. 
Highest duplicity description has 4%.\n✅ 1 WebP image(s) found on the website.\n✅ 2 AVIF image(s) found on the website.\n✅ All pages have quoted attributes.\n✅ All pages have inline SVGs smaller than 5120 bytes.\n✅ All pages have inline SVGs with less than 5 duplicates.\n✅ All pages have valid or none inline SVGs.\n✅ All pages without multiple <h1> headings.\n✅ All pages have <h1> heading.\n✅ All pages have DOM depth less than 30.\n✅ All pages have clickable (interactive) phone numbers.\n✅ All pages have valid HTML.\n✅ All pages have form labels.\n✅ All pages have lang attribute.\n✅ DNS IPv4 OK: domain crawler.siteone.io resolved to 86.49.167.242 (DNS server: 10.255.255.254).\n📌 Text report saved to '/home/janreges/siteone-crawler/tmp/crawler.siteone.io.output.20260316-155513.txt' and took 0 ms.\n📌 JSON report saved to '/home/janreges/siteone-crawler/tmp/crawler.siteone.io.output.20260316-155513.json' and took 0 ms.\n📌 HTML report saved to '/home/janreges/siteone-crawler/tmp/crawler.siteone.io.report.20260316-155513.html' and took 1 ms.\n\n"
  },
  {
    "path": "docs/TEXT-OUTPUT.md",
    "content": "# SiteOne Crawler: Text Output Documentation\n\n## Table of Contents\n\n*   [1. Introduction](#1-introduction)\n*   [2. General Format](#2-general-format)\n*   [3. Detailed Section Breakdown](#3-detailed-section-breakdown)\n    *   [3.1. Progress Report](#31-progress-report)\n    *   [3.2. Skipped URLs Summary](#32-skipped-urls-summary)\n    *   [3.3. Skipped URLs](#33-skipped-urls)\n    *   [3.4. External URLs](#34-external-urls)\n    *   [3.5. Redirected URLs](#35-redirected-urls)\n    *   [3.6. 404 URLs](#36-404-urls)\n    *   [3.7. SSL/TLS Info](#37-ssltls-info)\n    *   [3.8. Performance Metrics (Fastest/Slowest URLs)](#38-performance-metrics-fastestslowest-urls)\n    *   [3.9. SEO & Content Analysis](#39-seo--content-analysis)\n    *   [3.10. HTTP Headers](#310-http-headers)\n    *   [3.11. HTTP Caching](#311-http-caching)\n    *   [3.12. Non-Unique Titles and Descriptions](#312-non-unique-titles-and-descriptions)\n    *   [3.13. Best Practices](#313-best-practices)\n    *   [3.14. Accessibility](#314-accessibility)\n    *   [3.15. Source Domains](#315-source-domains)\n    *   [3.16. Content Types](#316-content-types)\n    *   [3.17. DNS Info](#317-dns-info)\n    *   [3.18. Security](#318-security)\n    *   [3.19. Analysis Stats](#319-analysis-stats)\n    *   [3.20. Content Processor Stats](#320-content-processor-stats)\n    *   [3.21. Execution Summary](#321-execution-summary)\n    *   [3.22. Website Quality Score](#322-website-quality-score)\n    *   [3.23. Summary](#323-summary)\n*   [4. Information Obtainable from Text Output](#4-information-obtainable-from-text-output)\n*   [5. Use Cases for Text Output](#5-use-cases-for-text-output)\n*   [6. Note on JSON Output](#6-note-on-json-output)\n\n\nThis document describes the format of the text (`.txt`) output generated by the SiteOne Crawler tool. 
This output provides a comprehensive summary of the crawl results in a human-readable format, suitable for quick analysis and review directly in a text editor or terminal.\n\n## 1. Introduction\n\nThe text output begins with an ASCII art logo, version information, and the author's contact details. This is followed by several sections detailing various aspects of the crawled website. The primary sections include:\n\n*   **Progress Report:** Real-time status of crawled URLs.\n*   **Skipped URLs Summary:** Aggregated counts of URLs skipped for various reasons.\n*   **Skipped URLs:** Detailed list of skipped URLs, reasons, and sources.\n*   **External URLs:** List of all external URLs found during the crawl, with page counts and source locations.\n*   **Redirected URLs:** List of URLs that resulted in redirects.\n*   **404 URLs:** List of URLs that returned a 404 Not Found status.\n*   **SSL/TLS Info:** Details about the website's SSL/TLS certificate.\n*   **Performance Metrics:** Top fastest and slowest URLs.\n*   **SEO & Content Analysis:** SEO metadata, OpenGraph metadata, heading structure.\n*   **HTTP Headers:** Analysis of HTTP headers found during the crawl.\n*   **HTTP Caching:** Detailed breakdown of caching strategies by content type and domain.\n*   **Non-Unique Titles and Descriptions:** Reports on duplicate page titles and meta descriptions.\n*   **Best Practices:** Results of various best practice checks.\n*   **Accessibility:** Results of accessibility checks.\n*   **Source Domains:** Summary of crawled domains with content type breakdowns.\n*   **Content Types:** Summary of crawled content types (general and MIME types).\n*   **DNS Info:** Information about DNS resolution.\n*   **Security:** Results of security header checks.\n*   **Analysis Stats:** Performance statistics for the crawler's internal analyzers.\n*   **Content Processor Stats:** Performance statistics for content processors (HTML, CSS, JS, etc.).\n*   **Execution Summary:** Total 
execution time, URL counts, response time statistics, and DNS info.\n*   **Website Quality Score:** Scored rating (0-10) across five quality categories.\n*   **Summary:** Categorized findings with severity indicators.\n\n## 2. General Format\n\nThe output uses simple text formatting:\n\n*   **Headers:** Section titles are followed by `---` underlines for visual separation. The final execution summary block uses `===` double-line borders.\n*   **Tables:** Data is presented in fixed-width tables with headers underlined by hyphens (`-`). Column alignment is maintained using spaces. Columns are separated by ` | ` (pipe with surrounding spaces). This documentation uses Markdown tables for illustrative examples.\n*   **Truncation:** Some tables containing potentially large amounts of data (like SEO metadata or heading structures) show only a limited number of rows (e.g., max 10) in the text output, with a note advising the use of the HTML report (`--output-html-report`) for the complete data. Long cell values may be truncated with an ellipsis character.\n\n## 3. Detailed Section Breakdown\n\n### 3.1. Progress Report\n\nThis section shows the progress of the crawl in real-time (or the final state if the crawl is complete). The columns displayed depend on the detected terminal width. When the terminal width is less than 140 characters, a compact mode is activated that omits the `%` and `Bar` columns.\n\nA message like this appears before the progress table when compact mode is active:\n\n```\nDetected terminal width 138 < 140 chars - compact mode activated.\n```\n\n**Compact mode (terminal width < 140 chars):**\n\n| Progress | URL                                                     | Status | Type | Time  | Size  | Cache  | Access. | Best pr. 
|\n| :------- | :------------------------------------------------------ | :----- | :--- | :---- | :---- | :----- | :------ | :------- |\n| 1/40     | /                                                       | 200    | HTML | 4 ms  | 50 kB | 60 min | 3/1     | 7        |\n| 2/66     | /introduction/key-features/                             | 200    | HTML | 4 ms  | 54 kB | 60 min | 2/2     | 1/6      |\n| ...      | ...                                                     | ...    | ...  | ...   | ...   | ...    | ...     | ...      |\n\n**Wide mode (terminal width >= 140 chars) includes two additional columns:**\n\n| Progress | %   | Bar | URL | Status | Type | Time | Size | Cache | Access. | Best pr. |\n| :------- | :-- | :-- | :-- | :----- | :--- | :--- | :--- | :---- | :------ | :------- |\n| 1/40     | 2%  | >   | /   | 200    | HTML | 4 ms | 50 kB| 60 min| 3/1     | 7        |\n\n*   **Progress report columns:**\n    *   `Progress` (`X/Y`): `X` = URL sequence number, `Y` = Total URLs found so far.\n    *   `%`: *(wide mode only)* Percentage of URLs processed relative to the total found.\n    *   `Bar`: *(wide mode only)* Visual progress indicator.\n    *   `URL`: The path or full URL being processed.\n    *   `Status`: HTTP status code returned (e.g., 200, 404, 301, 429).\n    *   `Type`: Detected content type (e.g., HTML, JS, CSS, Image, Document).\n    *   `Time`: Time taken to download the URL.\n    *   `Size`: Size of the downloaded content.\n    *   `Cache`: Detected cache lifetime (e.g., `60 min`, `12 mon`, `etag`, `none`).\n    *   `Access.`: Accessibility issues summary as compact numeric counts. Values like `3/1` mean \"3 OK / 1 warning\", `2/2` means \"2 OK / 2 warnings\", and a single number like `7` means that count of findings in the most relevant severity. Empty for non-HTML resources.\n    *   `Best pr.`: Best practices issues summary in the same compact numeric format. Values like `1/6` mean \"1 OK / 6 warnings\". 
Empty for non-HTML resources.\n\n### 3.2. Skipped URLs Summary\n\nProvides a high-level overview of why URLs were skipped during the crawl, grouped by reason and domain.\n\n**Skipped URLs Summary**\n\n| Reason           | Domain             | Unique URLs |\n| :--------------- | :----------------- | :---------- |\n| Not allowed host | github.com         | 29          |\n| Not allowed host | twitter.com        | 13          |\n| Robots.txt       | crawler.siteone.io | 6           |\n| ...              | ...                | ...         |\n\n*   **Reason:** Why the URL was skipped (e.g., `Not allowed host`, `Robots.txt`, `Max depth reached`).\n*   **Domain:** The domain of the skipped URLs.\n*   **Unique URLs:** The count of unique URLs skipped for that reason/domain combination.\n\n### 3.3. Skipped URLs\n\nLists individual skipped URLs with more context.\n\n**Skipped URLs**\n\n| Reason           | Skipped URL                      | Source     | Found at URL                        |\n| :--------------- | :------------------------------- | :--------- | :---------------------------------- |\n| Robots.txt       | /examples-exports/docs.astro.build/ | `<a href>` | /                                   |\n| Not allowed host | https://adamwathan.me/           | `<a href>` | /introduction/thanks/               |\n| ...              | ...                              | ...        | ...                                 |\n\n*   **Reason:** Why the URL was skipped.\n*   **Skipped URL:** The specific URL that was not crawled.\n*   **Source:** How the URL was discovered (e.g., `<a href>`, `<img src>`, `<script src>`, `CSS url()`).\n*   **Found at URL:** The URL where the skipped URL was found.\n\n### 3.4. 
External URLs\n\nLists all external URLs found during the crawl, along with the number of pages each was found on and up to 5 example source pages.\n\n**External URLs**\n\n| External URL                                            | Pages | Found on URL (max 5)                      |\n| :------------------------------------------------------ | :---- | :---------------------------------------- |\n| https://adamwathan.me/                                  | 1     | /introduction/thanks/                     |\n| https://discord.gg/Uh66HaZJ                             | 1     | /                                         |\n| https://github.com/janreges/siteone-crawler             | 1     | /                                         |\n| ...                                                     | ...   | ...                                       |\n\n*   **External URL:** The full external URL that was found but not crawled.\n*   **Pages:** Number of distinct pages the external URL was found on.\n*   **Found on URL (max 5):** Up to 5 source pages where the external URL was discovered.\n\n### 3.5. Redirected URLs\n\nLists URLs that resulted in an HTTP redirect. If no redirects were found, the section displays \"No redirects found.\"\n\n**Redirected URLs**\n\n| Status | URL from                        | URL to                          | Found at URL              |\n| :----- | :------------------------------ | :------------------------------ | :------------------------ |\n| 301    | /old-page/                      | /new-page/                      | /some-page/               |\n| ...    | ...                             | ...                             | ...                       |\n\nWhen there are no redirects, the output is simply:\n\n```\nRedirected URLs\n---------------\n\nNo redirects found.\n```\n\n### 3.6. 
404 URLs\n\nLists URLs that returned a 404 Not Found status code.\n\n**404 URLs**\n\n| Status | URL 404                                          | Found at URL                       |\n| :----- | :----------------------------------------------- | :--------------------------------- |\n| 404    | /docs/features/content-type-analysis             | /features/performance-metrics/     |\n| ...    | ...                                              | ...                                |\n\n*   **Status:** The HTTP status code (typically 404).\n*   **URL 404:** The URL that resulted in the 404 error.\n*   **Found at URL:** The URL containing the link to the broken page.\n\n### 3.7. SSL/TLS Info\n\nProvides details about the SSL/TLS certificate of the primary host. Includes the raw certificate output and raw protocol test output.\n\n**SSL/TLS info**\n\n| Info                   | Text                                                                                  |\n| :--------------------- | :------------------------------------------------------------------------------------ |\n| Issuer                 | C = BE, O = GlobalSign nv-sa, CN = GlobalSign GCC R6 AlphaSSL CA 2025                |\n| Subject                | CN = *.siteone.io                                                                     |\n| Valid from             | Feb  9 15:43:30 2026 GMT (VALID already 35 day(s))                                    |\n| Valid to               | Mar 13 15:43:29 2027 GMT (VALID still for 362 day(s))                                 |\n| Supported protocols    | TLSv1.2                                                                               |\n| RAW certificate output | `Certificate: Data: Version: 3 (0x2) ...` (truncated)                                 |\n| RAW protocols output   | `=== ssl2 === s_client: Unknown option: -ssl2 ...` (truncated)                         |\n\n*   **Info:** The type of information (Issuer, Subject, Validity dates, Supported protocols).\n*   
**Text:** The corresponding value for the information type. The RAW rows contain the full openssl output, which may span multiple lines and be truncated in the text output.\n\n### 3.8. Performance Metrics (Fastest/Slowest URLs)\n\nTwo tables listing the top N fastest and slowest URLs encountered during the crawl. By default, up to 20 fastest and 8 slowest URLs are shown (configurable via `--fastest-top-limit` and `--slowest-top-limit`).\n\n**TOP fastest URLs**\n\n| Time  | Status | Fast URL                                     |\n| :---- | :----- | :------------------------------------------- |\n| 4 ms  | 200    | /features/performance-analysis/              |\n| 4 ms  | 200    | /features/availability/                      |\n| ...   | ...    | ...                                          |\n\n**TOP slowest URLs**\n\n| Time   | Status | Slow URL                                     |\n| :----- | :----- | :------------------------------------------- |\n| 142 ms | 200    | /features/deep-website-crawling/             |\n| 132 ms | 200    | /introduction/ideas-and-roadmap/             |\n| ...    | ...    | ...                                          |\n\n*   **Time:** Time taken to download the URL.\n*   **Status:** HTTP status code.\n*   **Fast/Slow URL:** The URL itself.\n\n### 3.9. SEO & Content Analysis\n\nIncludes several sub-sections: SEO metadata, OpenGraph metadata, and heading structure. These tables are often truncated in the text output to show max 10 rows, with a note advising the use of `--output-html-report=tmp/myreport.html` for the complete data.\n\n**SEO metadata**\n\n| URL                                            | Indexing | Title        | H1           | Description  | Keywords |\n| :--------------------------------------------- | :------- | :----------- | :----------- | :----------- | :------- |\n| /                                              | Allowed  | SiteOne...ve | SiteOne...ler| A very u...  
|          |\n| /advanced-topics/caching/                      | Allowed  | Caching...ler| Caching      | SiteOne...ls.|          |\n| ...                                            | ...      | ...          | ...          | ...          | ...      |\n\n*   **URL:** The crawled page URL.\n*   **Indexing:** Whether the page allows indexing (`Allowed` or specific directive).\n*   **Title:** The page's `<title>` tag content (truncated in text output).\n*   **H1:** The page's first `<h1>` heading content (truncated in text output).\n*   **Description:** The page's meta description (truncated in text output).\n*   **Keywords:** The page's meta keywords (usually empty on modern sites).\n\n**OpenGraph metadata**\n\n| URL                                | OG Title   | OG Description | OG Image          | Twitter Title | Twitter Description | Twitter Image |\n| :--------------------------------- | :--------- | :------------- | :---------------- | :------------ | :------------------ | :------------ |\n| /                                  | SiteOne... | A very...      | /siteone-cra...   |               |                     |               |\n| /advanced-topics/caching/          | Caching    | SiteOne...     | /siteone-cra...   |               |                     |               |\n| ...                                | ...        | ...            | ...               | ...           | ...                 | ...           |\n\n**Heading structure**\n\n| Heading structure                                                    | Count | Errors | URL                           |\n| :------------------------------------------------------------------- | :---- | :----- | :---------------------------- |\n| `<h2>` On this page ... `<h1>` FAQ ...                               | 14    | 13     | /introduction/faq/            |\n| `<h2>` On this page ... `<h1>` Examples ...                          | 13    | 12     | /configuration/examples/      |\n| ...                                      
                            | ...   | ...    | ...                           |\n\n*   **Heading structure:** A compressed representation of the heading hierarchy, showing heading tags with their text and IDs.\n*   **Count:** Total number of headings on the page.\n*   **Errors:** Number of heading structure errors (e.g., skipped levels, out-of-order headings).\n*   **URL:** The page where the heading structure was found.\n\n### 3.10. HTTP Headers\n\nAnalyzes HTTP response headers across all crawled URLs. Presented in two tables.\n\n*   **HTTP headers:** Lists unique headers, occurrence count, unique value count, preview of values, and min/max values where applicable (e.g., for Content-Length or dates). Headers with many unique values show `[ignored generic values]` and display `-` for unique count.\n*   **HTTP header values:** Lists specific values for each header with their occurrence counts.\n\n**HTTP headers (Summary)**\n\n| Header                    | Occurs | Unique | Values preview                                    | Min value  | Max value  |\n| :------------------------ | :----- | :----- | :------------------------------------------------ | :--------- | :--------- |\n| Accept-Ranges             | 10     | 1      | bytes                                             |            |            |\n| Cache-Control             | 65     | 2      | max-age=3600 (51) / max-age=31536000 (14)         |            |            |\n| Content-Length            | 18     | -      | [ignored generic values]                          | 152 B      | 8 MB       |\n| Content-Type              | 73     | 8      | text/html (58) / application/jav...text/plain (1) |            |            |\n| Date                      | 73     | -      | [ignored generic values]                          | 2026-03-16 | 2026-03-16 |\n| ...                       | ...    | ...    | ...                                               | ...        | ...        
|\n\n**HTTP header values (Detailed)**\n\n| Header                    | Occurs | Value                                                           |\n| :------------------------ | :----- | :-------------------------------------------------------------- |\n| Accept-Ranges             | 10     | bytes                                                           |\n| Cache-Control             | 51     | max-age=3600                                                    |\n| Cache-Control             | 14     | max-age=31536000                                                |\n| Content-Type              | 58     | text/html                                                       |\n| Content-Type              | 4      | application/javascript                                          |\n| ...                       | ...    | ...                                                             |\n\n### 3.11. HTTP Caching\n\nProvides detailed analysis of HTTP caching headers in three tables.\n\n*   **HTTP Caching by content type:** Summarizes caching strategies (e.g., `Cache-Control + ETag + Last-Modified`, `ETag`) used for different content types (HTML, CSS, JS, Image, etc.), including counts and average/min/max lifetimes.\n*   **HTTP Caching by domain:** Similar summary, but grouped by domain.\n*   **HTTP Caching by domain and content type:** The most granular view, showing caching strategies for each content type within each domain.\n\n**HTTP Caching by content type (only from crawlable domains)**\n\n| Content type | Cache type                           | URLs | AVG lifetime | MIN lifetime | MAX lifetime |\n| :----------- | :----------------------------------- | :--- | :----------- | :----------- | :----------- |\n| HTML         | Cache-Control + ETag + Last-Modified | 50   | 60 min       | 60 min       | 60 min       |\n| HTML         | ETag                                 | 8    | -            | -            | -            |\n| Image        | Cache-Control + ETag + Last-Modified | 
8    | 12 mon       | 12 mon       | 12 mon       |\n| JS           | Cache-Control + ETag + Last-Modified | 4    | 12 mon       | 12 mon       | 12 mon       |\n| CSS          | Cache-Control + ETag + Last-Modified | 2    | 12 mon       | 12 mon       | 12 mon       |\n| Document     | Cache-Control + ETag + Last-Modified | 1    | 60 min       | 60 min       | 60 min       |\n\n**HTTP Caching by domain**\n\n| Domain             | Cache type                           | URLs | AVG lifetime | MIN lifetime | MAX lifetime |\n| :----------------- | :----------------------------------- | :--- | :----------- | :----------- | :----------- |\n| crawler.siteone.io | Cache-Control + ETag + Last-Modified | 65   | 78 d         | 60 min       | 12 mon       |\n| crawler.siteone.io | ETag                                 | 8    | -            | -            | -            |\n\n**HTTP Caching by domain and content type**\n\n| Domain             | Content type | Cache type                           | URLs | AVG lifetime | MIN lifetime | MAX lifetime |\n| :----------------- | :----------- | :----------------------------------- | :--- | :----------- | :----------- | :----------- |\n| crawler.siteone.io | HTML         | Cache-Control + ETag + Last-Modified | 50   | 60 min       | 60 min       | 60 min       |\n| crawler.siteone.io | Image        | Cache-Control + ETag + Last-Modified | 8    | 12 mon       | 12 mon       | 12 mon       |\n| ...                | ...          | ...                                  | ...  | ...          | ...          | ...          |\n\n### 3.12. Non-Unique Titles and Descriptions\n\nTwo sections that report on duplicate page titles and meta descriptions across the crawled site.\n\n**TOP non-unique titles**\n\nDisplays titles that appear on more than one page. 
If all titles are unique, displays \"Nothing to report.\"\n\n**TOP non-unique descriptions**\n\n| Count | Description |\n| :---- | :---------- |\n| 2     |             |\n\n*   **Count:** Number of pages sharing the same title or description.\n*   **Description/Title:** The duplicated value. An empty value indicates pages with missing meta descriptions.\n\n### 3.13. Best Practices\n\nSummarizes results from various best practice checks.\n\n**Best practices**\n\n| Analysis name                            | OK  | Notice | Warning | Critical |\n| :--------------------------------------- | :-- | :----- | :------ | :------- |\n| Invalid inline SVGs                      | 34  | 0      | 0       | 0        |\n| DOM depth (> 30)                         | 58  | 0      | 0       | 0        |\n| Large inline SVGs (> 5120 B)             | 34  | 0      | 0       | 0        |\n| Heading structure                        | 55  | 0      | 53      | 0        |\n| Duplicate inline SVGs (> 5 and > 1024 B) | 34  | 0      | 0       | 0        |\n| Title uniqueness (> 10%)                 | 50  | 0      | 0       | 0        |\n| Description uniqueness (> 10%)           | 49  | 0      | 0       | 0        |\n| Brotli support                           | 0   | 0      | 50      | 0        |\n| WebP support                             | 1   | 0      | 0       | 0        |\n| AVIF support                             | 2   | 0      | 0       | 0        |\n\n*   **Analysis name:** The specific check performed.\n*   **OK / Notice / Warning / Critical:** Counts of URLs falling into each severity category for that check.\n\n### 3.14. 
Accessibility\n\nSummarizes results from accessibility checks.\n\n**Accessibility**\n\n| Analysis name                | OK  | Notice | Warning | Critical |\n| :--------------------------- | :-- | :----- | :------ | :------- |\n| Missing html lang attribute  | 1   | 0      | 0       | 0        |\n| Missing aria labels          | 2   | 0      | 119     | 0        |\n| Missing roles                | 0   | 0      | 35      | 0        |\n| Missing image alt attributes | 6   | 0      | 1       | 0        |\n\n*   **Analysis name:** The specific accessibility check.\n*   **OK / Notice / Warning / Critical:** Counts for each severity level.\n\n### 3.15. Source Domains\n\nLists all domains from which resources were successfully crawled, with counts and size/time summaries per content type. The content type columns are dynamic and depend on the types of resources actually found during the crawl (e.g., HTML, Image, JS, CSS, Document). Only content types present in the crawl results are shown.\n\n**Source domains**\n\n| Domain             | Totals       | HTML         | Image        | JS         | CSS         | Document  |\n| :----------------- | :----------- | :----------- | :----------- | :--------- | :---------- | :-------- |\n| crawler.siteone.io | 73/20MB/1.5s | 58/3MB/744ms | 8/18MB/773ms | 4/7kB/11ms | 2/64kB/12ms | 1/152B/2ms|\n\nEach cell in the content type columns contains three values separated by `/`: count of URLs, total size, and total download time.\n\n### 3.16. Content Types\n\nSummarizes crawled resources by content type in two tables.\n\n*   **Content types (General):** Groups by broad categories (HTML, Image, JS, CSS, Document, etc.).\n*   **Content types (MIME types):** Groups by specific MIME types (e.g., `text/html`, `image/gif`, `application/javascript`).\n\nNote: The status columns are dynamic and reflect the actual HTTP status code ranges encountered during the crawl. 
For example, if HTTP 429 responses are encountered, a `Status 42x` column will appear alongside the standard `Status 20x` and `Status 40x` columns.\n\n**Content types (General)**\n\n| Content type | URLs | Total size | Total time | Avg time | Status 20x | Status 40x | Status 42x |\n| :----------- | :--- | :--------- | :--------- | :------- | :--------- | :--------- | :--------- |\n| HTML         | 58   | 3 MB       | 744 ms     | 12 ms    | 50         | 1          | 7          |\n| Image        | 8    | 18 MB      | 773 ms     | 96 ms    | 8          | 0          | 0          |\n| JS           | 4    | 7 kB       | 11 ms      | 2 ms     | 4          | 0          | 0          |\n| CSS          | 2    | 64 kB      | 12 ms      | 6 ms     | 2          | 0          | 0          |\n| Document     | 1    | 152 B      | 2 ms       | 2 ms     | 1          | 0          | 0          |\n\n**Content types (MIME types)**\n\n| Content type           | URLs | Total size | Total time | Avg time | Status 20x | Status 40x | Status 42x |\n| :--------------------- | :--- | :--------- | :--------- | :------- | :--------- | :--------- | :--------- |\n| text/html              | 58   | 3 MB       | 744 ms     | 12 ms    | 50         | 1          | 7          |\n| application/javascript | 4    | 7 kB       | 11 ms      | 2 ms     | 4          | 0          | 0          |\n| image/gif              | 3    | 16 MB      | 671 ms     | 223 ms   | 3          | 0          | 0          |\n| text/css               | 2    | 64 kB      | 12 ms      | 6 ms     | 2          | 0          | 0          |\n| image/svg+xml          | 2    | 1 kB       | 5 ms       | 2 ms     | 2          | 0          | 0          |\n| image/avif             | 2    | 2 MB       | 82 ms      | 41 ms    | 2          | 0          | 0          |\n| image/webp             | 1    | 31 kB      | 14 ms      | 14 ms    | 1          | 0          | 0          |\n| text/plain             | 1    | 152 B      | 2 ms       | 2 ms     | 1    
      | 0          | 0          |\n\n### 3.17. DNS Info\n\nShows the DNS resolution tree for the crawled domain(s) and the DNS server used. This section is not a table but a tree-formatted block.\n\n```\nDNS info\n--------\n\nDNS resolving tree\n------------------------------------------------------------------------\ncrawler.siteone.io\n  IPv4: 86.49.167.242\n\nDNS server: 10.255.255.254\n```\n\n### 3.18. Security\n\nReports on the presence and configuration of important security-related HTTP headers. Each header is checked and results are categorized into OK, Notice, Warning, or Critical. A recommendation is provided when issues are found.\n\n**Security**\n\n| Header                    | OK | Notice | Warning | Critical | Recommendation                                                                      |\n| :------------------------ | :- | :----- | :------ | :------- | :---------------------------------------------------------------------------------- |\n| Content-Security-Policy   | 50 | 0      | 0       | 4        | Content-Security-Policy header is not set. It...prevents XSS attacks.               |\n| X-Frame-Options           | 0  | 54     | 0       | 0        | X-Frame-Options header is set to SAMEORIGIN wh...resource in a frame.               |\n| X-XSS-Protection          | 0  | 54     | 0       | 0        | X-XSS-Protection header is set but deprecated....urity-Policy instead.              
|\n| Strict-Transport-Security | 54 | 0      | 0       | 0        |                                                                                     |\n| X-Content-Type-Options    | 54 | 0      | 0       | 0        |                                                                                     |\n| Referrer-Policy           | 54 | 0      | 0       | 0        |                                                                                     |\n| Feature-Policy            | 54 | 0      | 0       | 0        |                                                                                     |\n| Permissions-Policy        | 54 | 0      | 0       | 0        |                                                                                     |\n| Server                    | 54 | 0      | 0       | 0        | Server header is not set or empty. This is recommended.                             |\n\n*   **Header:** The security header being checked.\n*   **OK / Notice / Warning / Critical:** Counts based on the header's presence and configuration. Note that X-XSS-Protection produces a \"Notice\" (deprecated) rather than a \"Critical\" when it is set, because the header itself is deprecated in favor of Content-Security-Policy.\n*   **Recommendation:** Suggestion for improvement if issues are found. Empty when no action is needed.\n\n### 3.19. Analysis Stats\n\nProvides performance metrics for the crawler's internal analysis modules. Useful for debugging the crawler itself. 
The method names are shown in `Class::method` notation and retain camelCase method names (e.g., `BestPracticeAnalyzer::checkHeadingStructure`, `AccessibilityAnalyzer::checkMissingAriaLabels`).\n\n**Analysis stats**\n\n| Class::method                                        | Exec time | Exec count |\n| :--------------------------------------------------- | :-------- | :--------- |\n| SslTlsAnalyzer::getTLSandSSLCertificateInfo          | 259 ms    | 1          |\n| BestPracticeAnalyzer::checkHeadingStructure           | 47 ms     | 58         |\n| AccessibilityAnalyzer::checkMissingAriaLabels         | 45 ms     | 50         |\n| AccessibilityAnalyzer::checkMissingLabels             | 42 ms     | 50         |\n| AccessibilityAnalyzer::checkMissingRoles              | 39 ms     | 50         |\n| BestPracticeAnalyzer::checkMaxDOMDepth                | 36 ms     | 58         |\n| AccessibilityAnalyzer::checkMissingLang               | 36 ms     | 50         |\n| BestPracticeAnalyzer::checkNonClickablePhoneNumbers   | 24 ms     | 58         |\n| BestPracticeAnalyzer::checkInlineSvg                  | 11 ms     | 58         |\n| BestPracticeAnalyzer::checkMissingQuotesOnAttributes  | 3 ms      | 58         |\n| SeoAndOpenGraphAnalyzer::analyzeHeadings              | 2 ms      | 1          |\n| SecurityAnalyzer::checkHtmlSecurity                   | 1 ms      | 54         |\n| AccessibilityAnalyzer::checkImageAltAttributes        | 1 ms      | 50         |\n| SecurityAnalyzer::checkHeaders                        | 0 ms      | 54         |\n| SeoAndOpenGraphAnalyzer::analyzeSeo                   | 0 ms      | 1          |\n| SeoAndOpenGraphAnalyzer::analyzeOpenGraph             | 0 ms      | 1          |\n| BestPracticeAnalyzer::checkMetaDescriptionUniqueness  | 0 ms      | 1          |\n| BestPracticeAnalyzer::checkTitleUniqueness            | 0 ms      | 1          |\n| BestPracticeAnalyzer::checkBrotliSupport              | 0 ms      | 1          |\n| BestPracticeAnalyzer::checkWebpSupport                | 0 
ms      | 1          |\n| BestPracticeAnalyzer::checkAvifSupport                | 0 ms      | 1          |\n\n*   **Class::method:** The analyzer class and specific check method.\n*   **Exec time:** Total execution time for all invocations of this method.\n*   **Exec count:** Number of times the method was invoked (typically once per analyzed URL or once for aggregate checks).\n\n### 3.20. Content Processor Stats\n\nProvides performance metrics for content processors that run during the crawl. These processors handle URL extraction and content transformation for different resource types.\n\n**Content processor stats**\n\n| Class::method                                             | Exec time | Exec count |\n| :-------------------------------------------------------- | :-------- | :--------- |\n| HtmlProcessor::findUrls                                   | 47 ms     | 58         |\n| NextJsProcessor::applyContentChangesBeforeUrlParsing      | 11 ms     | 64         |\n| JavaScriptProcessor::findUrls                             | 8 ms      | 62         |\n| AstroProcessor::findUrls                                  | 1 ms      | 62         |\n| CssProcessor::findUrls                                    | 1 ms      | 60         |\n| AstroProcessor::applyContentChangesBeforeUrlParsing       | 0 ms      | 62         |\n| NextJsProcessor::findUrls                                 | 0 ms      | 64         |\n| JavaScriptProcessor::applyContentChangesBeforeUrlParsing  | 0 ms      | 62         |\n| SvelteProcessor::applyContentChangesBeforeUrlParsing      | 0 ms      | 58         |\n| CssProcessor::applyContentChangesBeforeUrlParsing         | 0 ms      | 60         |\n| HtmlProcessor::applyContentChangesBeforeUrlParsing        | 0 ms      | 58         |\n| SvelteProcessor::findUrls                                 | 0 ms      | 58         |\n\n*   **Class::method:** The content processor class and specific method (`findUrls` for URL extraction, 
`applyContentChangesBeforeUrlParsing` for pre-processing transformations).\n*   **Exec time:** Total execution time for all invocations.\n*   **Exec count:** Number of times the method was invoked.\n\n### 3.21. Execution Summary\n\nA bordered summary block showing overall crawl statistics, printed between `===` separator lines.\n\n```\n==========================================================================\nTotal execution time 9.2 s using 3 workers and 2048M memory limit (max used 109 MB)\nTotal of 73 visited URLs with a total size of 20 MB and power of 7 reqs/s with download speed 2 MB/s\nResponse times: AVG 21 ms MIN 3 ms MAX 345 ms TOTAL 1.5 s\n==========================================================================\n```\n\n*   **Total execution time:** Wall-clock time for the entire crawl, including the number of concurrent workers and memory usage.\n*   **Total of N visited URLs:** Count of all successfully visited URLs, total downloaded size, request throughput, and download speed.\n*   **Response times:** Average, minimum, maximum, and total response times across all URLs.\n\n### 3.22. Website Quality Score\n\nA visual box-drawing quality score display that rates the website across five weighted categories on a 0-10 scale. 
Each category shows a progress bar, numeric score, and a label (Excellent, Good, Fair, Poor, etc.).\n\n```\n+=====================================================================+\n|                      WEBSITE QUALITY SCORE                          |\n+=====================================================================+\n|  Overall         #####################----   8.2/10  Good           |\n+=====================================================================+\n|  Performance     #########################  10.0/10  Excellent      |\n|  SEO             ########################-   9.5/10  Excellent      |\n|  Security        ###################------   7.5/10  Good           |\n|  Accessibility   #############------------   5.0/10  Fair           |\n|  Best Practices  ########################-   9.5/10  Excellent      |\n+=====================================================================+\n```\n\n*(The actual output uses Unicode box-drawing characters and block characters for the progress bars.)*\n\nThe five categories and their weights are:\n*   **Performance** (20%): Based on response times, error rates.\n*   **SEO** (20%): Based on titles, descriptions, headings, indexing.\n*   **Security** (25%): Based on security header presence and configuration.\n*   **Accessibility** (20%): Based on lang attributes, alt text, ARIA labels, roles.\n*   **Best Practices** (15%): Based on inline SVGs, heading structure, DOM depth, compression support.\n\n### 3.23. Summary\n\nA categorized list of findings using severity-level prefixes. 
Each finding is on its own line with an emoji indicator:\n\n*   **CRITICAL** (red circle): Serious issues requiring immediate attention (e.g., pages with critical security findings, skipped URLs).\n*   **WARNING** (warning sign): Issues that should be addressed (e.g., missing Brotli support, missing ARIA labels, skipped heading levels).\n*   **INFO** (fast-forward): Informational items (e.g., robots.txt status, external URL count, DNS IPv6 status, 404 notices).\n*   **OK** (green check): Positive findings confirming correct configuration (e.g., valid SSL certificate, no redirects, all titles unique).\n*   **NOTICE** (pin): Export notifications (e.g., text/JSON/HTML report save paths and timing).\n\nExample summary output:\n\n```\nSummary\n-------\n\n[CRITICAL] Skipped URLs - 95 skipped URLs found.\n[CRITICAL] Security - 4 pages(s) with critical finding(s).\n[WARNING] Latest SSL/TLS protocol TLSv1.3 is not supported. Ask your admin/provider to add TLSv1.3 support.\n[WARNING] 50 page(s) do not support Brotli compression.\n[WARNING] 49 page(s) with skipped heading levels.\n[WARNING] 1 page(s) without image alt attributes.\n[WARNING] 50 page(s) without aria labels.\n[WARNING] 50 page(s) without role attributes.\n[INFO] Loaded robots.txt for domain 'crawler.siteone.io': status code 200, size 152 B and took 24 ms.\n[INFO] External URLs - 89 external URL(s) found.\n[INFO] 404 NOTICE - 1 non-existent page(s) found.\n[INFO] DNS IPv6: domain crawler.siteone.io does not support IPv6 (DNS server: 10.255.255.254).\n[OK] Redirects - no redirects found.\n[OK] SSL/TLS certificate is valid until Mar 13 15:43:29 2027 GMT. 
Issued by ...\n[OK] Performance OK - all non-media URLs are faster than 3 seconds.\n[OK] HTTP headers - found 18 unique headers.\n[OK] All 50 unique title(s) are within the allowed 10% duplicity.\n...\n[NOTICE] Text report saved to '.../crawler.siteone.io.output.20260316-155513.txt' and took 0 ms.\n[NOTICE] JSON report saved to '.../crawler.siteone.io.output.20260316-155513.json' and took 0 ms.\n[NOTICE] HTML report saved to '.../crawler.siteone.io.report.20260316-155513.html' and took 1 ms.\n```\n\n*(The actual output uses emoji characters for the severity prefixes rather than bracketed labels.)*\n\n## 4. Information Obtainable from Text Output\n\nThe text output provides a wealth of information about a website, including:\n\n*   **Crawl Overview:** Number of pages found, processed, and skipped.\n*   **Website Structure:** Implicitly through the list of crawled URLs and their relationships (via \"Found at URL\").\n*   **Link Health:** Identification of broken links (404s) and redirects.\n*   **External Dependencies:** Full list of external URLs linked from the site, with page counts and source pages.\n*   **Performance Bottlenecks:** Identification of the slowest loading pages and resources.\n*   **Content Inventory:** Summary of different content types (HTML, images, scripts, stylesheets) and their sizes/load times.\n*   **Basic SEO Health:** Status of titles, descriptions, heading structures, indexing directives, and duplicate content.\n*   **OpenGraph Implementation:** Presence and content of OG tags for social sharing.\n*   **Server Configuration:** Insights into HTTP headers used, including caching and security headers.\n*   **Caching Strategy:** Effectiveness of caching policies across different content types and domains.\n*   **Security Posture:** Checks for essential security headers (HSTS, CSP, X-Frame-Options, etc.).\n*   **Accessibility Issues:** High-level view of common accessibility problems (missing alt text, lang attributes, ARIA labels, roles).\n* 
  **Best Practice Adherence:** Checks against common web development best practices.\n*   **SSL/TLS Certificate Status:** Validity and issuer details of the site's certificate.\n*   **Website Quality Score:** Numeric scores (0-10) across five quality categories with an overall rating.\n*   **Content Processor Performance:** Internal timing data for URL extraction and content processing.\n\n## 5. Use Cases for Text Output\n\nThe text output is valuable for various tasks:\n\n1.  **Quick Website Health Check:** Get a fast overview of major issues like 404s, slow pages, or critical security/accessibility warnings via the Summary section.\n2.  **Identifying Broken Links:** Easily spot and locate 404 errors using the dedicated section.\n3.  **Performance Audit:** Identify the slowest URLs to prioritize optimization efforts.\n4.  **Basic SEO Audit:** Check for duplicate titles/descriptions and analyze heading structures.\n5.  **Security Header Review:** Quickly verify the presence of important security headers and see deprecation notices.\n6.  **Caching Policy Verification:** Understand how caching is implemented across the site.\n7.  **Pre/Post Deployment Checks:** Compare outputs before and after changes to catch regressions.\n8.  **Generating Simple Reports:** Copy-paste relevant sections into emails or documents for concise reporting.\n9.  **Troubleshooting Crawl Issues:** Use skipped URLs and analysis stats to understand crawler behavior.\n10. **Quality Scoring:** Use the Website Quality Score to track improvements over time across performance, SEO, security, accessibility, and best practices.\n11. **Command-Line Integration:** Process the text output with standard command-line tools (grep, awk, sed) for specific data extraction or automated checks in simple scripts.\n\n## 6. Note on JSON Output\n\nWhile this document focuses on the text output, SiteOne Crawler also offers a JSON output format (`--output-json-file`). 
The JSON output contains much of the same information but in a structured format that is ideal for programmatic consumption, detailed data analysis, or integration with other tools and dashboards. For automated processing or complex data manipulation, the JSON output is generally preferred.\n\nSee the [JSON Output Documentation](JSON-OUTPUT.md) for more details on the JSON format.\n"
  },
  {
    "path": "rustfmt.toml",
    "content": "max_width = 120\r\nuse_field_init_shorthand = true\r\n"
  },
  {
    "path": "src/analysis/accessibility_analyzer.rs",
    "content": "// SiteOne Crawler - AccessibilityAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\nuse std::time::Instant;\n\nuse regex::Regex;\nuse scraper::{Html, Selector};\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::analysis::result::analyzer_stats::AnalyzerStats;\nuse crate::analysis::result::url_analysis_result::UrlAnalysisResult;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::extra_column::ExtraColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::result::visited_url::VisitedUrl;\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\nconst ANALYSIS_MISSING_IMAGE_ALT_ATTRIBUTES: &str = \"Missing image alt attributes\";\nconst ANALYSIS_MISSING_FORM_LABELS: &str = \"Missing form labels\";\nconst ANALYSIS_MISSING_ARIA_LABELS: &str = \"Missing aria labels\";\nconst ANALYSIS_MISSING_ROLES: &str = \"Missing roles\";\nconst ANALYSIS_MISSING_LANG_ATTRIBUTE: &str = \"Missing html lang attribute\";\n\nconst SUPER_TABLE_ACCESSIBILITY: &str = \"accessibility\";\n\npub struct AccessibilityAnalyzer {\n    base: BaseAnalyzer,\n    stats: AnalyzerStats,\n\n    pages_with_invalid_html: usize,\n    pages_without_image_alt_attributes: usize,\n    pages_without_form_labels: usize,\n    pages_without_aria_labels: usize,\n    pages_without_roles: usize,\n    pages_without_lang: usize,\n}\n\nimpl Default for AccessibilityAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl AccessibilityAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n            stats: AnalyzerStats::new(),\n            pages_with_invalid_html: 0,\n            pages_without_image_alt_attributes: 0,\n            pages_without_form_labels: 0,\n            pages_without_aria_labels: 0,\n            pages_without_roles: 0,\n            
pages_without_lang: 0,\n        }\n    }\n\n    fn check_image_alt_attributes(&mut self, html: &str, result: &mut UrlAnalysisResult) {\n        use once_cell::sync::Lazy;\n        static RE_IMG: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)<img[^>]+>\").unwrap());\n        let img_re = &*RE_IMG;\n\n        let mut bad_images: Vec<String> = Vec::new();\n        let mut found_count = 0usize;\n\n        for mat in img_re.find_iter(html) {\n            found_count += 1;\n            let img = mat.as_str();\n            let img_lower = img.to_lowercase();\n\n            if !img_lower.contains(\" alt=\") || img_lower.contains(\" alt=\\\"\\\"\") || img_lower.contains(\" alt=''\") {\n                bad_images.push(img.to_string());\n                self.stats.add_warning(ANALYSIS_MISSING_IMAGE_ALT_ATTRIBUTES, Some(img));\n            } else {\n                self.stats.add_ok(ANALYSIS_MISSING_IMAGE_ALT_ATTRIBUTES, Some(img));\n            }\n        }\n\n        if !bad_images.is_empty() {\n            result.add_warning(\n                format!(\"{} image(s) without 'alt' attribute\", bad_images.len()),\n                ANALYSIS_MISSING_IMAGE_ALT_ATTRIBUTES,\n                Some(bad_images),\n            );\n            self.pages_without_image_alt_attributes += 1;\n        } else {\n            result.add_ok(\n                format!(\"All {} image(s) have an 'alt' attribute\", found_count),\n                ANALYSIS_MISSING_IMAGE_ALT_ATTRIBUTES,\n                None,\n            );\n        }\n    }\n\n    fn check_missing_labels(&mut self, html: &str, result: &mut UrlAnalysisResult) {\n        let document = Html::parse_document(html);\n\n        let input_selector = match Selector::parse(\"input:not([type='hidden'])\") {\n            Ok(s) => s,\n            Err(_) => return,\n        };\n        let label_selector_fn =\n            |id: &str| -> Option<Selector> { Selector::parse(&format!(\"label[for='{}']\", id)).ok() };\n\n        let inputs: Vec<_> = 
document.select(&input_selector).collect();\n        let mut inputs_without_labels: Vec<String> = Vec::new();\n\n        for input in &inputs {\n            let input_html = get_opening_tag_html(input);\n            let dedup_key = normalize_tag_for_dedup(input);\n            let id = input.value().attr(\"id\");\n\n            if let Some(id_val) = id {\n                if let Some(label_sel) = label_selector_fn(id_val)\n                    && document.select(&label_sel).next().is_none()\n                {\n                    inputs_without_labels.push(input_html);\n                    self.stats.add_warning(ANALYSIS_MISSING_FORM_LABELS, Some(&dedup_key));\n                }\n            } else {\n                inputs_without_labels.push(input_html);\n                self.stats.add_warning(ANALYSIS_MISSING_FORM_LABELS, Some(&dedup_key));\n            }\n        }\n\n        if !inputs_without_labels.is_empty() {\n            result.add_warning(\n                format!(\"{} input(s) without associated <label>\", inputs_without_labels.len()),\n                ANALYSIS_MISSING_FORM_LABELS,\n                Some(inputs_without_labels),\n            );\n            self.pages_without_form_labels += 1;\n        } else if !inputs.is_empty() {\n            result.add_ok(\n                format!(\"All {} input(s) have associated 'label'\", inputs.len()),\n                ANALYSIS_MISSING_FORM_LABELS,\n                None,\n            );\n        }\n    }\n\n    fn check_missing_aria_labels(&mut self, html: &str, result: &mut UrlAnalysisResult) {\n        let document = Html::parse_document(html);\n\n        let mut critical_elements_without: Vec<String> = Vec::new();\n        let critical_selectors = [\"input:not([type='hidden'])\", \"select\", \"textarea\"];\n\n        for sel_str in &critical_selectors {\n            let selector = match Selector::parse(sel_str) {\n                Ok(s) => s,\n                Err(_) => continue,\n            };\n\n            for 
element in document.select(&selector) {\n                let element_html = get_opening_tag_html(&element);\n                let dedup_key = normalize_tag_for_dedup(&element);\n\n                let has_aria_label = element.value().attr(\"aria-label\").is_some();\n                let has_aria_labelledby = element.value().attr(\"aria-labelledby\").is_some();\n\n                if !has_aria_label && !has_aria_labelledby {\n                    critical_elements_without.push(element_html);\n                    self.stats.add_critical(ANALYSIS_MISSING_ARIA_LABELS, Some(&dedup_key));\n                } else {\n                    self.stats.add_ok(ANALYSIS_MISSING_ARIA_LABELS, Some(&dedup_key));\n                }\n            }\n        }\n\n        let mut warning_elements_without: Vec<String> = Vec::new();\n        let warning_selectors = [\"a\", \"button\"];\n\n        for sel_str in &warning_selectors {\n            let selector = match Selector::parse(sel_str) {\n                Ok(s) => s,\n                Err(_) => continue,\n            };\n\n            for element in document.select(&selector) {\n                let element_html = get_opening_tag_html(&element);\n                let dedup_key = normalize_tag_for_dedup(&element);\n\n                let has_aria_label = element.value().attr(\"aria-label\").is_some();\n                let has_aria_labelledby = element.value().attr(\"aria-labelledby\").is_some();\n\n                if !has_aria_label && !has_aria_labelledby {\n                    warning_elements_without.push(element_html);\n                    self.stats.add_warning(ANALYSIS_MISSING_ARIA_LABELS, Some(&dedup_key));\n                } else {\n                    self.stats.add_ok(ANALYSIS_MISSING_ARIA_LABELS, Some(&dedup_key));\n                }\n            }\n        }\n\n        if !critical_elements_without.is_empty() {\n            result.add_critical(\n                format!(\n                    \"{} form element(s) without defined 
'aria-label' or 'aria-labelledby'\",\n                    critical_elements_without.len()\n                ),\n                ANALYSIS_MISSING_ARIA_LABELS,\n                Some(critical_elements_without.clone()),\n            );\n        }\n        if !warning_elements_without.is_empty() {\n            result.add_warning(\n                format!(\n                    \"{} element(s) without defined 'aria-label' or 'aria-labelledby'\",\n                    warning_elements_without.len()\n                ),\n                ANALYSIS_MISSING_ARIA_LABELS,\n                Some(warning_elements_without.clone()),\n            );\n        }\n\n        if !critical_elements_without.is_empty() || !warning_elements_without.is_empty() {\n            self.pages_without_aria_labels += 1;\n        } else {\n            result.add_ok(\n                \"All key interactive element(s) have defined 'aria-label' or 'aria-labelledby'\".to_string(),\n                ANALYSIS_MISSING_ARIA_LABELS,\n                None,\n            );\n        }\n    }\n\n    fn check_missing_roles(&mut self, html: &str, result: &mut UrlAnalysisResult) {\n        let document = Html::parse_document(html);\n\n        let mut elements_without_roles: Vec<String> = Vec::new();\n        let elements_to_check = [\"nav\", \"main\", \"aside\", \"header\", \"footer\"];\n\n        for sel_str in &elements_to_check {\n            let selector = match Selector::parse(sel_str) {\n                Ok(s) => s,\n                Err(_) => continue,\n            };\n\n            for element in document.select(&selector) {\n                if element.value().attr(\"role\").is_some() {\n                    continue;\n                }\n                let element_html = get_opening_tag_html(&element);\n                let dedup_key = normalize_tag_for_dedup(&element);\n                elements_without_roles.push(element_html);\n                self.stats.add_warning(ANALYSIS_MISSING_ROLES, Some(&dedup_key));\n          
  }\n        }\n\n        if !elements_without_roles.is_empty() {\n            result.add_warning(\n                format!(\"{} element(s) without defined 'role'\", elements_without_roles.len()),\n                ANALYSIS_MISSING_ROLES,\n                Some(elements_without_roles),\n            );\n            self.pages_without_roles += 1;\n        } else {\n            result.add_ok(\n                \"All key element(s) have defined 'role'\".to_string(),\n                ANALYSIS_MISSING_ROLES,\n                None,\n            );\n        }\n    }\n\n    fn check_missing_lang(&mut self, html: &str, result: &mut UrlAnalysisResult) {\n        let document = Html::parse_document(html);\n\n        let html_selector = match Selector::parse(\"html\") {\n            Ok(s) => s,\n            Err(_) => return,\n        };\n\n        if let Some(html_el) = document.select(&html_selector).next() {\n            if let Some(lang) = html_el.value().attr(\"lang\") {\n                let element_html = format!(\"<html lang=\\\"{}\\\">\", lang);\n                if lang.is_empty() {\n                    result.add_critical(\n                        \"The 'lang' attribute is present in <html> but empty.\".to_string(),\n                        ANALYSIS_MISSING_LANG_ATTRIBUTE,\n                        Some(vec![\"HTML lang attribute value is empty ''.\".to_string()]),\n                    );\n                    self.stats\n                        .add_critical(ANALYSIS_MISSING_LANG_ATTRIBUTE, Some(&element_html));\n                    self.pages_without_lang += 1;\n                } else {\n                    result.add_ok(\n                        format!(\"Document has defined 'lang' attribute as '{}'.\", lang),\n                        ANALYSIS_MISSING_LANG_ATTRIBUTE,\n                        None,\n                    );\n                    self.stats.add_ok(ANALYSIS_MISSING_LANG_ATTRIBUTE, Some(&element_html));\n                }\n            } else {\n                
result.add_critical(\n                    \"Document does not have a defined 'lang' attribute in <html>.\".to_string(),\n                    ANALYSIS_MISSING_LANG_ATTRIBUTE,\n                    Some(vec![\"HTML lang attribute is not present.\".to_string()]),\n                );\n                self.stats.add_critical(ANALYSIS_MISSING_LANG_ATTRIBUTE, Some(\"<html>\"));\n                self.pages_without_lang += 1;\n            }\n        } else {\n            result.add_critical(\n                \"Document does not have a defined 'lang' attribute in <html>.\".to_string(),\n                ANALYSIS_MISSING_LANG_ATTRIBUTE,\n                Some(vec![\"HTML lang attribute is not present.\".to_string()]),\n            );\n            self.stats.add_critical(ANALYSIS_MISSING_LANG_ATTRIBUTE, Some(\"<html>\"));\n            self.pages_without_lang += 1;\n        }\n    }\n\n    fn set_findings_to_summary(&self, status: &Status) {\n        if self.pages_with_invalid_html > 0 {\n            status.add_critical_to_summary(\n                \"pages-with-invalid-html\",\n                &format!(\"{} page(s) with invalid HTML\", self.pages_with_invalid_html),\n            );\n        } else {\n            status.add_ok_to_summary(\"pages-with-invalid-html\", \"All pages have valid HTML\");\n        }\n\n        if self.pages_without_image_alt_attributes > 0 {\n            status.add_warning_to_summary(\n                \"pages-without-image-alt-attributes\",\n                &format!(\n                    \"{} page(s) without image alt attributes\",\n                    self.pages_without_image_alt_attributes\n                ),\n            );\n        } else {\n            status.add_ok_to_summary(\n                \"pages-without-image-alt-attributes\",\n                \"All pages have image alt attributes\",\n            );\n        }\n\n        if self.pages_without_form_labels > 0 {\n            status.add_warning_to_summary(\n                
\"pages-without-form-labels\",\n                &format!(\"{} page(s) without form labels\", self.pages_without_form_labels),\n            );\n        } else {\n            status.add_ok_to_summary(\"pages-without-form-labels\", \"All pages have form labels\");\n        }\n\n        if self.pages_without_aria_labels > 0 {\n            status.add_warning_to_summary(\n                \"pages-without-aria-labels\",\n                &format!(\"{} page(s) without aria labels\", self.pages_without_aria_labels),\n            );\n        } else {\n            status.add_ok_to_summary(\"pages-without-aria-labels\", \"All pages have aria labels\");\n        }\n\n        if self.pages_without_roles > 0 {\n            status.add_warning_to_summary(\n                \"pages-without-roles\",\n                &format!(\"{} page(s) without role attributes\", self.pages_without_roles),\n            );\n        } else {\n            status.add_ok_to_summary(\"pages-without-roles\", \"All pages have role attributes\");\n        }\n\n        if self.pages_without_lang > 0 {\n            status.add_critical_to_summary(\n                \"pages-without-lang\",\n                &format!(\"{} page(s) without lang attribute\", self.pages_without_lang),\n            );\n        } else {\n            status.add_ok_to_summary(\"pages-without-lang\", \"All pages have lang attribute\");\n        }\n    }\n}\n\nimpl Analyzer for AccessibilityAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        let columns = vec![\n            SuperTableColumn::new(\n                \"analysisName\".to_string(),\n                \"Analysis name\".to_string(),\n                -1, // AUTO_WIDTH\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"ok\".to_string(),\n                
\"OK\".to_string(),\n                5,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<usize>()\n                        && v > 0\n                    {\n                        return utils::get_color_text(&v.to_string(), \"green\", false);\n                    }\n                    \"0\".to_string()\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"notice\".to_string(),\n                \"Notice\".to_string(),\n                6,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<usize>()\n                        && v > 0\n                    {\n                        return utils::get_color_text(&v.to_string(), \"blue\", false);\n                    }\n                    \"0\".to_string()\n                })),\n                None,\n                false,\n                true,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"warning\".to_string(),\n                \"Warning\".to_string(),\n                7,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<usize>()\n                        && v > 0\n                    {\n                        return utils::get_color_text(&v.to_string(), \"magenta\", true);\n                    }\n                    \"0\".to_string()\n                })),\n                None,\n                false,\n                true,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"critical\".to_string(),\n                \"Critical\".to_string(),\n                8,\n  
              Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<usize>()\n                        && v > 0\n                    {\n                        return utils::get_color_text(&v.to_string(), \"red\", true);\n                    }\n                    \"0\".to_string()\n                })),\n                None,\n                false,\n                true,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let data = self.stats.to_table_data();\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_ACCESSIBILITY.to_string(),\n            \"Accessibility\".to_string(),\n            \"Nothing to report.\".to_string(),\n            columns,\n            true,\n            None,\n            \"ASC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_data(data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_end(super_table);\n\n        self.set_findings_to_summary(status);\n    }\n\n    fn analyze_visited_url(\n        &mut self,\n        visited_url: &VisitedUrl,\n        body: Option<&str>,\n        _headers: Option<&HashMap<String, String>>,\n    ) -> Option<UrlAnalysisResult> {\n        let is_html = visited_url.content_type == ContentTypeId::Html\n            && visited_url.status_code == 200\n            && visited_url.is_allowed_for_crawling;\n\n        if !is_html {\n            return None;\n        }\n\n        let html = body?;\n        let mut result = UrlAnalysisResult::new();\n\n        let s = Instant::now();\n        self.check_image_alt_attributes(html, &mut result);\n        self.base\n            .measure_exec_time(\"AccessibilityAnalyzer\", \"checkImageAltAttributes\", s);\n\n        let s = Instant::now();\n        self.check_missing_labels(html, &mut 
result);\n        self.base\n            .measure_exec_time(\"AccessibilityAnalyzer\", \"checkMissingLabels\", s);\n\n        let s = Instant::now();\n        self.check_missing_aria_labels(html, &mut result);\n        self.base\n            .measure_exec_time(\"AccessibilityAnalyzer\", \"checkMissingAriaLabels\", s);\n\n        let s = Instant::now();\n        self.check_missing_roles(html, &mut result);\n        self.base\n            .measure_exec_time(\"AccessibilityAnalyzer\", \"checkMissingRoles\", s);\n\n        let s = Instant::now();\n        self.check_missing_lang(html, &mut result);\n        self.base\n            .measure_exec_time(\"AccessibilityAnalyzer\", \"checkMissingLang\", s);\n\n        Some(result)\n    }\n\n    fn show_analyzed_visited_url_result_as_column(&self) -> Option<ExtraColumn> {\n        ExtraColumn::new(\"Access.\".to_string(), Some(8), false, None, None, None).ok()\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        175\n    }\n\n    fn get_name(&self) -> &str {\n        \"AccessibilityAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n\n/// Get the opening tag HTML from an element reference (strip inner content).\n/// Built directly from element name + attributes to avoid html5ever serialization\n/// panics on malformed HTML (\"no parent ElemInfo\").\nfn get_opening_tag_html(element: &scraper::ElementRef) -> String {\n    let name = element.value().name();\n    let attrs: Vec<String> = element\n        .value()\n        .attrs()\n        .map(|(k, v)| format!(\"{}=\\\"{}\\\"\", k, v))\n        .collect();\n    if attrs.is_empty() {\n        format!(\"<{}>\", name)\n    } else {\n        format!(\"<{} {}>\", name, attrs.join(\" \"))\n    }\n}\n\n/// Normalize an opening tag for 
deduplication purposes.\n/// Replaces dynamic attribute values (href, src, action, id, class, style, data-*)\n/// with \"*\" so that structurally identical elements on different pages\n/// (e.g. same nav `<a>` with different href) are counted only once.\nfn normalize_tag_for_dedup(element: &scraper::ElementRef) -> String {\n    let name = element.value().name();\n    let attrs: Vec<String> = element\n        .value()\n        .attrs()\n        .map(|(k, v)| {\n            if k == \"href\"\n                || k == \"src\"\n                || k == \"action\"\n                || k == \"id\"\n                || k == \"class\"\n                || k == \"style\"\n                || k == \"for\"\n                || k.starts_with(\"data-\")\n            {\n                format!(\"{}=\\\"*\\\"\", k)\n            } else {\n                format!(\"{}=\\\"{}\\\"\", k, v)\n            }\n        })\n        .collect();\n    if attrs.is_empty() {\n        format!(\"<{}>\", name)\n    } else {\n        format!(\"<{} {}>\", name, attrs.join(\" \"))\n    }\n}\n"
  },
  {
    "path": "src/analysis/analyzer.rs",
    "content": "// SiteOne Crawler - Analyzer trait\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::analysis::result::url_analysis_result::UrlAnalysisResult;\nuse crate::extra_column::ExtraColumn;\nuse crate::result::visited_url::VisitedUrl;\n\n/// Trait that all analyzers must implement.\npub trait Analyzer: Send + Sync {\n    /// Do your analysis and set results to output (post-crawl).\n    /// Called after all URLs have been visited.\n    fn analyze(&mut self, status: &crate::result::status::Status, output: &mut dyn crate::output::output::Output);\n\n    /// Do your analysis for a just-visited URL.\n    /// Body and headers are already downloaded and decompressed.\n    /// Return None if you don't want to analyze this URL,\n    /// otherwise return UrlAnalysisResult with your results.\n    fn analyze_visited_url(\n        &mut self,\n        _visited_url: &VisitedUrl,\n        _body: Option<&str>,\n        _headers: Option<&HashMap<String, String>>,\n    ) -> Option<UrlAnalysisResult> {\n        None\n    }\n\n    /// If you want to show URL analysis results in table column,\n    /// return the ExtraColumn under which results will be shown.\n    fn show_analyzed_visited_url_result_as_column(&self) -> Option<ExtraColumn> {\n        None\n    }\n\n    /// Should this analyzer be activated based on options?\n    fn should_be_activated(&self) -> bool;\n\n    /// Get order of this analyzer (lower = earlier).\n    fn get_order(&self) -> i32;\n\n    /// Get the name of this analyzer.\n    fn get_name(&self) -> &str;\n\n    /// Get execution times of analyzer methods.\n    fn get_exec_times(&self) -> &HashMap<String, f64>;\n\n    /// Get execution counts of analyzer methods.\n    fn get_exec_counts(&self) -> &HashMap<String, usize>;\n}\n"
  },
  {
    "path": "src/analysis/base_analyzer.rs",
    "content": "// SiteOne Crawler - BaseAnalyzer\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse std::collections::HashMap;\r\nuse std::time::Instant;\r\n\r\n/// Common state and methods shared by all analyzers.\r\n/// Embed this as a field in each concrete analyzer struct.\r\n#[derive(Debug, Default)]\r\npub struct BaseAnalyzer {\r\n    /// Total exec times of analyzer methods: \"ClassName::method\" -> seconds\r\n    pub exec_times: HashMap<String, f64>,\r\n    /// Total exec counts of analyzer methods: \"ClassName::method\" -> count\r\n    pub exec_counts: HashMap<String, usize>,\r\n}\r\n\r\nimpl BaseAnalyzer {\r\n    pub fn new() -> Self {\r\n        Self::default()\r\n    }\r\n\r\n    /// Measure and increment exec time and count of an analyzer method.\r\n    pub fn measure_exec_time(&mut self, class: &str, method: &str, start_time: Instant) {\r\n        let elapsed = start_time.elapsed().as_secs_f64();\r\n        let key = format!(\"{}::{}\", class, method);\r\n\r\n        *self.exec_times.entry(key.clone()).or_insert(0.0) += elapsed;\r\n        *self.exec_counts.entry(key).or_insert(0) += 1;\r\n    }\r\n\r\n    pub fn get_exec_times(&self) -> &HashMap<String, f64> {\r\n        &self.exec_times\r\n    }\r\n\r\n    pub fn get_exec_counts(&self) -> &HashMap<String, usize> {\r\n        &self.exec_counts\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/analysis/best_practice_analyzer.rs",
    "content": "// SiteOne Crawler - BestPracticeAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\nuse std::time::Instant;\n\nuse regex::Regex;\nuse scraper::{Html, Selector};\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::analysis::result::analyzer_stats::AnalyzerStats;\nuse crate::analysis::result::url_analysis_result::UrlAnalysisResult;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::extra_column::ExtraColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::result::visited_url::VisitedUrl;\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\nconst ANALYSIS_LARGE_SVGS: &str = \"Large inline SVGs\";\nconst ANALYSIS_DUPLICATED_SVGS: &str = \"Duplicate inline SVGs\";\nconst ANALYSIS_INVALID_SVGS: &str = \"Invalid inline SVGs\";\nconst ANALYSIS_MISSING_QUOTES: &str = \"Missing quotes on attributes\";\nconst ANALYSIS_HEADING_STRUCTURE: &str = \"Heading structure\";\nconst ANALYSIS_NON_CLICKABLE_PHONE_NUMBERS: &str = \"Non-clickable phone numbers\";\nconst ANALYSIS_DOM_DEPTH: &str = \"DOM depth\";\nconst ANALYSIS_TITLE_UNIQUENESS: &str = \"Title uniqueness\";\nconst ANALYSIS_DESCRIPTION_UNIQUENESS: &str = \"Description uniqueness\";\nconst ANALYSIS_BROTLI_SUPPORT: &str = \"Brotli support\";\nconst ANALYSIS_WEBP_SUPPORT: &str = \"WebP support\";\nconst ANALYSIS_AVIF_SUPPORT: &str = \"AVIF support\";\n\nconst SUPER_TABLE_BEST_PRACTICES: &str = \"best-practices\";\nconst SUPER_TABLE_NON_UNIQUE_TITLES: &str = \"non-unique-titles\";\nconst SUPER_TABLE_NON_UNIQUE_DESCRIPTIONS: &str = \"non-unique-descriptions\";\n\npub struct BestPracticeAnalyzer {\n    base: BaseAnalyzer,\n    stats: AnalyzerStats,\n\n    // options\n    max_inline_svg_size: usize,\n    max_inline_svg_duplicate_size: usize,\n    max_inline_svg_duplicates: usize,\n    title_uniqueness_percentage: 
usize,\n    meta_description_uniqueness_percentage: usize,\n    max_dom_depth_warning: usize,\n    max_dom_depth_critical: usize,\n\n    // stats counters\n    pages_with_large_svgs: usize,\n    pages_with_duplicated_svgs: usize,\n    pages_with_invalid_svgs: usize,\n    pages_with_missing_quotes: usize,\n    pages_with_multiple_h1: usize,\n    pages_without_h1: usize,\n    pages_with_skipped_heading_levels: usize,\n    pages_with_deep_dom: usize,\n    pages_with_non_clickable_phone_numbers: usize,\n}\n\nimpl Default for BestPracticeAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl BestPracticeAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n            stats: AnalyzerStats::new(),\n\n            max_inline_svg_size: 5 * 1024,\n            max_inline_svg_duplicate_size: 1024,\n            max_inline_svg_duplicates: 5,\n            title_uniqueness_percentage: 10,\n            meta_description_uniqueness_percentage: 10,\n            max_dom_depth_warning: 30,\n            max_dom_depth_critical: 50,\n\n            pages_with_large_svgs: 0,\n            pages_with_duplicated_svgs: 0,\n            pages_with_invalid_svgs: 0,\n            pages_with_missing_quotes: 0,\n            pages_with_multiple_h1: 0,\n            pages_without_h1: 0,\n            pages_with_skipped_heading_levels: 0,\n            pages_with_deep_dom: 0,\n            pages_with_non_clickable_phone_numbers: 0,\n        }\n    }\n\n    fn get_analysis_result(\n        analysis_name: &str,\n        ok: usize,\n        notice: usize,\n        warning: usize,\n        critical: usize,\n    ) -> HashMap<String, String> {\n        let mut row = HashMap::new();\n        row.insert(\"analysisName\".to_string(), analysis_name.to_string());\n        row.insert(\"ok\".to_string(), ok.to_string());\n        row.insert(\"notice\".to_string(), notice.to_string());\n        row.insert(\"warning\".to_string(), warning.to_string());\n       
 row.insert(\"critical\".to_string(), critical.to_string());\n        row\n    }\n\n    fn analyze_urls(&mut self, status: &Status, output: &mut dyn Output) -> Vec<HashMap<String, String>> {\n        let mut data = self.stats.to_table_data();\n        let visited_urls = status.get_visited_urls();\n\n        let html_urls: Vec<&VisitedUrl> = visited_urls\n            .iter()\n            .filter(|u| u.is_allowed_for_crawling && u.status_code == 200 && u.content_type == ContentTypeId::Html)\n            .collect();\n\n        let image_urls: Vec<&VisitedUrl> = visited_urls\n            .iter()\n            .filter(|u| u.status_code == 200 && u.content_type == ContentTypeId::Image)\n            .collect();\n\n        // Check title uniqueness\n        let s = Instant::now();\n        let titles: Vec<Option<String>> = html_urls\n            .iter()\n            .map(|u| u.extras.as_ref().and_then(|e| e.get(\"Title\").cloned()))\n            .collect();\n        data.push(self.check_title_uniqueness(&titles, status, output));\n        self.base\n            .measure_exec_time(\"BestPracticeAnalyzer\", \"checkTitleUniqueness\", s);\n\n        // Check meta description uniqueness\n        let s = Instant::now();\n        let descriptions: Vec<Option<String>> = html_urls\n            .iter()\n            .map(|u| u.extras.as_ref().and_then(|e| e.get(\"Description\").cloned()))\n            .collect();\n        data.push(self.check_meta_description_uniqueness(&descriptions, status, output));\n        self.base\n            .measure_exec_time(\"BestPracticeAnalyzer\", \"checkMetaDescriptionUniqueness\", s);\n\n        // Check brotli support on internal HTML pages\n        let s = Instant::now();\n        let internal_html: Vec<&VisitedUrl> = html_urls\n            .iter()\n            .filter(|u| !u.is_external && u.content_type == ContentTypeId::Html)\n            .copied()\n            .collect();\n        data.push(self.check_brotli_support(&internal_html, status));\n    
    self.base\n            .measure_exec_time(\"BestPracticeAnalyzer\", \"checkBrotliSupport\", s);\n\n        // Check WebP support\n        let s = Instant::now();\n        data.push(self.check_webp_support(&image_urls, status));\n        self.base\n            .measure_exec_time(\"BestPracticeAnalyzer\", \"checkWebpSupport\", s);\n\n        // Check AVIF support\n        let s = Instant::now();\n        data.push(self.check_avif_support(&image_urls, status));\n        self.base\n            .measure_exec_time(\"BestPracticeAnalyzer\", \"checkAvifSupport\", s);\n\n        data\n    }\n\n    fn check_inline_svg(&mut self, html: &str, result: &mut UrlAnalysisResult) {\n        use once_cell::sync::Lazy;\n        static RE_SVG: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?is)<svg[^>]*>(.*?)</svg>\").unwrap());\n        let svg_re = &*RE_SVG;\n\n        let matches: Vec<String> = svg_re.find_iter(html).map(|m| m.as_str().to_string()).collect();\n\n        if matches.is_empty() {\n            return;\n        }\n\n        let svg_count = matches.len();\n        let mut large_svgs: Vec<String> = Vec::new();\n        let mut max_found_svg_size: usize = 0;\n        let mut duplicates: HashMap<String, (usize, String, usize)> = HashMap::new();\n        let mut invalid_svgs: HashMap<String, (String, Vec<String>)> = HashMap::new();\n\n        for svg in &matches {\n            // Skip escaped SVGs (code examples)\n            if svg.contains(\"&#x22;\") || svg.contains(\"&#x27;\") {\n                continue;\n            }\n\n            let svg_trimmed = svg.trim();\n            let size = svg_trimmed.len();\n\n            // Use md5 hash as key\n            use md5::{Digest, Md5};\n            let mut hasher = Md5::new();\n            hasher.update(svg_trimmed.as_bytes());\n            let svg_hash = format!(\"{:x}\", hasher.finalize());\n\n            // Check inline SVG size\n            if size > self.max_inline_svg_size {\n                
large_svgs.push(sanitize_svg(svg_trimmed));\n                max_found_svg_size = max_found_svg_size.max(size);\n                self.stats.add_warning(ANALYSIS_LARGE_SVGS, Some(svg_trimmed));\n            } else {\n                self.stats.add_ok(ANALYSIS_LARGE_SVGS, Some(svg_trimmed));\n            }\n\n            // Track duplicates\n            let entry = duplicates\n                .entry(svg_hash.clone())\n                .or_insert((0, sanitize_svg(svg_trimmed), size));\n            entry.0 += 1;\n\n            // Check SVG validity\n            let errors = validate_svg(svg_trimmed);\n            if let Some(errors) = errors {\n                invalid_svgs.insert(svg_hash.clone(), (sanitize_svg(svg_trimmed), errors));\n                self.stats.add_warning(ANALYSIS_INVALID_SVGS, Some(svg_trimmed));\n            } else {\n                self.stats.add_ok(ANALYSIS_INVALID_SVGS, Some(svg_trimmed));\n            }\n        }\n\n        // Evaluate duplicated SVGs\n        let mut duplicated_svgs: Vec<String> = Vec::new();\n        for (svg_hash, (count, sanitized, size)) in &duplicates {\n            if *count > self.max_inline_svg_duplicates && *size > self.max_inline_svg_duplicate_size {\n                duplicated_svgs.push(format!(\"{}x SVG ({} B): {}\", count, size, sanitized));\n                self.stats.add_warning(ANALYSIS_DUPLICATED_SVGS, Some(svg_hash));\n            } else {\n                self.stats.add_ok(ANALYSIS_DUPLICATED_SVGS, Some(svg_hash));\n            }\n        }\n\n        // Report large SVGs\n        if !large_svgs.is_empty() {\n            result.add_warning(\n                format!(\n                    \"{} inline svg(s) larger than {} bytes. Largest SVG is {} bytes. 
Consider loading from an external file to minimize HTML size\",\n                    large_svgs.len(),\n                    self.max_inline_svg_size,\n                    max_found_svg_size\n                ),\n                ANALYSIS_LARGE_SVGS,\n                Some(large_svgs.clone()),\n            );\n            self.pages_with_large_svgs += 1;\n        }\n\n        let small_svgs = svg_count.saturating_sub(large_svgs.len());\n        if small_svgs > 0 {\n            result.add_ok(\n                format!(\n                    \"{} inline svg(s) have a size less than {} bytes\",\n                    small_svgs, self.max_inline_svg_size\n                ),\n                ANALYSIS_LARGE_SVGS,\n                None,\n            );\n        }\n\n        // Report duplicated SVGs\n        let duplicated_count = duplicated_svgs.len();\n        if !duplicated_svgs.is_empty() {\n            result.add_warning(\n                format!(\n                    \"{} inline svg(s) are duplicated. 
Consider loading from an external file to minimize HTML size\",\n                    duplicated_count\n                ),\n                ANALYSIS_DUPLICATED_SVGS,\n                Some(duplicated_svgs),\n            );\n            self.pages_with_duplicated_svgs += 1;\n        }\n\n        let uniq_svgs = svg_count.saturating_sub(duplicated_count);\n        if uniq_svgs > 0 {\n            result.add_ok(\n                format!(\n                    \"{} inline svg(s) are unique (less than {} duplicates)\",\n                    uniq_svgs, self.max_inline_svg_duplicates\n                ),\n                ANALYSIS_DUPLICATED_SVGS,\n                None,\n            );\n        }\n\n        // Report invalid SVGs\n        let invalid_count = invalid_svgs.len();\n        if !invalid_svgs.is_empty() {\n            let invalid_details: Vec<String> = invalid_svgs\n                .values()\n                .map(|(sanitized, errors)| {\n                    format!(\n                        \"{}<br />Found {} error(s) in SVG. Errors:<br /> &nbsp; &gt; {}\",\n                        sanitized,\n                        errors.len(),\n                        errors.join(\"<br /> &nbsp; &gt; \")\n                    )\n                })\n                .collect();\n            result.add_critical(\n                format!(\n                    \"{} invalid inline svg(s). 
Check the content of the SVG as it may contain invalid XML and cause unexpected display problems\",\n                    invalid_count\n                ),\n                ANALYSIS_INVALID_SVGS,\n                Some(invalid_details),\n            );\n            self.pages_with_invalid_svgs += 1;\n        }\n\n        let valid_svgs = svg_count.saturating_sub(invalid_count);\n        if valid_svgs > 0 {\n            result.add_ok(\n                format!(\"{} inline svg(s) are valid\", valid_svgs),\n                ANALYSIS_INVALID_SVGS,\n                None,\n            );\n        }\n    }\n\n    fn check_missing_quotes_on_attributes(&mut self, html: &str, result: &mut UrlAnalysisResult) {\n        use once_cell::sync::Lazy;\n        static RE_UNQUOTED_ATTRS: Lazy<Regex> =\n            Lazy::new(|| Regex::new(r#\"<[^>]*\\s(href|src|content|alt|title)\\s*=\\s*([^\"'][^\\s>]*)[^>]*>\"#).unwrap());\n        let re = &*RE_UNQUOTED_ATTRS;\n\n        let mut issues: Vec<String> = Vec::new();\n\n        for caps in re.captures_iter(html) {\n            let full_match = match caps.get(0) {\n                Some(m) => m.as_str(),\n                None => continue,\n            };\n            let attribute = match caps.get(1) {\n                Some(m) => m.as_str(),\n                None => continue,\n            };\n            let value = match caps.get(2) {\n                Some(m) => m.as_str(),\n                None => continue,\n            };\n\n            // Skip attributes without value or with very long value\n            if value.trim().is_empty() || full_match.len() > 1000 {\n                continue;\n            }\n\n            // Skip escaped quotes and special cases\n            if full_match.contains(\"\\\\\\\"\")\n                || full_match.contains(\"\\\\'\")\n                || full_match.contains(\"&#\")\n                || full_match.starts_with(\"<astro\")\n            {\n                continue;\n            }\n\n            // Skip 
numeric values\n            if value.trim().is_empty() || value.parse::<f64>().is_ok() {\n                continue;\n            }\n\n            issues.push(format!(\n                \"The attribute '{}' has a value '{}' not enclosed in quotes in tag {}\",\n                attribute, value, full_match\n            ));\n            self.stats.add_warning(ANALYSIS_MISSING_QUOTES, Some(full_match));\n        }\n\n        if !issues.is_empty() {\n            result.add_warning(\n                format!(\"{} attribute(s) with missing quotes\", issues.len()),\n                ANALYSIS_MISSING_QUOTES,\n                Some(issues),\n            );\n            self.pages_with_missing_quotes += 1;\n        }\n    }\n\n    fn check_max_dom_depth(&mut self, html: &str, url: &str, result: &mut UrlAnalysisResult) {\n        let document = Html::parse_document(html);\n\n        // Find the body element and compute max depth\n        let body_selector = match Selector::parse(\"body\") {\n            Ok(s) => s,\n            Err(_) => return,\n        };\n\n        let body_node_id = match document.select(&body_selector).next() {\n            Some(b) => b.id(),\n            None => return,\n        };\n\n        let body_node = match document.tree.get(body_node_id) {\n            Some(n) => n,\n            None => return,\n        };\n\n        let max_depth = find_max_depth(body_node, 0);\n\n        if max_depth >= self.max_dom_depth_critical {\n            let msg = format!(\n                \"The DOM depth exceeds the critical limit: {}. Found depth: {}.\",\n                self.max_dom_depth_critical, max_depth\n            );\n            result.add_critical(msg.clone(), ANALYSIS_DOM_DEPTH, Some(vec![msg]));\n            self.stats.add_critical(ANALYSIS_DOM_DEPTH, Some(url));\n            self.pages_with_deep_dom += 1;\n        } else if max_depth >= self.max_dom_depth_warning {\n            let msg = format!(\n                \"The DOM depth exceeds the warning limit: {}. 
Found depth: {}.\",\n                self.max_dom_depth_warning, max_depth\n            );\n            result.add_warning(msg.clone(), ANALYSIS_DOM_DEPTH, Some(vec![msg]));\n            self.stats.add_warning(ANALYSIS_DOM_DEPTH, Some(url));\n            self.pages_with_deep_dom += 1;\n        } else {\n            result.add_ok(\n                format!(\"The DOM depth is within acceptable limits. Found depth: {}\", max_depth),\n                ANALYSIS_DOM_DEPTH,\n                None,\n            );\n            self.stats.add_ok(ANALYSIS_DOM_DEPTH, Some(url));\n        }\n    }\n\n    fn check_heading_structure(&mut self, html: &str, result: &mut UrlAnalysisResult) {\n        let document = Html::parse_document(html);\n\n        let heading_selector = match Selector::parse(\"h1, h2, h3, h4, h5, h6\") {\n            Ok(s) => s,\n            Err(_) => return,\n        };\n\n        let headings: Vec<(i32, String)> = document\n            .select(&heading_selector)\n            .filter_map(|el| {\n                // Skip headings inside SVG, script, style, template, noscript\n                // (headings inside foreign content are not relevant)\n                let mut parent = el.parent();\n                while let Some(p) = parent {\n                    if let Some(p_el) = p.value().as_element() {\n                        match p_el.name() {\n                            \"svg\" | \"script\" | \"style\" | \"template\" | \"noscript\" => return None,\n                            _ => {}\n                        }\n                    }\n                    parent = p.parent();\n                }\n\n                let tag = el.value().name();\n                let level = tag.strip_prefix('h').and_then(|s| s.parse::<i32>().ok())?;\n                let text = el.text().collect::<String>();\n                Some((level, text))\n            })\n            .collect();\n\n        if headings.is_empty() {\n            result.add_notice(\n                \"No headings 
found in the HTML content.\".to_string(),\n                ANALYSIS_HEADING_STRUCTURE,\n                Some(vec![\"No headings found in the HTML content.\".to_string()]),\n            );\n            self.stats.add_notice(ANALYSIS_HEADING_STRUCTURE, Some(html));\n            return;\n        }\n\n        let mut warning_issues: Vec<String> = Vec::new();\n        let mut critical_issues: Vec<String> = Vec::new();\n        let mut found_h1 = false;\n        let mut previous_level = 0i32;\n\n        for (level, _text) in &headings {\n            let current_level = *level;\n\n            if current_level == 1 {\n                if found_h1 {\n                    critical_issues.push(\"Multiple <h1> headings found.\".to_string());\n                    self.stats.add_critical(\n                        ANALYSIS_HEADING_STRUCTURE,\n                        Some(&format!(\"{} - multiple h1 tags found\", html)),\n                    );\n                } else {\n                    found_h1 = true;\n                }\n            }\n\n            if current_level > previous_level + 1 {\n                let msg = if previous_level > 0 {\n                    format!(\n                        \"Heading structure is skipping levels: found an <h{}> after an <h{}>.\",\n                        current_level, previous_level\n                    )\n                } else {\n                    format!(\n                        \"Heading structure is skipping levels: found an <h{}> without a previous higher heading.\",\n                        current_level\n                    )\n                };\n                warning_issues.push(msg);\n                self.stats.add_warning(\n                    ANALYSIS_HEADING_STRUCTURE,\n                    Some(&format!(\n                        \"{} - found <h{}> {}\",\n                        html,\n                        current_level,\n                        if previous_level > 0 {\n                            format!(\"after an 
<h{}>.\", previous_level)\n                        } else {\n                            \"without a previous higher heading.\".to_string()\n                        }\n                    )),\n                );\n            }\n\n            previous_level = current_level;\n        }\n\n        if !found_h1 {\n            critical_issues.push(\"No <h1> tag found in the HTML content.\".to_string());\n            self.pages_without_h1 += 1;\n        } else {\n            result.add_ok(\n                \"At least one h1 tag was found.\".to_string(),\n                ANALYSIS_HEADING_STRUCTURE,\n                None,\n            );\n            self.stats.add_ok(\n                ANALYSIS_HEADING_STRUCTURE,\n                Some(&format!(\"{} - at least one h1 tag found\", html)),\n            );\n\n            if !critical_issues.is_empty() {\n                self.pages_with_multiple_h1 += 1;\n            }\n        }\n\n        let has_critical = !critical_issues.is_empty();\n        let has_warning = !warning_issues.is_empty();\n        let critical_count = critical_issues.len();\n        let warning_count = warning_issues.len();\n\n        if has_critical {\n            if !found_h1 {\n                result.add_critical(\n                    \"No <h1> found.\".to_string(),\n                    ANALYSIS_HEADING_STRUCTURE,\n                    Some(critical_issues),\n                );\n            } else {\n                result.add_critical(\n                    format!(\"Up to {} headings <h1> found.\", critical_count + 1),\n                    ANALYSIS_HEADING_STRUCTURE,\n                    Some(critical_issues),\n                );\n            }\n        }\n        if has_warning {\n            result.add_warning(\n                format!(\"{} heading structure issue(s) found.\", warning_count),\n                ANALYSIS_HEADING_STRUCTURE,\n                Some(warning_issues),\n            );\n            self.pages_with_skipped_heading_levels += 1;\n     
   }\n        if !has_critical && !has_warning {\n            result.add_ok(\n                \"Heading structure is valid.\".to_string(),\n                ANALYSIS_HEADING_STRUCTURE,\n                None,\n            );\n            self.stats.add_ok(\n                ANALYSIS_HEADING_STRUCTURE,\n                Some(&format!(\"{} - heading structure is valid\", html)),\n            );\n        }\n    }\n\n    fn check_non_clickable_phone_numbers(&mut self, html: &str, result: &mut UrlAnalysisResult) {\n        let all_phones = parse_phone_numbers_from_html(html, false);\n        let non_clickable = parse_phone_numbers_from_html(html, true);\n\n        if !non_clickable.is_empty() {\n            result.add_warning(\n                format!(\"{} non-clickable phone number(s) found.\", non_clickable.len()),\n                ANALYSIS_NON_CLICKABLE_PHONE_NUMBERS,\n                Some(non_clickable.clone()),\n            );\n            for phone in &non_clickable {\n                self.stats\n                    .add_warning(ANALYSIS_NON_CLICKABLE_PHONE_NUMBERS, Some(phone));\n            }\n            self.pages_with_non_clickable_phone_numbers += 1;\n        } else {\n            result.add_ok(\n                \"No non-clickable phone numbers found.\".to_string(),\n                ANALYSIS_NON_CLICKABLE_PHONE_NUMBERS,\n                None,\n            );\n            for phone in &all_phones {\n                if !non_clickable.contains(phone) {\n                    self.stats.add_ok(ANALYSIS_NON_CLICKABLE_PHONE_NUMBERS, Some(phone));\n                }\n            }\n        }\n    }\n\n    fn check_title_uniqueness(\n        &mut self,\n        titles: &[Option<String>],\n        status: &Status,\n        output: &mut dyn Output,\n    ) -> HashMap<String, String> {\n        let summary_code = \"title-uniqueness\";\n\n        // Check unfiltered array first, then filter nulls\n        if titles.is_empty() {\n            
status.add_warning_to_summary(summary_code, \"No titles provided for uniqueness check.\");\n            return Self::get_analysis_result(ANALYSIS_TITLE_UNIQUENESS, 0, 0, 1, 0);\n        }\n\n        let filtered: Vec<&str> = titles.iter().filter_map(|t| t.as_deref()).collect();\n\n        if filtered.len() <= 1 {\n            status.add_ok_to_summary(summary_code, \"Only one title provided for uniqueness check.\");\n            return Self::get_analysis_result(ANALYSIS_TITLE_UNIQUENESS, 1, 0, 0, 0);\n        }\n\n        let mut counts: HashMap<&str, usize> = HashMap::new();\n        for title in &filtered {\n            *counts.entry(title).or_insert(0) += 1;\n        }\n\n        let total = filtered.len();\n        let mut ok = 0usize;\n        let mut warnings = 0usize;\n        let mut highest_pct = 0usize;\n        let mut non_unique_found = false;\n\n        for (title, count) in &counts {\n            let pct = (*count * 100) / total;\n            highest_pct = highest_pct.max(pct);\n\n            if *count > 1 && pct > self.title_uniqueness_percentage {\n                status.add_warning_to_summary(\n                    summary_code,\n                    &format!(\n                        \"The title '{}' exceeds the allowed {}% duplicity. 
{}% of pages have this same title.\",\n                        title, self.title_uniqueness_percentage, pct\n                    ),\n                );\n                non_unique_found = true;\n                warnings += 1;\n            } else {\n                ok += 1;\n            }\n        }\n\n        // Build top non-unique titles table\n        let mut sorted_counts: Vec<(&str, usize)> = counts.into_iter().collect();\n        sorted_counts.sort_by(|a, b| b.1.cmp(&a.1));\n\n        let mut top_titles_data: Vec<HashMap<String, String>> = Vec::new();\n        for (title, count) in sorted_counts.iter().take(10) {\n            if *count > 1 {\n                let mut row = HashMap::new();\n                row.insert(\"count\".to_string(), count.to_string());\n                row.insert(\"title\".to_string(), title.to_string());\n                top_titles_data.push(row);\n            }\n        }\n\n        let console_width = utils::get_console_width();\n        let title_col_width = (console_width as i32 - 10).clamp(20, 200);\n\n        let columns = vec![\n            SuperTableColumn::new(\n                \"count\".to_string(),\n                \"Count\".to_string(),\n                5,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"title\".to_string(),\n                \"Title\".to_string(),\n                title_col_width,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_NON_UNIQUE_TITLES.to_string(),\n            \"TOP non-unique titles\".to_string(),\n            \"Nothing to report.\".to_string(),\n            columns,\n            true,\n            
Some(\"count\".to_string()),\n            \"DESC\".to_string(),\n            None,\n            None,\n            None,\n        );\n        super_table.set_data(top_titles_data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_end(super_table);\n\n        if !non_unique_found {\n            status.add_ok_to_summary(\n                summary_code,\n                &format!(\n                    \"All {} unique title(s) are within the allowed {}% duplicity. Highest duplicity title has {}%.\",\n                    ok, self.title_uniqueness_percentage, highest_pct\n                ),\n            );\n        }\n\n        Self::get_analysis_result(ANALYSIS_TITLE_UNIQUENESS, ok, 0, warnings, 0)\n    }\n\n    fn check_meta_description_uniqueness(\n        &mut self,\n        descriptions: &[Option<String>],\n        status: &Status,\n        output: &mut dyn Output,\n    ) -> HashMap<String, String> {\n        let summary_code = \"meta-description-uniqueness\";\n\n        // Include empty strings for pages without descriptions\n        let filtered: Vec<&str> = descriptions.iter().map(|d| d.as_deref().unwrap_or(\"\")).collect();\n\n        if filtered.is_empty() {\n            status.add_warning_to_summary(summary_code, \"No meta descriptions provided for uniqueness check.\");\n            return Self::get_analysis_result(ANALYSIS_DESCRIPTION_UNIQUENESS, 0, 0, 1, 0);\n        }\n\n        if filtered.len() <= 1 {\n            status.add_ok_to_summary(summary_code, \"Only one meta description provided for uniqueness check.\");\n            return Self::get_analysis_result(ANALYSIS_DESCRIPTION_UNIQUENESS, 1, 0, 0, 0);\n        }\n\n        let mut counts: HashMap<&str, usize> = HashMap::new();\n        for desc in &filtered {\n            *counts.entry(desc).or_insert(0) += 1;\n        }\n\n        let total = filtered.len();\n        let mut ok = 0usize;\n        let 
mut warnings = 0usize;\n        let mut highest_pct = 0usize;\n        let mut non_unique_found = false;\n\n        for (desc, count) in &counts {\n            let pct = (*count * 100) / total;\n            highest_pct = highest_pct.max(pct);\n\n            if *count > 1 && pct > self.meta_description_uniqueness_percentage {\n                status.add_warning_to_summary(\n                    summary_code,\n                    &format!(\n                        \"The description '{}' exceeds the allowed {}% duplicity. {}% of pages have this same description.\",\n                        desc, self.meta_description_uniqueness_percentage, pct\n                    ),\n                );\n                non_unique_found = true;\n                warnings += 1;\n            } else {\n                ok += 1;\n            }\n        }\n\n        let mut sorted_counts: Vec<(&str, usize)> = counts.into_iter().collect();\n        sorted_counts.sort_by(|a, b| b.1.cmp(&a.1));\n\n        let mut top_desc_data: Vec<HashMap<String, String>> = Vec::new();\n        for (desc, count) in sorted_counts.iter().take(10) {\n            if *count > 1 {\n                let mut row = HashMap::new();\n                row.insert(\"count\".to_string(), count.to_string());\n                row.insert(\"description\".to_string(), desc.to_string());\n                top_desc_data.push(row);\n            }\n        }\n\n        let console_width = utils::get_console_width();\n        let desc_col_width = (console_width as i32 - 10).clamp(20, 200);\n\n        let columns = vec![\n            SuperTableColumn::new(\n                \"count\".to_string(),\n                \"Count\".to_string(),\n                5,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"description\".to_string(),\n                
\"Description\".to_string(),\n                desc_col_width,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_NON_UNIQUE_DESCRIPTIONS.to_string(),\n            \"TOP non-unique descriptions\".to_string(),\n            \"Nothing to report.\".to_string(),\n            columns,\n            true,\n            Some(\"count\".to_string()),\n            \"DESC\".to_string(),\n            None,\n            None,\n            None,\n        );\n        super_table.set_data(top_desc_data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_end(super_table);\n\n        if !non_unique_found {\n            status.add_ok_to_summary(\n                summary_code,\n                &format!(\n                    \"All {} description(s) are within the allowed {}% duplicity. 
Highest duplicity description has {}%.\",\n                    ok, self.meta_description_uniqueness_percentage, highest_pct\n                ),\n            );\n        }\n\n        Self::get_analysis_result(ANALYSIS_DESCRIPTION_UNIQUENESS, ok, 0, warnings, 0)\n    }\n\n    fn check_brotli_support(&self, urls: &[&VisitedUrl], status: &Status) -> HashMap<String, String> {\n        let summary_code = \"brotli-support\";\n        let without_brotli = urls\n            .iter()\n            .filter(|u| u.content_encoding.as_deref() != Some(\"br\"))\n            .count();\n        let with_brotli = urls.len().saturating_sub(without_brotli);\n\n        if without_brotli > 0 {\n            status.add_warning_to_summary(\n                summary_code,\n                &format!(\"{} page(s) do not support Brotli compression.\", without_brotli),\n            );\n        } else {\n            status.add_ok_to_summary(summary_code, \"All pages support Brotli compression.\");\n        }\n\n        Self::get_analysis_result(ANALYSIS_BROTLI_SUPPORT, with_brotli, 0, without_brotli, 0)\n    }\n\n    fn check_webp_support(&self, urls: &[&VisitedUrl], status: &Status) -> HashMap<String, String> {\n        let summary_code = \"webp-support\";\n        let webp_count = urls\n            .iter()\n            .filter(|u| u.content_type_header.as_deref() == Some(\"image/webp\"))\n            .count();\n        let avif_count = urls\n            .iter()\n            .filter(|u| u.content_type_header.as_deref() == Some(\"image/avif\"))\n            .count();\n\n        if webp_count > 0 {\n            status.add_ok_to_summary(\n                summary_code,\n                &format!(\"{} WebP image(s) found on the website.\", webp_count),\n            );\n        } else if avif_count > 0 {\n            status.add_ok_to_summary(\n                summary_code,\n                &format!(\n                    \"No WebP images found, but AVIF (more modern format) is supported with {} 
image(s).\",\n                    avif_count\n                ),\n            );\n            return Self::get_analysis_result(ANALYSIS_WEBP_SUPPORT, 1, 0, 0, 0);\n        } else {\n            status.add_warning_to_summary(summary_code, \"No WebP image found on the website.\");\n        }\n\n        Self::get_analysis_result(\n            ANALYSIS_WEBP_SUPPORT,\n            webp_count,\n            0,\n            if webp_count > 0 { 0 } else { 1 },\n            0,\n        )\n    }\n\n    fn check_avif_support(&self, urls: &[&VisitedUrl], status: &Status) -> HashMap<String, String> {\n        let summary_code = \"avif-support\";\n        let avif_count = urls\n            .iter()\n            .filter(|u| u.content_type_header.as_deref() == Some(\"image/avif\"))\n            .count();\n\n        if avif_count > 0 {\n            status.add_ok_to_summary(\n                summary_code,\n                &format!(\"{} AVIF image(s) found on the website.\", avif_count),\n            );\n        } else {\n            status.add_warning_to_summary(summary_code, \"No AVIF image found on the website.\");\n        }\n\n        Self::get_analysis_result(\n            ANALYSIS_AVIF_SUPPORT,\n            avif_count,\n            0,\n            if avif_count > 0 { 0 } else { 1 },\n            0,\n        )\n    }\n\n    fn set_findings_to_summary(&self, status: &Status) {\n        // Missing quotes\n        if self.pages_with_missing_quotes > 0 {\n            status.add_warning_to_summary(\n                \"pages-with-missing-quotes\",\n                &format!(\n                    \"{} page(s) with missing quotes on attributes\",\n                    self.pages_with_missing_quotes\n                ),\n            );\n        } else {\n            status.add_ok_to_summary(\"pages-with-missing-quotes\", \"All pages have quoted attributes\");\n        }\n\n        // Inline SVGs\n        if self.pages_with_large_svgs > 0 {\n            status.add_warning_to_summary(\n          
      \"pages-with-large-svgs\",\n                &format!(\n                    \"{} page(s) with large inline SVGs (> {} bytes)\",\n                    self.pages_with_large_svgs, self.max_inline_svg_size\n                ),\n            );\n        } else {\n            status.add_ok_to_summary(\n                \"pages-with-large-svgs\",\n                &format!(\n                    \"All pages have inline SVGs smaller than {} bytes\",\n                    self.max_inline_svg_size\n                ),\n            );\n        }\n\n        if self.pages_with_duplicated_svgs > 0 {\n            status.add_warning_to_summary(\n                \"pages-with-duplicated-svgs\",\n                &format!(\n                    \"{} page(s) with duplicated inline SVGs (> {} duplicates)\",\n                    self.pages_with_duplicated_svgs, self.max_inline_svg_duplicates\n                ),\n            );\n        } else {\n            status.add_ok_to_summary(\n                \"pages-with-duplicated-svgs\",\n                &format!(\n                    \"All pages have inline SVGs with less than {} duplicates\",\n                    self.max_inline_svg_duplicates\n                ),\n            );\n        }\n\n        if self.pages_with_invalid_svgs > 0 {\n            status.add_warning_to_summary(\n                \"pages-with-invalid-svgs\",\n                &format!(\"{} page(s) with invalid inline SVGs\", self.pages_with_invalid_svgs),\n            );\n        } else {\n            status.add_ok_to_summary(\"pages-with-invalid-svgs\", \"All pages have valid or none inline SVGs\");\n        }\n\n        // Heading structure\n        if self.pages_with_multiple_h1 > 0 {\n            status.add_critical_to_summary(\n                \"pages-with-multiple-h1\",\n                &format!(\"{} page(s) with multiple <h1> headings\", self.pages_with_multiple_h1),\n            );\n        } else {\n            status.add_ok_to_summary(\"pages-with-multiple-h1\", \"All 
pages without multiple <h1> headings\");\n        }\n\n        if self.pages_without_h1 > 0 {\n            status.add_critical_to_summary(\n                \"pages-without-h1\",\n                &format!(\"{} page(s) without <h1> heading\", self.pages_without_h1),\n            );\n        } else {\n            status.add_ok_to_summary(\"pages-without-h1\", \"All pages have <h1> heading\");\n        }\n\n        if self.pages_with_skipped_heading_levels > 0 {\n            status.add_warning_to_summary(\n                \"pages-with-skipped-heading-levels\",\n                &format!(\n                    \"{} page(s) with skipped heading levels\",\n                    self.pages_with_skipped_heading_levels\n                ),\n            );\n        } else {\n            status.add_ok_to_summary(\n                \"pages-with-skipped-heading-levels\",\n                \"All pages have heading structure without skipped levels\",\n            );\n        }\n\n        // DOM depth\n        if self.pages_with_deep_dom > 0 {\n            status.add_warning_to_summary(\n                \"pages-with-deep-dom\",\n                &format!(\n                    \"{} page(s) with deep DOM (> {} levels)\",\n                    self.pages_with_deep_dom, self.max_dom_depth_warning\n                ),\n            );\n        } else {\n            status.add_ok_to_summary(\n                \"pages-with-deep-dom\",\n                &format!(\"All pages have DOM depth less than {}\", self.max_dom_depth_warning),\n            );\n        }\n\n        // Non-clickable phone numbers\n        if self.pages_with_non_clickable_phone_numbers > 0 {\n            status.add_warning_to_summary(\n                \"pages-with-non-clickable-phone-numbers\",\n                &format!(\n                    \"{} page(s) with non-clickable (non-interactive) phone numbers\",\n                    self.pages_with_non_clickable_phone_numbers\n                ),\n            );\n        } else {\n        
    status.add_ok_to_summary(\n                \"pages-with-non-clickable-phone-numbers\",\n                \"All pages have clickable (interactive) phone numbers\",\n            );\n        }\n    }\n}\n\nimpl Analyzer for BestPracticeAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        let max_svg_size = self.max_inline_svg_size;\n        let max_svg_dup = self.max_inline_svg_duplicates;\n        let max_svg_dup_size = self.max_inline_svg_duplicate_size;\n        let max_dom_depth = self.max_dom_depth_warning;\n        let title_pct = self.title_uniqueness_percentage;\n        let desc_pct = self.meta_description_uniqueness_percentage;\n\n        let columns = vec![\n            SuperTableColumn::new(\n                \"analysisName\".to_string(),\n                \"Analysis name\".to_string(),\n                -1, // AUTO_WIDTH\n                Some(Box::new(move |value: &str, _render_into: &str| match value {\n                    \"Large inline SVGs\" => format!(\"{} (> {} B)\", value, max_svg_size),\n                    \"Duplicate inline SVGs\" => format!(\"{} (> {} and > {} B)\", value, max_svg_dup, max_svg_dup_size),\n                    \"DOM depth\" => format!(\"{} (> {})\", value, max_dom_depth),\n                    \"Title uniqueness\" => format!(\"{} (> {}%)\", value, title_pct),\n                    \"Description uniqueness\" => format!(\"{} (> {}%)\", value, desc_pct),\n                    _ => value.to_string(),\n                })),\n                None,\n                false,\n                true,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"ok\".to_string(),\n                \"OK\".to_string(),\n                5,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<usize>()\n                        && v > 0\n                    {\n                 
       return utils::get_color_text(&v.to_string(), \"green\", false);\n                    }\n                    \"0\".to_string()\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"notice\".to_string(),\n                \"Notice\".to_string(),\n                6,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<usize>()\n                        && v > 0\n                    {\n                        return utils::get_color_text(&v.to_string(), \"blue\", false);\n                    }\n                    \"0\".to_string()\n                })),\n                None,\n                false,\n                false, // color-only formatter doesn't change visible length\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"warning\".to_string(),\n                \"Warning\".to_string(),\n                7,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<usize>()\n                        && v > 0\n                    {\n                        return utils::get_color_text(&v.to_string(), \"magenta\", true);\n                    }\n                    \"0\".to_string()\n                })),\n                None,\n                false,\n                false, // color-only formatter doesn't change visible length\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"critical\".to_string(),\n                \"Critical\".to_string(),\n                8,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<usize>()\n       
                 && v > 0\n                    {\n                        return utils::get_color_text(&v.to_string(), \"red\", true);\n                    }\n                    \"0\".to_string()\n                })),\n                None,\n                false,\n                false, // color-only formatter doesn't change visible length\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let data = self.analyze_urls(status, output);\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_BEST_PRACTICES.to_string(),\n            \"Best practices\".to_string(),\n            \"Nothing to report.\".to_string(),\n            columns,\n            true,\n            None,\n            \"ASC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_data(data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_end(super_table);\n\n        self.set_findings_to_summary(status);\n    }\n\n    fn analyze_visited_url(\n        &mut self,\n        visited_url: &VisitedUrl,\n        body: Option<&str>,\n        _headers: Option<&HashMap<String, String>>,\n    ) -> Option<UrlAnalysisResult> {\n        let is_html = visited_url.content_type == ContentTypeId::Html && body.is_some();\n\n        if !is_html {\n            return None;\n        }\n\n        let html = body?;\n        let mut result = UrlAnalysisResult::new();\n\n        let s = Instant::now();\n        self.check_inline_svg(html, &mut result);\n        self.base.measure_exec_time(\"BestPracticeAnalyzer\", \"checkInlineSvg\", s);\n\n        let s = Instant::now();\n        self.check_missing_quotes_on_attributes(html, &mut result);\n        self.base\n            .measure_exec_time(\"BestPracticeAnalyzer\", \"checkMissingQuotesOnAttributes\", s);\n\n        let s = Instant::now();\n      
  self.check_max_dom_depth(html, &visited_url.url, &mut result);\n        self.base\n            .measure_exec_time(\"BestPracticeAnalyzer\", \"checkMaxDOMDepth\", s);\n\n        let s = Instant::now();\n        self.check_heading_structure(html, &mut result);\n        self.base\n            .measure_exec_time(\"BestPracticeAnalyzer\", \"checkHeadingStructure\", s);\n\n        let s = Instant::now();\n        self.check_non_clickable_phone_numbers(html, &mut result);\n        self.base\n            .measure_exec_time(\"BestPracticeAnalyzer\", \"checkNonClickablePhoneNumbers\", s);\n\n        Some(result)\n    }\n\n    fn show_analyzed_visited_url_result_as_column(&self) -> Option<ExtraColumn> {\n        ExtraColumn::new(\"Best pr.\".to_string(), Some(8), false, None, None, None).ok()\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        170\n    }\n\n    fn get_name(&self) -> &str {\n        \"BestPracticeAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n\n/// Validate SVG XML and return None for valid or Some(errors) for invalid\nfn validate_svg(svg: &str) -> Option<Vec<String>> {\n    use quick_xml::Reader;\n    use quick_xml::events::Event;\n\n    let mut reader = Reader::from_str(svg);\n    let mut errors = Vec::new();\n\n    loop {\n        match reader.read_event() {\n            Ok(Event::Eof) => break,\n            Ok(_) => {}\n            Err(e) => {\n                errors.push(format!(\"{}\", e));\n            }\n        }\n    }\n\n    if errors.is_empty() { None } else { Some(errors) }\n}\n\n/// Sanitize SVG: remove content, keep only the opening tag\nfn sanitize_svg(svg: &str) -> String {\n    if let Some(end) = svg.find('>') {\n        format!(\"{}> ...\", &svg[..end])\n    } else {\n        svg.to_string()\n 
   }\n}\n\n/// Find max DOM depth using the scraper tree\nfn find_max_depth(node_ref: ego_tree::NodeRef<scraper::Node>, depth: usize) -> usize {\n    let mut max = depth;\n    for child in node_ref.children() {\n        let child_depth = find_max_depth(child, depth + 1);\n        max = max.max(child_depth);\n    }\n    max\n}\n\n/// Parse phone numbers from HTML. Returns numbers found outside tel: links if only_non_clickable is true.\nfn parse_phone_numbers_from_html(html: &str, only_non_clickable: bool) -> Vec<String> {\n    use once_cell::sync::Lazy;\n    // Formats with country codes and spaces, e.g.: +420 123 456 789 or +1234 1234567890\n    static RE_PHONE_COUNTRY: Lazy<Regex> = Lazy::new(|| Regex::new(r\"\\+\\d{1,4}(\\s?[0-9\\- ]{1,5}){1,5}\").unwrap());\n    // Formats with country codes without spaces, e.g.: +420123456789\n    static RE_PHONE_NO_SPACE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"\\+[0-9\\- ]{7,20}\").unwrap());\n    // US format with parentheses, e.g.: (123) 456-7890\n    static RE_PHONE_US: Lazy<Regex> = Lazy::new(|| Regex::new(r\"\\(\\d{1,5}\\)\\s?\\d{3,4}-\\d{4}\").unwrap());\n    // Regular format with dashes, e.g.: 123-456-7890\n    static RE_PHONE_DASH: Lazy<Regex> = Lazy::new(|| Regex::new(r\"\\d{1,5}-\\d{3,4}-\\d{4}\").unwrap());\n\n    let mut phones: Vec<String> = Vec::new();\n\n    // Strip JavaScript and CSS content first (phone numbers are not visible in these)\n    let html_clean = strip_js_and_css(html);\n\n    // Replace &nbsp; with space\n    let html_clean = html_clean.replace(\"&nbsp;\", \" \");\n\n    let phone_regexes: [&Regex; 4] = [&RE_PHONE_COUNTRY, &RE_PHONE_NO_SPACE, &RE_PHONE_US, &RE_PHONE_DASH];\n    for re in &phone_regexes {\n        for m in re.find_iter(&html_clean) {\n            let phone = m.as_str().trim().to_string();\n            if !phones.contains(&phone) {\n                phones.push(phone);\n            }\n        }\n    }\n\n    // Filter: phone number must be at least 8 chars\n    phones.retain(|p| 
p.len() >= 8);\n\n    if only_non_clickable {\n        phones.retain(|phone| {\n            let escaped = regex::escape(phone);\n\n            // Check pattern 1: <a href=\"tel:PHONE\">...</a>\n            let tel_pattern1 = format!(r#\"<a[^>]*href=[\"']tel:{}[\"'][^>]*>.*?</a>\"#, escaped);\n            let in_tel1 = Regex::new(&tel_pattern1).map(|re| re.is_match(html)).unwrap_or(false);\n\n            // Check pattern 2: <a href=\"tel:...\">...PHONE...</a>\n            let tel_pattern2 = format!(r#\"(?is)<a[^>]*href=[\"']tel:[^\"'>]+[\"'][^>]*>.*?{}.*?</a>\"#, escaped);\n            let in_tel2 = Regex::new(&tel_pattern2).map(|re| re.is_match(html)).unwrap_or(false);\n\n            // Check unwanted pattern: phone number is part of a larger alphanumeric string\n            let unwanted_pattern = format!(r\"(?i)[0-9a-z._-]{}[0-9a-z._-]\", escaped);\n            let is_unwanted = Regex::new(&unwanted_pattern)\n                .map(|re| re.is_match(html))\n                .unwrap_or(false);\n\n            !in_tel1 && !in_tel2 && !is_unwanted\n        });\n    }\n\n    phones\n}\n\n/// Strip JavaScript content from HTML\nfn strip_js_and_css(html: &str) -> String {\n    use once_cell::sync::Lazy;\n    static RE_SCRIPT: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?is)<script[^>]*>.*?</script>\").unwrap());\n    static RE_STYLE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?is)<style[^>]*>.*?</style>\").unwrap());\n\n    let result = RE_SCRIPT.replace_all(html, \" \").to_string();\n    RE_STYLE.replace_all(&result, \" \").to_string()\n}\n"
  },
  {
    "path": "src/analysis/caching_analyzer.rs",
    "content": "// SiteOne Crawler - CachingAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::result::visited_url::VisitedUrl;\nuse crate::utils;\n\nconst SUPER_TABLE_CACHING_PER_CONTENT_TYPE: &str = \"caching-per-content-type\";\nconst SUPER_TABLE_CACHING_PER_DOMAIN: &str = \"caching-per-domain\";\nconst SUPER_TABLE_CACHING_PER_DOMAIN_AND_CONTENT_TYPE: &str = \"caching-per-domain-and-content-type\";\n\npub struct CachingAnalyzer {\n    base: BaseAnalyzer,\n}\n\nimpl Default for CachingAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl CachingAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n        }\n    }\n\n    fn update_cache_stat(stat: &mut CacheStat, visited_url: &VisitedUrl) {\n        stat.count += 1;\n        if let Some(lifetime) = visited_url.cache_lifetime {\n            stat.count_with_lifetime += 1;\n            stat.total_lifetime += lifetime;\n            stat.avg_lifetime = Some(stat.total_lifetime as f64 / stat.count_with_lifetime as f64);\n            stat.min_lifetime = Some(match stat.min_lifetime {\n                Some(min) => min.min(lifetime),\n                None => lifetime,\n            });\n            stat.max_lifetime = Some(match stat.max_lifetime {\n                Some(max) => max.max(lifetime),\n                None => lifetime,\n            });\n        }\n    }\n\n    fn build_lifetime_columns(first_col_name: &str, first_col_key: &str) -> Vec<SuperTableColumn> {\n        let mut columns = vec![SuperTableColumn::new(\n            first_col_key.to_string(),\n            first_col_name.to_string(),\n            if first_col_key == 
\"domain\" { 20 } else { 12 },\n            None,\n            None,\n            false,\n            false,\n            false,\n            true,\n            None,\n        )];\n\n        // Add cacheType column only when not the first column\n        if first_col_key != \"cacheType\" {\n            columns.push(SuperTableColumn::new(\n                \"cacheType\".to_string(),\n                \"Cache type\".to_string(),\n                12,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ));\n        }\n\n        columns.extend(vec![\n            SuperTableColumn::new(\n                \"count\".to_string(),\n                \"URLs\".to_string(),\n                5,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"avgLifetime\".to_string(),\n                \"AVG lifetime\".to_string(),\n                10,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<i64>() {\n                        utils::get_colored_cache_lifetime(v, 6)\n                    } else {\n                        \"-\".to_string()\n                    }\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"minLifetime\".to_string(),\n                \"MIN lifetime\".to_string(),\n                10,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<i64>() {\n                        utils::get_colored_cache_lifetime(v, 6)\n                    } else {\n                       
 \"-\".to_string()\n                    }\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"maxLifetime\".to_string(),\n                \"MAX lifetime\".to_string(),\n                10,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<i64>() {\n                        utils::get_colored_cache_lifetime(v, 6)\n                    } else {\n                        \"-\".to_string()\n                    }\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n        ]);\n\n        columns\n    }\n}\n\nimpl Analyzer for CachingAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        let visited_urls = status.get_visited_urls();\n\n        let mut stats_per_content_type: HashMap<String, CacheStatWithType> = HashMap::new();\n        let mut stats_per_domain: HashMap<String, CacheStatWithDomain> = HashMap::new();\n        let mut stats_per_domain_and_ct: HashMap<String, CacheStatWithDomainAndType> = HashMap::new();\n\n        for visited_url in &visited_urls {\n            let content_type_name = visited_url.content_type.name().to_string();\n            let cache_type_label = visited_url.get_cache_type_label();\n            let domain_name = visited_url.get_host().unwrap_or_else(|| \"unknown\".to_string());\n\n            // Per domain\n            {\n                let key = format!(\"{}.{}\", domain_name, cache_type_label);\n                let stat = stats_per_domain.entry(key).or_insert_with(|| CacheStatWithDomain {\n                    domain: domain_name.clone(),\n                    cache_type: cache_type_label.clone(),\n                    stat: 
CacheStat::default(),\n                });\n                Self::update_cache_stat(&mut stat.stat, visited_url);\n            }\n\n            // Per domain and content type\n            {\n                let key = format!(\"{}.{}.{}\", domain_name, content_type_name, cache_type_label);\n                let stat = stats_per_domain_and_ct\n                    .entry(key)\n                    .or_insert_with(|| CacheStatWithDomainAndType {\n                        domain: domain_name.clone(),\n                        content_type: content_type_name.clone(),\n                        cache_type: cache_type_label.clone(),\n                        stat: CacheStat::default(),\n                    });\n                Self::update_cache_stat(&mut stat.stat, visited_url);\n            }\n\n            // Per content type (only crawlable domains)\n            if visited_url.is_allowed_for_crawling {\n                let key = format!(\"{}.{}\", content_type_name, cache_type_label);\n                let stat = stats_per_content_type.entry(key).or_insert_with(|| CacheStatWithType {\n                    content_type: content_type_name.clone(),\n                    cache_type: cache_type_label.clone(),\n                    stat: CacheStat::default(),\n                });\n                Self::update_cache_stat(&mut stat.stat, visited_url);\n            }\n        }\n\n        // Per content type table\n        if !stats_per_content_type.is_empty() {\n            let data: Vec<HashMap<String, String>> = stats_per_content_type.values().map(|s| s.to_row()).collect();\n\n            let columns = Self::build_lifetime_columns(\"Content type\", \"contentType\");\n\n            let mut super_table = SuperTable::new(\n                SUPER_TABLE_CACHING_PER_CONTENT_TYPE.to_string(),\n                \"HTTP Caching by content type (only from crawlable domains)\".to_string(),\n                \"No URLs found.\".to_string(),\n                columns,\n                true,\n             
   Some(\"count\".to_string()),\n                \"DESC\".to_string(),\n                None,\n                None,\n                Some(\"HTTP cache\".to_string()),\n            );\n\n            super_table.set_data(data);\n            status.configure_super_table_url_stripping(&mut super_table);\n            output.add_super_table(&super_table);\n            status.add_super_table_at_beginning(super_table);\n        }\n\n        // Per domain table\n        {\n            let data: Vec<HashMap<String, String>> = stats_per_domain.values().map(|s| s.to_row()).collect();\n\n            let columns = Self::build_lifetime_columns(\"Domain\", \"domain\");\n\n            let mut super_table = SuperTable::new(\n                SUPER_TABLE_CACHING_PER_DOMAIN.to_string(),\n                \"HTTP Caching by domain\".to_string(),\n                \"No URLs found.\".to_string(),\n                columns,\n                true,\n                Some(\"count\".to_string()),\n                \"DESC\".to_string(),\n                None,\n                None,\n                None,\n            );\n\n            super_table.set_data(data);\n            status.configure_super_table_url_stripping(&mut super_table);\n            output.add_super_table(&super_table);\n            status.add_super_table_at_beginning(super_table);\n        }\n\n        // Per domain and content type table\n        {\n            let data: Vec<HashMap<String, String>> = stats_per_domain_and_ct.values().map(|s| s.to_row()).collect();\n\n            let mut columns = Self::build_lifetime_columns(\"Domain\", \"domain\");\n            columns.insert(\n                1,\n                SuperTableColumn::new(\n                    \"contentType\".to_string(),\n                    \"Content type\".to_string(),\n                    12,\n                    None,\n                    None,\n                    false,\n                    false,\n                    false,\n                    true,\n         
           None,\n                ),\n            );\n\n            let mut super_table = SuperTable::new(\n                SUPER_TABLE_CACHING_PER_DOMAIN_AND_CONTENT_TYPE.to_string(),\n                \"HTTP Caching by domain and content type\".to_string(),\n                \"No URLs found.\".to_string(),\n                columns,\n                true,\n                Some(\"count\".to_string()),\n                \"DESC\".to_string(),\n                None,\n                None,\n                None,\n            );\n\n            super_table.set_data(data);\n            status.configure_super_table_url_stripping(&mut super_table);\n            output.add_super_table(&super_table);\n            status.add_super_table_at_beginning(super_table);\n        }\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        116\n    }\n\n    fn get_name(&self) -> &str {\n        \"CachingAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n\n#[derive(Default)]\nstruct CacheStat {\n    count: usize,\n    count_with_lifetime: usize,\n    total_lifetime: i64,\n    avg_lifetime: Option<f64>,\n    min_lifetime: Option<i64>,\n    max_lifetime: Option<i64>,\n}\n\nstruct CacheStatWithType {\n    content_type: String,\n    cache_type: String,\n    stat: CacheStat,\n}\n\nimpl CacheStatWithType {\n    fn to_row(&self) -> HashMap<String, String> {\n        let mut row = HashMap::new();\n        row.insert(\"contentType\".to_string(), self.content_type.clone());\n        row.insert(\"cacheType\".to_string(), self.cache_type.clone());\n        row.insert(\"count\".to_string(), self.stat.count.to_string());\n        row.insert(\n            \"avgLifetime\".to_string(),\n            self.stat\n                .avg_lifetime\n                .map(|v| 
format!(\"{}\", v as i64))\n                .unwrap_or_default(),\n        );\n        row.insert(\n            \"minLifetime\".to_string(),\n            self.stat.min_lifetime.map(|v| v.to_string()).unwrap_or_default(),\n        );\n        row.insert(\n            \"maxLifetime\".to_string(),\n            self.stat.max_lifetime.map(|v| v.to_string()).unwrap_or_default(),\n        );\n        row\n    }\n}\n\nstruct CacheStatWithDomain {\n    domain: String,\n    cache_type: String,\n    stat: CacheStat,\n}\n\nimpl CacheStatWithDomain {\n    fn to_row(&self) -> HashMap<String, String> {\n        let mut row = HashMap::new();\n        row.insert(\"domain\".to_string(), self.domain.clone());\n        row.insert(\"cacheType\".to_string(), self.cache_type.clone());\n        row.insert(\"count\".to_string(), self.stat.count.to_string());\n        row.insert(\n            \"avgLifetime\".to_string(),\n            self.stat\n                .avg_lifetime\n                .map(|v| format!(\"{}\", v as i64))\n                .unwrap_or_default(),\n        );\n        row.insert(\n            \"minLifetime\".to_string(),\n            self.stat.min_lifetime.map(|v| v.to_string()).unwrap_or_default(),\n        );\n        row.insert(\n            \"maxLifetime\".to_string(),\n            self.stat.max_lifetime.map(|v| v.to_string()).unwrap_or_default(),\n        );\n        row\n    }\n}\n\nstruct CacheStatWithDomainAndType {\n    domain: String,\n    content_type: String,\n    cache_type: String,\n    stat: CacheStat,\n}\n\nimpl CacheStatWithDomainAndType {\n    fn to_row(&self) -> HashMap<String, String> {\n        let mut row = HashMap::new();\n        row.insert(\"domain\".to_string(), self.domain.clone());\n        row.insert(\"contentType\".to_string(), self.content_type.clone());\n        row.insert(\"cacheType\".to_string(), self.cache_type.clone());\n        row.insert(\"count\".to_string(), self.stat.count.to_string());\n        row.insert(\n            
\"avgLifetime\".to_string(),\n            self.stat\n                .avg_lifetime\n                .map(|v| format!(\"{}\", v as i64))\n                .unwrap_or_default(),\n        );\n        row.insert(\n            \"minLifetime\".to_string(),\n            self.stat.min_lifetime.map(|v| v.to_string()).unwrap_or_default(),\n        );\n        row.insert(\n            \"maxLifetime\".to_string(),\n            self.stat.max_lifetime.map(|v| v.to_string()).unwrap_or_default(),\n        );\n        row\n    }\n}\n"
  },
  {
    "path": "src/analysis/content_type_analyzer.rs",
    "content": "// SiteOne Crawler - ContentTypeAnalyzer\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse std::collections::HashMap;\r\n\r\nuse crate::analysis::analyzer::Analyzer;\r\nuse crate::analysis::base_analyzer::BaseAnalyzer;\r\nuse crate::components::super_table::SuperTable;\r\nuse crate::components::super_table_column::SuperTableColumn;\r\nuse crate::output::output::Output;\r\nuse crate::result::status::Status;\r\nuse crate::types::ContentTypeId;\r\nuse crate::utils;\r\n\r\nconst SUPER_TABLE_CONTENT_TYPES: &str = \"content-types\";\r\nconst SUPER_TABLE_CONTENT_MIME_TYPES: &str = \"content-types-raw\";\r\n\r\npub struct ContentTypeAnalyzer {\r\n    base: BaseAnalyzer,\r\n}\r\n\r\nimpl Default for ContentTypeAnalyzer {\r\n    fn default() -> Self {\r\n        Self::new()\r\n    }\r\n}\r\n\r\nimpl ContentTypeAnalyzer {\r\n    pub fn new() -> Self {\r\n        Self {\r\n            base: BaseAnalyzer::new(),\r\n        }\r\n    }\r\n\r\n    fn add_content_type_super_table(&self, status: &Status, output: &mut dyn Output) {\r\n        let visited_urls = status.get_visited_urls();\r\n        let content_type_ids = get_all_content_type_ids();\r\n\r\n        let mut stats: HashMap<String, ContentTypeStat> = HashMap::new();\r\n        for ct_id in &content_type_ids {\r\n            let key = format!(\"{:?}\", ct_id);\r\n            stats.insert(\r\n                key,\r\n                ContentTypeStat {\r\n                    content_type_id: *ct_id,\r\n                    content_type: ct_id.name().to_string(),\r\n                    count: 0,\r\n                    total_size: 0,\r\n                    total_time: 0.0,\r\n                    status_20x: 0,\r\n                    status_30x: 0,\r\n                    status_40x: 0,\r\n                    status_42x: 0,\r\n                    status_50x: 0,\r\n                    status_other: 0,\r\n                },\r\n            );\r\n        }\r\n\r\n        for visited_url in &visited_urls {\r\n        
    if visited_url.has_error_status_code() {\r\n                continue;\r\n            }\r\n            let key = format!(\"{:?}\", visited_url.content_type);\r\n            if let Some(stat) = stats.get_mut(&key) {\r\n                stat.count += 1;\r\n                stat.total_size += visited_url.size.unwrap_or(0);\r\n                stat.total_time += visited_url.request_time;\r\n\r\n                let status_code = visited_url.status_code;\r\n                if (200..300).contains(&status_code) {\r\n                    stat.status_20x += 1;\r\n                } else if (300..400).contains(&status_code) {\r\n                    stat.status_30x += 1;\r\n                } else if (400..420).contains(&status_code) {\r\n                    stat.status_40x += 1;\r\n                } else if (420..500).contains(&status_code) {\r\n                    stat.status_42x += 1;\r\n                } else if (500..600).contains(&status_code) {\r\n                    stat.status_50x += 1;\r\n                } else {\r\n                    stat.status_other += 1;\r\n                }\r\n            }\r\n        }\r\n\r\n        // Remove empty stats and compute avg time\r\n        let data: Vec<HashMap<String, String>> = stats\r\n            .values()\r\n            .filter(|s| s.count > 0)\r\n            .map(|s| {\r\n                let avg_time = s.total_time / s.count as f64;\r\n                let mut row = HashMap::new();\r\n                row.insert(\"contentType\".to_string(), s.content_type.clone());\r\n                row.insert(\"count\".to_string(), s.count.to_string());\r\n                row.insert(\"totalSize\".to_string(), s.total_size.to_string());\r\n                row.insert(\"totalTime\".to_string(), format!(\"{:.4}\", s.total_time));\r\n                row.insert(\"avgTime\".to_string(), format!(\"{:.4}\", avg_time));\r\n                row.insert(\"status20x\".to_string(), s.status_20x.to_string());\r\n                
row.insert(\"status30x\".to_string(), s.status_30x.to_string());\r\n                row.insert(\"status40x\".to_string(), s.status_40x.to_string());\r\n                row.insert(\"status42x\".to_string(), s.status_42x.to_string());\r\n                row.insert(\"status50x\".to_string(), s.status_50x.to_string());\r\n                row.insert(\"statusOther\".to_string(), s.status_other.to_string());\r\n                row\r\n            })\r\n            .collect();\r\n\r\n        let columns = build_content_type_columns();\r\n\r\n        let mut super_table = SuperTable::new(\r\n            SUPER_TABLE_CONTENT_TYPES.to_string(),\r\n            \"Content types\".to_string(),\r\n            \"No URLs found.\".to_string(),\r\n            columns,\r\n            true,\r\n            Some(\"count\".to_string()),\r\n            \"DESC\".to_string(),\r\n            None,\r\n            None,\r\n            None,\r\n        );\r\n\r\n        super_table.set_show_only_columns_with_values(true);\r\n        super_table.set_data(data);\r\n        status.configure_super_table_url_stripping(&mut super_table);\r\n        output.add_super_table(&super_table);\r\n        status.add_super_table_at_beginning(super_table);\r\n    }\r\n\r\n    fn add_content_type_raw_super_table(&self, status: &Status, output: &mut dyn Output) {\r\n        let visited_urls = status.get_visited_urls();\r\n\r\n        let mut stats: HashMap<String, MimeTypeStat> = HashMap::new();\r\n\r\n        for visited_url in &visited_urls {\r\n            if visited_url.has_error_status_code() {\r\n                continue;\r\n            }\r\n            let key = visited_url\r\n                .content_type_header\r\n                .clone()\r\n                .unwrap_or_else(|| \"unknown\".to_string());\r\n\r\n            let stat = stats.entry(key.clone()).or_insert_with(|| MimeTypeStat {\r\n                content_type: key,\r\n                count: 0,\r\n                total_size: 0,\r\n                
total_time: 0.0,\r\n                status_20x: 0,\r\n                status_30x: 0,\r\n                status_40x: 0,\r\n                status_42x: 0,\r\n                status_50x: 0,\r\n                status_other: 0,\r\n            });\r\n\r\n            stat.count += 1;\r\n            stat.total_size += visited_url.size.unwrap_or(0);\r\n            stat.total_time += visited_url.request_time;\r\n\r\n            let status_code = visited_url.status_code;\r\n            if (200..300).contains(&status_code) {\r\n                stat.status_20x += 1;\r\n            } else if (300..400).contains(&status_code) {\r\n                stat.status_30x += 1;\r\n            } else if (400..420).contains(&status_code) {\r\n                stat.status_40x += 1;\r\n            } else if (420..500).contains(&status_code) {\r\n                stat.status_42x += 1;\r\n            } else if (500..600).contains(&status_code) {\r\n                stat.status_50x += 1;\r\n            } else {\r\n                stat.status_other += 1;\r\n            }\r\n        }\r\n\r\n        let data: Vec<HashMap<String, String>> = stats\r\n            .values()\r\n            .map(|s| {\r\n                let avg_time = if s.count > 0 {\r\n                    s.total_time / s.count as f64\r\n                } else {\r\n                    0.0\r\n                };\r\n                let mut row = HashMap::new();\r\n                row.insert(\"contentType\".to_string(), s.content_type.clone());\r\n                row.insert(\"count\".to_string(), s.count.to_string());\r\n                row.insert(\"totalSize\".to_string(), s.total_size.to_string());\r\n                row.insert(\"totalTime\".to_string(), format!(\"{:.4}\", s.total_time));\r\n                row.insert(\"avgTime\".to_string(), format!(\"{:.4}\", avg_time));\r\n                row.insert(\"status20x\".to_string(), s.status_20x.to_string());\r\n                row.insert(\"status30x\".to_string(), 
s.status_30x.to_string());\r\n                row.insert(\"status40x\".to_string(), s.status_40x.to_string());\r\n                row.insert(\"status42x\".to_string(), s.status_42x.to_string());\r\n                row.insert(\"status50x\".to_string(), s.status_50x.to_string());\r\n                row.insert(\"statusOther\".to_string(), s.status_other.to_string());\r\n                row\r\n            })\r\n            .collect();\r\n\r\n        let mut columns = build_content_type_columns();\r\n        // Adjust content type column width for MIME types\r\n        if let Some(col) = columns.first_mut() {\r\n            col.width = 26;\r\n        }\r\n\r\n        let mut super_table = SuperTable::new(\r\n            SUPER_TABLE_CONTENT_MIME_TYPES.to_string(),\r\n            \"Content types (MIME types)\".to_string(),\r\n            \"No MIME types found.\".to_string(),\r\n            columns,\r\n            true,\r\n            Some(\"count\".to_string()),\r\n            \"DESC\".to_string(),\r\n            None,\r\n            None,\r\n            None,\r\n        );\r\n\r\n        super_table.set_show_only_columns_with_values(true);\r\n        super_table.set_data(data);\r\n        status.configure_super_table_url_stripping(&mut super_table);\r\n        output.add_super_table(&super_table);\r\n        status.add_super_table_at_beginning(super_table);\r\n    }\r\n}\r\n\r\nimpl Analyzer for ContentTypeAnalyzer {\r\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\r\n        self.add_content_type_super_table(status, output);\r\n        self.add_content_type_raw_super_table(status, output);\r\n    }\r\n\r\n    fn should_be_activated(&self) -> bool {\r\n        true\r\n    }\r\n\r\n    fn get_order(&self) -> i32 {\r\n        210\r\n    }\r\n\r\n    fn get_name(&self) -> &str {\r\n        \"ContentTypeAnalyzer\"\r\n    }\r\n\r\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\r\n        self.base.get_exec_times()\r\n    }\r\n\r\n    fn 
get_exec_counts(&self) -> &HashMap<String, usize> {\r\n        self.base.get_exec_counts()\r\n    }\r\n}\r\n\r\nstruct ContentTypeStat {\r\n    #[allow(dead_code)]\r\n    content_type_id: ContentTypeId,\r\n    content_type: String,\r\n    count: usize,\r\n    total_size: i64,\r\n    total_time: f64,\r\n    status_20x: usize,\r\n    status_30x: usize,\r\n    status_40x: usize,\r\n    status_42x: usize,\r\n    status_50x: usize,\r\n    status_other: usize,\r\n}\r\n\r\nstruct MimeTypeStat {\r\n    content_type: String,\r\n    count: usize,\r\n    total_size: i64,\r\n    total_time: f64,\r\n    status_20x: usize,\r\n    status_30x: usize,\r\n    status_40x: usize,\r\n    status_42x: usize,\r\n    status_50x: usize,\r\n    status_other: usize,\r\n}\r\n\r\nfn build_content_type_columns() -> Vec<SuperTableColumn> {\r\n    vec![\r\n        SuperTableColumn::new(\r\n            \"contentType\".to_string(),\r\n            \"Content type\".to_string(),\r\n            12,\r\n            None,\r\n            None,\r\n            false,\r\n            false,\r\n            false,\r\n            true,\r\n            None,\r\n        ),\r\n        SuperTableColumn::new(\r\n            \"count\".to_string(),\r\n            \"URLs\".to_string(),\r\n            5,\r\n            None,\r\n            None,\r\n            false,\r\n            false,\r\n            false,\r\n            true,\r\n            None,\r\n        ),\r\n        SuperTableColumn::new(\r\n            \"totalSize\".to_string(),\r\n            \"Total size\".to_string(),\r\n            10,\r\n            Some(Box::new(|value: &str, _render_into: &str| {\r\n                if let Ok(v) = value.parse::<i64>() {\r\n                    if v > 0 {\r\n                        utils::get_formatted_size(v, 0)\r\n                    } else {\r\n                        \"-\".to_string()\r\n                    }\r\n                } else {\r\n                    \"-\".to_string()\r\n                }\r\n            })),\r\n  
          None,\r\n            false,\r\n            false,\r\n            false,\r\n            true,\r\n            None,\r\n        ),\r\n        SuperTableColumn::new(\r\n            \"totalTime\".to_string(),\r\n            \"Total time\".to_string(),\r\n            10,\r\n            Some(Box::new(|value: &str, _render_into: &str| {\r\n                if let Ok(v) = value.parse::<f64>() {\r\n                    utils::get_formatted_duration(v)\r\n                } else {\r\n                    value.to_string()\r\n                }\r\n            })),\r\n            None,\r\n            false,\r\n            false,\r\n            false,\r\n            true,\r\n            None,\r\n        ),\r\n        SuperTableColumn::new(\r\n            \"avgTime\".to_string(),\r\n            \"Avg time\".to_string(),\r\n            8,\r\n            Some(Box::new(|value: &str, _render_into: &str| {\r\n                if let Ok(v) = value.parse::<f64>() {\r\n                    utils::get_colored_request_time(v, 8)\r\n                } else {\r\n                    value.to_string()\r\n                }\r\n            })),\r\n            None,\r\n            false,\r\n            false,\r\n            false,\r\n            true,\r\n            None,\r\n        ),\r\n        SuperTableColumn::new(\r\n            \"status20x\".to_string(),\r\n            \"Status 20x\".to_string(),\r\n            10,\r\n            Some(Box::new(|value: &str, _render_into: &str| {\r\n                if let Ok(v) = value.parse::<i32>() {\r\n                    if v > 0 {\r\n                        utils::get_color_text(&format!(\"{:<10}\", v), \"green\", false)\r\n                    } else {\r\n                        value.to_string()\r\n                    }\r\n                } else {\r\n                    value.to_string()\r\n                }\r\n            })),\r\n            None,\r\n            false,\r\n            false,\r\n            false,\r\n            true,\r\n            
None,\r\n        ),\r\n        SuperTableColumn::new(\r\n            \"status30x\".to_string(),\r\n            \"Status 30x\".to_string(),\r\n            10,\r\n            Some(Box::new(|value: &str, _render_into: &str| {\r\n                if let Ok(v) = value.parse::<i32>() {\r\n                    if v > 0 {\r\n                        utils::get_color_text(&format!(\"{:<10}\", v), \"yellow\", true)\r\n                    } else {\r\n                        value.to_string()\r\n                    }\r\n                } else {\r\n                    value.to_string()\r\n                }\r\n            })),\r\n            None,\r\n            false,\r\n            false,\r\n            false,\r\n            true,\r\n            None,\r\n        ),\r\n        SuperTableColumn::new(\r\n            \"status40x\".to_string(),\r\n            \"Status 40x\".to_string(),\r\n            10,\r\n            Some(Box::new(|value: &str, _render_into: &str| {\r\n                if let Ok(v) = value.parse::<i32>() {\r\n                    if v > 0 {\r\n                        utils::get_color_text(&format!(\"{:<10}\", v), \"magenta\", true)\r\n                    } else {\r\n                        value.to_string()\r\n                    }\r\n                } else {\r\n                    value.to_string()\r\n                }\r\n            })),\r\n            None,\r\n            false,\r\n            false,\r\n            false,\r\n            true,\r\n            None,\r\n        ),\r\n        SuperTableColumn::new(\r\n            \"status42x\".to_string(),\r\n            \"Status 42x\".to_string(),\r\n            10,\r\n            Some(Box::new(|value: &str, _render_into: &str| {\r\n                if let Ok(v) = value.parse::<i32>() {\r\n                    if v > 0 {\r\n                        utils::get_color_text(&format!(\"{:<10}\", v), \"magenta\", true)\r\n                    } else {\r\n                        value.to_string()\r\n                    }\r\n     
           } else {\r\n                    value.to_string()\r\n                }\r\n            })),\r\n            None,\r\n            false,\r\n            false,\r\n            false,\r\n            true,\r\n            None,\r\n        ),\r\n        SuperTableColumn::new(\r\n            \"status50x\".to_string(),\r\n            \"Status 50x\".to_string(),\r\n            10,\r\n            Some(Box::new(|value: &str, _render_into: &str| {\r\n                if let Ok(v) = value.parse::<i32>() {\r\n                    if v > 0 {\r\n                        utils::get_color_text(&format!(\"{:<10}\", v), \"red\", true)\r\n                    } else {\r\n                        value.to_string()\r\n                    }\r\n                } else {\r\n                    value.to_string()\r\n                }\r\n            })),\r\n            None,\r\n            false,\r\n            false,\r\n            false,\r\n            true,\r\n            None,\r\n        ),\r\n        SuperTableColumn::new(\r\n            \"statusOther\".to_string(),\r\n            \"Status ERR\".to_string(),\r\n            10,\r\n            Some(Box::new(|value: &str, _render_into: &str| {\r\n                if let Ok(v) = value.parse::<i32>() {\r\n                    if v > 0 {\r\n                        utils::get_color_text(&format!(\"{:<10}\", v), \"red\", true)\r\n                    } else {\r\n                        value.to_string()\r\n                    }\r\n                } else {\r\n                    value.to_string()\r\n                }\r\n            })),\r\n            None,\r\n            false,\r\n            false,\r\n            false,\r\n            true,\r\n            None,\r\n        ),\r\n    ]\r\n}\r\n\r\nfn get_all_content_type_ids() -> Vec<ContentTypeId> {\r\n    vec![\r\n        ContentTypeId::Html,\r\n        ContentTypeId::Script,\r\n        ContentTypeId::Stylesheet,\r\n        ContentTypeId::Image,\r\n        ContentTypeId::Video,\r\n        
ContentTypeId::Audio,\r\n        ContentTypeId::Font,\r\n        ContentTypeId::Document,\r\n        ContentTypeId::Json,\r\n        ContentTypeId::Xml,\r\n        ContentTypeId::Redirect,\r\n        ContentTypeId::Other,\r\n    ]\r\n}\r\n"
  },
  {
    "path": "src/analysis/dns_analyzer.rs",
    "content": "// SiteOne Crawler - DnsAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::analysis::result::dns_analysis_result::DnsAnalysisResult;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::utils;\n\nconst SUPER_TABLE_DNS: &str = \"dns\";\n\npub struct DnsAnalyzer {\n    base: BaseAnalyzer,\n}\n\nimpl Default for DnsAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl DnsAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n        }\n    }\n\n    /// Resolve DNS for the given domain using hickory-resolver.\n    fn get_dns_info(&self, domain: &str) -> Result<DnsAnalysisResult, String> {\n        use hickory_resolver::Resolver;\n        use hickory_resolver::proto::rr::RecordType;\n\n        let domain_owned = domain.to_string();\n\n        // Use block_in_place to allow blocking the current thread while running async DNS lookups\n        tokio::task::block_in_place(|| {\n            let rt = tokio::runtime::Handle::current();\n            rt.block_on(async {\n                let resolver = Resolver::builder_tokio()\n                    .map_err(|e| format!(\"Failed to create DNS resolver: {}\", e))?\n                    .build();\n\n                let mut resolved_domains = vec![domain_owned.clone()];\n                let mut ipv4_addresses = Vec::new();\n                let mut ipv6_addresses = Vec::new();\n\n                // Resolve CNAME records\n                if let Ok(cname_response) = resolver.lookup(domain_owned.as_str(), RecordType::CNAME).await {\n                    for record in cname_response.iter() {\n                        let cname_str = 
record.to_string().trim_end_matches('.').to_string();\n                        if !resolved_domains.contains(&cname_str) {\n                            resolved_domains.push(cname_str);\n                        }\n                    }\n                }\n\n                // Resolve A records (IPv4)\n                if let Ok(ipv4_response) = resolver.lookup(domain_owned.as_str(), RecordType::A).await {\n                    for record in ipv4_response.iter() {\n                        let ip_str = record.to_string();\n                        if !ip_str.is_empty() {\n                            ipv4_addresses.push(ip_str);\n                        }\n                    }\n                }\n\n                // Resolve AAAA records (IPv6)\n                if let Ok(ipv6_response) = resolver.lookup(domain_owned.as_str(), RecordType::AAAA).await {\n                    for record in ipv6_response.iter() {\n                        let ip_str = record.to_string();\n                        if !ip_str.is_empty() {\n                            ipv6_addresses.push(ip_str);\n                        }\n                    }\n                }\n\n                if ipv4_addresses.is_empty() && ipv6_addresses.is_empty() {\n                    return Err(format!(\"Unable to resolve DNS records for {}\", domain_owned));\n                }\n\n                let dns_server_ip = Self::get_system_dns_server().unwrap_or_else(|| \"0.0.0.0\".to_string());\n                let dns_server_name = dns_server_ip.clone();\n\n                Ok(DnsAnalysisResult::new(\n                    dns_server_name,\n                    dns_server_ip,\n                    resolved_domains,\n                    ipv4_addresses,\n                    ipv6_addresses,\n                ))\n            })\n        })\n    }\n\n    /// Read the first nameserver entry from /etc/resolv.conf to get the system DNS server IP.\n    fn get_system_dns_server() -> Option<String> {\n        let contents = 
std::fs::read_to_string(\"/etc/resolv.conf\").ok()?;\n        for line in contents.lines() {\n            let trimmed = line.trim();\n            if trimmed.starts_with(\"nameserver\")\n                && let Some(ip) = trimmed.split_whitespace().nth(1)\n            {\n                return Some(ip.to_string());\n            }\n        }\n        None\n    }\n}\n\nimpl Analyzer for DnsAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        let columns = vec![SuperTableColumn::new(\n            \"info\".to_string(),\n            \"DNS resolving tree\".to_string(),\n            70,\n            Some(Box::new(|value: &str, _render_into: &str| {\n                let mut result = value.to_string();\n                // Colorize IPv4 addresses\n                if let Ok(re) = regex::Regex::new(r\"(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})\") {\n                    result = re\n                        .replace_all(&result, |caps: &regex::Captures| {\n                            let ip = &caps[1];\n                            if ip.parse::<std::net::Ipv4Addr>().is_ok() {\n                                utils::get_color_text(ip, \"blue\", true)\n                            } else {\n                                ip.to_string()\n                            }\n                        })\n                        .to_string();\n                }\n                // Colorize IPv6 addresses\n                if let Ok(re) = regex::Regex::new(r\"([0-9a-f:]+:+)+[0-9a-f]+\") {\n                    result = re\n                        .replace_all(&result, |caps: &regex::Captures| {\n                            let ip = &caps[0];\n                            if ip.parse::<std::net::Ipv6Addr>().is_ok() {\n                                utils::get_color_text(ip, \"blue\", true)\n                            } else {\n                                ip.to_string()\n                            }\n                        })\n                        
.to_string();\n                }\n                result\n            })),\n            None,\n            true,\n            false,\n            true,\n            false,\n            None,\n        )];\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_DNS.to_string(),\n            \"DNS info\".to_string(),\n            \"No DNS info found.\".to_string(),\n            columns,\n            false,\n            None,\n            \"ASC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        let mut data: Vec<HashMap<String, String>> = Vec::new();\n\n        // Extract domain from the first visited URL\n        let domain = status\n            .get_visited_urls()\n            .first()\n            .and_then(|u| u.get_host())\n            .unwrap_or_else(|| \"unknown\".to_string());\n\n        match self.get_dns_info(&domain) {\n            Ok(dns_info) => {\n                for line in dns_info.get_txt_description().lines() {\n                    let mut row = HashMap::new();\n                    row.insert(\"info\".to_string(), line.to_string());\n                    data.push(row);\n                }\n\n                let resolved_domain = dns_info\n                    .resolved_domains\n                    .first()\n                    .cloned()\n                    .unwrap_or_else(|| \"unknown\".to_string());\n\n                // DNS server suffix — omit when unknown (e.g. 
on Windows where /etc/resolv.conf doesn't exist)\n                let dns_suffix = if dns_info.dns_server_ip_address != \"0.0.0.0\" {\n                    format!(\" (DNS server: {})\", dns_info.dns_server_name)\n                } else {\n                    String::new()\n                };\n\n                // IPv4 summary\n                if !dns_info.ipv4_addresses.is_empty() {\n                    status.add_ok_to_summary(\n                        \"dns-ipv4\",\n                        &format!(\n                            \"DNS IPv4 OK: domain {} resolved to {}{}\",\n                            resolved_domain,\n                            dns_info.ipv4_addresses.join(\", \"),\n                            dns_suffix\n                        ),\n                    );\n                } else {\n                    status.add_notice_to_summary(\n                        \"dns-ipv4\",\n                        &format!(\n                            \"DNS IPv4: domain {} does not support IPv4{}\",\n                            resolved_domain, dns_suffix\n                        ),\n                    );\n                }\n\n                // IPv6 summary\n                if !dns_info.ipv6_addresses.is_empty() {\n                    status.add_ok_to_summary(\n                        \"dns-ipv6\",\n                        &format!(\n                            \"DNS IPv6 OK: domain {} resolved to {}{}\",\n                            resolved_domain,\n                            dns_info.ipv6_addresses.join(\", \"),\n                            dns_suffix\n                        ),\n                    );\n                } else {\n                    status.add_notice_to_summary(\n                        \"dns-ipv6\",\n                        &format!(\n                            \"DNS IPv6: domain {} does not support IPv6{}\",\n                            resolved_domain, dns_suffix\n                        ),\n                    );\n                }\n\n    
            // CNAME chain summary\n                if dns_info.resolved_domains.len() > 1 {\n                    status.add_info_to_summary(\n                        \"dns-aliases\",\n                        &format!(\n                            \"DNS Aliases: IP(s) for domain {} were resolved by CNAME chain {}.\",\n                            resolved_domain,\n                            dns_info.resolved_domains.join(\" > \")\n                        ),\n                    );\n                }\n            }\n            Err(e) => {\n                let mut row = HashMap::new();\n                row.insert(\"info\".to_string(), e.clone());\n                data.push(row);\n                status.add_critical_to_summary(\"dns\", &format!(\"Problem with DNS analysis: {}\", e));\n            }\n        }\n\n        super_table.set_data(data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_end(super_table);\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        215\n    }\n\n    fn get_name(&self) -> &str {\n        \"DnsAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n"
  },
  {
    "path": "src/analysis/external_links_analyzer.rs",
    "content": "// SiteOne Crawler - ExternalLinksAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Presents external URLs discovered during crawling as a dedicated section.\n// Groups external URLs, shows occurrence count and up to 5 source pages.\n\nuse std::collections::HashMap;\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::types::SkippedReason;\n\nconst SUPER_TABLE_EXTERNAL_URLS: &str = \"external-urls\";\nconst MAX_SOURCE_PAGES: usize = 5;\n\npub struct ExternalLinksAnalyzer {\n    base: BaseAnalyzer,\n}\n\nimpl Default for ExternalLinksAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl ExternalLinksAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n        }\n    }\n}\n\nimpl Analyzer for ExternalLinksAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        let skipped_entries = status.get_skipped_urls();\n\n        // Filter only external links (NotAllowedHost reason)\n        let external_entries: Vec<_> = skipped_entries\n            .iter()\n            .filter(|e| matches!(e.reason, SkippedReason::NotAllowedHost))\n            .collect();\n\n        // Group by external URL: collect count and source page URLs\n        let mut url_data: HashMap<String, Vec<String>> = HashMap::new();\n        for entry in &external_entries {\n            let source_url = status.get_url_by_uq_id(&entry.source_uq_id).unwrap_or_default();\n            let sources = url_data.entry(entry.url.clone()).or_default();\n            if !source_url.is_empty() && !sources.contains(&source_url) {\n                sources.push(source_url);\n            }\n        }\n\n        let total_urls = url_data.len();\n\n        let mut rows: 
Vec<HashMap<String, String>> = url_data\n            .iter()\n            .map(|(ext_url, sources)| {\n                let mut row = HashMap::new();\n                row.insert(\"url\".to_string(), ext_url.clone());\n                row.insert(\"count\".to_string(), sources.len().to_string());\n                let display_sources: Vec<&str> = sources.iter().take(MAX_SOURCE_PAGES).map(|s| s.as_str()).collect();\n                let mut found_on = display_sources.join(\", \");\n                if sources.len() > MAX_SOURCE_PAGES {\n                    found_on.push_str(&format!(\" (+{})\", sources.len() - MAX_SOURCE_PAGES));\n                }\n                row.insert(\"foundOn\".to_string(), found_on);\n                row\n            })\n            .collect();\n        rows.sort_by(|a, b| {\n            let count_a: usize = a.get(\"count\").and_then(|c| c.parse().ok()).unwrap_or(0);\n            let count_b: usize = b.get(\"count\").and_then(|c| c.parse().ok()).unwrap_or(0);\n            count_b.cmp(&count_a).then_with(|| a.get(\"url\").cmp(&b.get(\"url\")))\n        });\n\n        let url_column_width = 60;\n\n        let columns = vec![\n            SuperTableColumn::new(\n                \"url\".to_string(),\n                \"External URL\".to_string(),\n                url_column_width,\n                None,\n                None,\n                true,\n                true,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"count\".to_string(),\n                \"Pages\".to_string(),\n                5,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"foundOn\".to_string(),\n                \"Found on URL (max 5)\".to_string(),\n                url_column_width,\n                
None,\n                None,\n                true,\n                true,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_EXTERNAL_URLS.to_string(),\n            \"External URLs\".to_string(),\n            \"No external URLs found.\".to_string(),\n            columns,\n            true,\n            Some(\"count\".to_string()),\n            \"DESC\".to_string(),\n            Some(format!(\"{} external URL(s)\", total_urls)),\n            None,\n            None,\n        );\n\n        super_table.set_data(rows);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_beginning(super_table);\n\n        status.add_summary_item_by_ranges(\n            \"external-urls\",\n            total_urls as f64,\n            &[(0.0, 0.0), (1.0, f64::MAX)],\n            &[\n                \"External URLs - no external URLs found\",\n                \"External URLs - {} external URL(s) found\",\n            ],\n        );\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        7 // After skipped URLs (6)\n    }\n\n    fn get_name(&self) -> &str {\n        \"ExternalLinksAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n"
  },
  {
    "path": "src/analysis/fastest_analyzer.rs",
    "content": "// SiteOne Crawler - FastestAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\nconst SUPER_TABLE_FASTEST_URLS: &str = \"fastest-urls\";\n\npub struct FastestAnalyzer {\n    base: BaseAnalyzer,\n    fastest_top_limit: usize,\n    fastest_max_time: f64,\n}\n\nimpl Default for FastestAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl FastestAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n            fastest_top_limit: 20,\n            fastest_max_time: 1.0,\n        }\n    }\n\n    /// Set configuration from CoreOptions.\n    pub fn set_config(&mut self, fastest_top_limit: usize, fastest_max_time: f64) {\n        self.fastest_top_limit = fastest_top_limit;\n        self.fastest_max_time = fastest_max_time;\n    }\n}\n\nimpl Analyzer for FastestAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        let visited_urls = status.get_visited_urls();\n\n        let mut fast_urls: Vec<_> = visited_urls\n            .into_iter()\n            .filter(|u| {\n                u.status_code == 200\n                    && u.is_allowed_for_crawling\n                    && u.content_type == ContentTypeId::Html\n                    && u.request_time <= self.fastest_max_time\n            })\n            .collect();\n\n        fast_urls.sort_by(|a, b| {\n            a.request_time\n                .partial_cmp(&b.request_time)\n                .unwrap_or(std::cmp::Ordering::Equal)\n        });\n        fast_urls.truncate(self.fastest_top_limit);\n\n        let console_width = 
utils::get_console_width();\n        let url_column_width = (console_width as i32 - 20).max(20);\n\n        let columns = vec![\n            SuperTableColumn::new(\n                \"requestTime\".to_string(),\n                \"Time\".to_string(),\n                6,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<f64>() {\n                        utils::get_colored_request_time(v, 6)\n                    } else {\n                        value.to_string()\n                    }\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"statusCode\".to_string(),\n                \"Status\".to_string(),\n                6,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<i32>() {\n                        utils::get_colored_status_code(v, 6)\n                    } else {\n                        value.to_string()\n                    }\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"url\".to_string(),\n                \"Fast URL\".to_string(),\n                url_column_width,\n                None,\n                None,\n                true,\n                true,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let data: Vec<HashMap<String, String>> = fast_urls\n            .iter()\n            .map(|u| {\n                let mut row = HashMap::new();\n                row.insert(\"requestTime\".to_string(), format!(\"{:.4}\", u.request_time));\n                row.insert(\"statusCode\".to_string(), 
u.status_code.to_string());\n                row.insert(\"url\".to_string(), u.url.clone());\n                row\n            })\n            .collect();\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_FASTEST_URLS.to_string(),\n            \"TOP fastest URLs\".to_string(),\n            format!(\"No fast URLs faster than {} second(s) found.\", self.fastest_max_time),\n            columns,\n            true,\n            Some(\"requestTime\".to_string()),\n            \"ASC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_data(data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_beginning(super_table);\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        100\n    }\n\n    fn get_name(&self) -> &str {\n        \"FastestAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n"
  },
  {
    "path": "src/analysis/headers_analyzer.rs",
    "content": "// SiteOne Crawler - HeadersAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::analysis::result::header_stats::HeaderStats;\nuse crate::analysis::result::url_analysis_result::UrlAnalysisResult;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::result::visited_url::VisitedUrl;\nuse crate::utils;\n\nconst SUPER_TABLE_HEADERS: &str = \"headers\";\nconst SUPER_TABLE_HEADERS_VALUES: &str = \"headers-values\";\n\npub struct HeadersAnalyzer {\n    base: BaseAnalyzer,\n    header_stats: HashMap<String, HeaderStats>,\n}\n\nimpl Default for HeadersAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl HeadersAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n            header_stats: HashMap::new(),\n        }\n    }\n}\n\nimpl Analyzer for HeadersAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        let console_width = utils::get_console_width();\n\n        // Basic header stats table\n        let data: Vec<HashMap<String, String>> = self\n            .header_stats\n            .values()\n            .map(|hs| {\n                let mut row = HashMap::new();\n                row.insert(\"header\".to_string(), hs.get_formatted_header_name());\n                row.insert(\"occurrences\".to_string(), hs.occurrences.to_string());\n\n                let unique_count = hs.unique_values.len();\n                let unique_str = if unique_count == 0 {\n                    \"-\".to_string()\n                } else if hs.unique_values_limit_reached {\n                    format!(\"{}+\", unique_count)\n                } else {\n                    unique_count.to_string()\n              
  };\n                row.insert(\"uniqueValues\".to_string(), unique_str);\n\n                row.insert(\"valuesPreview\".to_string(), hs.get_values_preview(120));\n\n                let min_value = hs.get_min_value().unwrap_or_default();\n                let max_value = hs.get_max_value().unwrap_or_default();\n\n                // Format min/max for content-length and age\n                if hs.header == \"content-length\" {\n                    if let Some(min_int) = hs.min_int_value {\n                        row.insert(\"minValue\".to_string(), utils::get_formatted_size(min_int, 0));\n                    } else {\n                        row.insert(\"minValue\".to_string(), String::new());\n                    }\n                    if let Some(max_int) = hs.max_int_value {\n                        row.insert(\"maxValue\".to_string(), utils::get_formatted_size(max_int, 0));\n                    } else {\n                        row.insert(\"maxValue\".to_string(), String::new());\n                    }\n                } else if hs.header == \"age\" {\n                    if let Some(min_int) = hs.min_int_value {\n                        row.insert(\"minValue\".to_string(), utils::get_formatted_age(min_int));\n                    } else {\n                        row.insert(\"minValue\".to_string(), String::new());\n                    }\n                    if let Some(max_int) = hs.max_int_value {\n                        row.insert(\"maxValue\".to_string(), utils::get_formatted_age(max_int));\n                    } else {\n                        row.insert(\"maxValue\".to_string(), String::new());\n                    }\n                } else {\n                    row.insert(\"minValue\".to_string(), min_value);\n                    row.insert(\"maxValue\".to_string(), max_value);\n                }\n\n                row\n            })\n            .collect();\n\n        let columns = vec![\n            SuperTableColumn::new(\n                
\"header\".to_string(),\n                \"Header\".to_string(),\n                -1, // AUTO_WIDTH\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"occurrences\".to_string(),\n                \"Occurs\".to_string(),\n                6,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"uniqueValues\".to_string(),\n                \"Unique\".to_string(),\n                6,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"valuesPreview\".to_string(),\n                \"Values preview\".to_string(),\n                (console_width as i32 - 90).max(20),\n                None,\n                None,\n                true,\n                true,\n                false,\n                false,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"minValue\".to_string(),\n                \"Min value\".to_string(),\n                10,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"maxValue\".to_string(),\n                \"Max value\".to_string(),\n                10,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let mut super_table = 
SuperTable::new(\n            SUPER_TABLE_HEADERS.to_string(),\n            \"HTTP headers\".to_string(),\n            \"No HTTP headers found.\".to_string(),\n            columns,\n            true,\n            Some(\"header\".to_string()),\n            \"ASC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_data(data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_end(super_table);\n\n        let unique_count = self.header_stats.len();\n        status.add_summary_item_by_ranges(\n            \"unique-headers\",\n            unique_count as f64,\n            &[(0.0, 30.0), (31.0, 40.0), (41.0, 50.0), (51.0, f64::MAX)],\n            &[\n                \"HTTP headers - found {} unique headers\",\n                \"HTTP headers - found {} unique headers\",\n                \"HTTP headers - found {} unique headers (too many)\",\n                \"HTTP headers - found {} unique headers (too many)\",\n            ],\n        );\n\n        // Detail info with header values\n        let mut details: Vec<HashMap<String, String>> = Vec::new();\n        for header_stat in self.header_stats.values() {\n            for (value, count) in &header_stat.unique_values {\n                let mut row = HashMap::new();\n                row.insert(\"header\".to_string(), header_stat.get_formatted_header_name());\n                row.insert(\"occurrences\".to_string(), count.to_string());\n                row.insert(\"value\".to_string(), value.clone());\n                details.push(row);\n            }\n        }\n\n        // Sort by header asc, then by occurrences desc\n        details.sort_by(|a, b| {\n            let header_a = a.get(\"header\").cloned().unwrap_or_default();\n            let header_b = b.get(\"header\").cloned().unwrap_or_default();\n            if header_a == header_b {\n                let occ_a 
= a.get(\"occurrences\").and_then(|v| v.parse::<usize>().ok()).unwrap_or(0);\n                let occ_b = b.get(\"occurrences\").and_then(|v| v.parse::<usize>().ok()).unwrap_or(0);\n                occ_b.cmp(&occ_a)\n            } else {\n                header_a.cmp(&header_b)\n            }\n        });\n\n        let detail_columns = vec![\n            SuperTableColumn::new(\n                \"header\".to_string(),\n                \"Header\".to_string(),\n                -1, // AUTO_WIDTH\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"occurrences\".to_string(),\n                \"Occurs\".to_string(),\n                6,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"value\".to_string(),\n                \"Value\".to_string(),\n                (console_width as i32 - 56).max(20),\n                None,\n                None,\n                true,\n                true,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let mut detail_table = SuperTable::new(\n            SUPER_TABLE_HEADERS_VALUES.to_string(),\n            \"HTTP header values\".to_string(),\n            \"No HTTP headers found.\".to_string(),\n            detail_columns,\n            true,\n            None,\n            \"ASC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        detail_table.set_data(details);\n        status.configure_super_table_url_stripping(&mut detail_table);\n        output.add_super_table(&detail_table);\n        status.add_super_table_at_end(detail_table);\n    }\n\n    fn analyze_visited_url(\n       
 &mut self,\n        visited_url: &VisitedUrl,\n        _body: Option<&str>,\n        headers: Option<&HashMap<String, String>>,\n    ) -> Option<UrlAnalysisResult> {\n        let headers = headers?;\n        if !visited_url.is_allowed_for_crawling {\n            return None;\n        }\n\n        for (header, values) in headers {\n            let header_lower = header.to_lowercase();\n            let stat = self\n                .header_stats\n                .entry(header_lower.clone())\n                .or_insert_with(|| HeaderStats::new(header_lower));\n\n            stat.add_value(values);\n        }\n\n        None\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        115\n    }\n\n    fn get_name(&self) -> &str {\n        \"HeadersAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n"
  },
  {
    "path": "src/analysis/manager.rs",
    "content": "// SiteOne Crawler - Analysis Manager\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::result::url_analysis_result::UrlAnalysisResult;\nuse crate::output::output::Output;\nuse crate::result::manager_stats::ManagerStats;\nuse crate::result::status::Status;\nuse crate::result::visited_url::VisitedUrl;\nuse crate::utils;\n\npub const SUPER_TABLE_ANALYSIS_STATS: &str = \"analysis-stats\";\n\npub struct AnalysisManager {\n    analyzers: Vec<Box<dyn Analyzer>>,\n    stats: ManagerStats,\n}\n\nimpl AnalysisManager {\n    pub fn new() -> Self {\n        Self {\n            analyzers: Vec::new(),\n            stats: ManagerStats::new(),\n        }\n    }\n\n    /// Register all analyzer instances. Each analyzer's should_be_activated()\n    /// determines whether it is actually used.\n    pub fn register_analyzer(&mut self, analyzer: Box<dyn Analyzer>) {\n        self.analyzers.push(analyzer);\n    }\n\n    /// Auto-activate: remove analyzers that should not be activated based on options.\n    pub fn auto_activate_analyzers(&mut self) {\n        self.analyzers.retain(|a| a.should_be_activated());\n    }\n\n    /// Filter analyzers by regex pattern.\n    /// Only analyzers whose name matches the regex are kept.\n    /// Supports PCRE-style delimited patterns (e.g., /security/i).\n    pub fn filter_analyzers_by_regex(&mut self, filter_regex: &str) {\n        let pattern = utils::extract_pcre_regex_pattern(filter_regex);\n        if let Ok(re) = fancy_regex::Regex::new(&pattern) {\n            self.analyzers.retain(|a| re.is_match(a.get_name()).unwrap_or(true));\n        }\n    }\n\n    /// Run analyze_visited_url for each active analyzer.\n    /// Called per URL during the crawl.\n    pub fn analyze_visited_url(\n        &mut self,\n        visited_url: &VisitedUrl,\n        body: Option<&str>,\n        headers: Option<&HashMap<String, String>>,\n        status: 
&Status,\n    ) -> Vec<(String, UrlAnalysisResult)> {\n        let mut results = Vec::new();\n\n        for analyzer in &mut self.analyzers {\n            if let Some(result) = analyzer.analyze_visited_url(visited_url, body, headers) {\n                let name = analyzer.get_name().to_string();\n                status.add_url_analysis_result(\n                    &visited_url.uq_id,\n                    crate::result::status::UrlAnalysisResultEntry {\n                        analysis_name: name.clone(),\n                        result: result.clone(),\n                    },\n                );\n                results.push((name, result));\n            }\n        }\n\n        results\n    }\n\n    /// Run post-crawl analysis for all active analyzers, sorted by order.\n    pub fn run_analyzers(&mut self, status: &Status, output: &mut dyn Output) {\n        // Check if there are any working URLs\n        if status.get_number_of_working_visited_urls() == 0 {\n            let error_message =\n                \"The analysis has been suspended because no working URL could be found. 
Please check the URL/domain.\";\n            output.add_error(error_message);\n            status.add_critical_to_summary(\"analysis-manager-error\", error_message);\n            return;\n        }\n\n        // Sort analyzers by order\n        self.analyzers.sort_by_key(|a| a.get_order());\n\n        for analyzer in &mut self.analyzers {\n            analyzer.analyze(status, output);\n        }\n\n        // Collect and merge exec times from all analyzers\n        if !self.analyzers.is_empty() {\n            let mut all_exec_times: HashMap<String, f64> = HashMap::new();\n            let mut all_exec_counts: HashMap<String, usize> = HashMap::new();\n\n            for analyzer in &self.analyzers {\n                for (key, time) in analyzer.get_exec_times() {\n                    *all_exec_times.entry(key.clone()).or_insert(0.0) += time;\n                }\n                for (key, count) in analyzer.get_exec_counts() {\n                    *all_exec_counts.entry(key.clone()).or_insert(0) += count;\n                }\n            }\n\n            let super_table = self.stats.get_super_table(\n                SUPER_TABLE_ANALYSIS_STATS,\n                \"Analysis stats\",\n                \"No analysis stats\",\n                Some(&all_exec_times),\n                Some(&all_exec_counts),\n            );\n\n            let mut super_table = super_table;\n            status.configure_super_table_url_stripping(&mut super_table);\n            output.add_super_table(&super_table);\n            status.add_super_table_at_end(super_table);\n        }\n    }\n\n    /// Get all analyzers\n    pub fn get_analyzers(&self) -> &[Box<dyn Analyzer>] {\n        &self.analyzers\n    }\n\n    /// Check if analyzer with given name is active\n    pub fn has_analyzer(&self, name: &str) -> bool {\n        self.analyzers.iter().any(|a| a.get_name() == name)\n    }\n\n    /// Get extra columns from all analyzers that want to show results as columns.\n    /// Returns columns in 
registration order (alphabetical).\n    pub fn get_extra_columns(&self) -> Vec<crate::extra_column::ExtraColumn> {\n        self.analyzers\n            .iter()\n            .filter_map(|a| a.show_analyzed_visited_url_result_as_column())\n            .collect()\n    }\n\n    /// Map analysis results to extra column values for the progress table.\n    /// Returns a HashMap of column_name -> colorized_value_string.\n    pub fn get_analysis_column_values(\n        &self,\n        analysis_results: &[(String, UrlAnalysisResult)],\n    ) -> HashMap<String, String> {\n        let mut result = HashMap::new();\n\n        for analyzer in &self.analyzers {\n            if let Some(extra_col) = analyzer.show_analyzed_visited_url_result_as_column() {\n                let analyzer_name = analyzer.get_name();\n                // Find the matching result for this analyzer\n                if let Some((_, url_result)) = analysis_results.iter().find(|(name, _)| name == analyzer_name) {\n                    let colorized = url_result.to_colorized_string(true);\n                    if !colorized.is_empty() {\n                        result.insert(extra_col.name.clone(), colorized);\n                    }\n                }\n            }\n        }\n\n        result\n    }\n}\n\nimpl Default for AnalysisManager {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n"
  },
  {
    "path": "src/analysis/mod.rs",
    "content": "pub mod analyzer;\r\npub mod base_analyzer;\r\npub mod manager;\r\npub mod result;\r\n\r\n// Simple analyzers\r\npub mod caching_analyzer;\r\npub mod content_type_analyzer;\r\npub mod dns_analyzer;\r\npub mod external_links_analyzer;\r\npub mod fastest_analyzer;\r\npub mod headers_analyzer;\r\npub mod page404_analyzer;\r\npub mod redirects_analyzer;\r\npub mod skipped_urls_analyzer;\r\npub mod slowest_analyzer;\r\npub mod source_domains_analyzer;\r\n\r\n// Complex analyzers (DOM parsing / TLS inspection)\r\npub mod accessibility_analyzer;\r\npub mod best_practice_analyzer;\r\npub mod security_analyzer;\r\npub mod seo_opengraph_analyzer;\r\npub mod ssl_tls_analyzer;\r\n"
  },
  {
    "path": "src/analysis/page404_analyzer.rs",
    "content": "// SiteOne Crawler - Page404Analyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::utils;\n\nconst SUPER_TABLE_404: &str = \"404\";\n\npub struct Page404Analyzer {\n    base: BaseAnalyzer,\n}\n\nimpl Default for Page404Analyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl Page404Analyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n        }\n    }\n}\n\nimpl Analyzer for Page404Analyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        let visited_urls = status.get_visited_urls();\n\n        let urls_404: Vec<_> = visited_urls.iter().filter(|u| u.status_code == 404).cloned().collect();\n\n        let console_width = utils::get_console_width();\n        let url_column_size = ((console_width as i32 - 16) / 2).max(20);\n\n        let status_ref = status;\n        let columns = vec![\n            SuperTableColumn::new(\n                \"statusCode\".to_string(),\n                \"Status\".to_string(),\n                6,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<i32>() {\n                        utils::get_colored_status_code(v, 6)\n                    } else {\n                        value.to_string()\n                    }\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"url\".to_string(),\n                \"URL 404\".to_string(),\n                url_column_size,\n                
None,\n                None,\n                true,\n                true,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"sourceUqId\".to_string(),\n                \"Found at URL\".to_string(),\n                url_column_size,\n                None,\n                None,\n                true,\n                true,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let data: Vec<HashMap<String, String>> = urls_404\n            .iter()\n            .map(|u| {\n                let mut row = HashMap::new();\n                row.insert(\"statusCode\".to_string(), u.status_code.to_string());\n                row.insert(\"url\".to_string(), u.url.clone());\n                let source_url = if !u.source_uq_id.is_empty() {\n                    status_ref.get_url_by_uq_id(&u.source_uq_id).unwrap_or_default()\n                } else {\n                    String::new()\n                };\n                row.insert(\"sourceUqId\".to_string(), source_url);\n                row\n            })\n            .collect();\n\n        let count_404 = data.len();\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_404.to_string(),\n            \"404 URLs\".to_string(),\n            \"No 404 URLs found.\".to_string(),\n            columns,\n            true,\n            Some(\"url\".to_string()),\n            \"ASC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_data(data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_beginning(super_table);\n\n        status.add_summary_item_by_ranges(\n            \"404\",\n            count_404 as f64,\n            &[(0.0, 0.0), (1.0, 2.0), (3.0, 5.0), (6.0, f64::MAX)],\n            &[\n                
\"404 OK - all pages exist, no non-existent pages found\",\n                \"404 NOTICE - {} non-existent page(s) found\",\n                \"404 WARNING - {} non-existent pages found\",\n                \"404 CRITICAL - {} non-existent pages found\",\n            ],\n        );\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        20\n    }\n\n    fn get_name(&self) -> &str {\n        \"Page404Analyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n"
  },
  {
    "path": "src/analysis/redirects_analyzer.rs",
    "content": "// SiteOne Crawler - RedirectsAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::utils;\n\nconst SUPER_TABLE_REDIRECTS: &str = \"redirects\";\n\npub struct RedirectsAnalyzer {\n    base: BaseAnalyzer,\n}\n\nimpl Default for RedirectsAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl RedirectsAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n        }\n    }\n}\n\nimpl Analyzer for RedirectsAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        let visited_urls = status.get_visited_urls();\n\n        let url_redirects: Vec<_> = visited_urls\n            .iter()\n            .filter(|u| u.status_code >= 301 && u.status_code <= 308)\n            .cloned()\n            .collect();\n\n        let console_width = utils::get_console_width();\n        let url_column_width = ((console_width as i32 - 20) / 3).max(20);\n\n        let columns = vec![\n            SuperTableColumn::new(\n                \"statusCode\".to_string(),\n                \"Status\".to_string(),\n                6,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<i32>() {\n                        utils::get_colored_status_code(v, 6)\n                    } else {\n                        value.to_string()\n                    }\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"url\".to_string(),\n                
\"Redirected URL\".to_string(),\n                url_column_width,\n                None,\n                None,\n                true,\n                true,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"targetUrl\".to_string(),\n                \"Target URL\".to_string(),\n                url_column_width,\n                None,\n                None,\n                true,\n                true,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"sourceUqId\".to_string(),\n                \"Found at URL\".to_string(),\n                url_column_width,\n                None,\n                None,\n                true,\n                true,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let data: Vec<HashMap<String, String>> = url_redirects\n            .iter()\n            .map(|u| {\n                let mut row = HashMap::new();\n                row.insert(\"statusCode\".to_string(), u.status_code.to_string());\n                row.insert(\"url\".to_string(), u.url.clone());\n                // Target URL from the Location header in extras\n                let target = u\n                    .extras\n                    .as_ref()\n                    .and_then(|e| e.get(\"Location\"))\n                    .cloned()\n                    .unwrap_or_else(|| \"?\".to_string());\n                row.insert(\"targetUrl\".to_string(), target);\n                let source_url = if !u.source_uq_id.is_empty() {\n                    status.get_url_by_uq_id(&u.source_uq_id).unwrap_or_default()\n                } else {\n                    String::new()\n                };\n                row.insert(\"sourceUqId\".to_string(), source_url);\n                row\n            })\n            .collect();\n\n        let 
count_redirects = data.len();\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_REDIRECTS.to_string(),\n            \"Redirected URLs\".to_string(),\n            \"No redirects found.\".to_string(),\n            columns,\n            true,\n            Some(\"url\".to_string()),\n            \"ASC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_data(data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_beginning(super_table);\n\n        status.add_summary_item_by_ranges(\n            \"redirects\",\n            count_redirects as f64,\n            &[(0.0, 0.0), (1.0, 2.0), (3.0, 9.0), (10.0, f64::MAX)],\n            &[\n                \"Redirects - no redirects found\",\n                \"Redirects - {} redirect(s) found\",\n                \"Redirects - {} redirects found\",\n                \"Redirects - {} redirects found\",\n            ],\n        );\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        10\n    }\n\n    fn get_name(&self) -> &str {\n        \"RedirectsAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n"
  },
  {
    "path": "src/analysis/result/analyzer_stats.rs",
    "content": "// SiteOne Crawler - AnalyzerStats\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse std::collections::HashMap;\r\n\r\n#[derive(Debug, Clone, Default)]\r\npub struct AnalyzerStats {\r\n    /// analysis_name -> severity -> set of subject hashes (or just counted entries)\r\n    severity_counts_per_analysis: HashMap<String, SeverityCounts>,\r\n}\r\n\r\n#[derive(Debug, Clone, Default)]\r\nstruct SeverityCounts {\r\n    ok: HashMap<String, bool>,\r\n    notice: HashMap<String, bool>,\r\n    warning: HashMap<String, bool>,\r\n    critical: HashMap<String, bool>,\r\n}\r\n\r\nimpl AnalyzerStats {\r\n    pub fn new() -> Self {\r\n        Self::default()\r\n    }\r\n\r\n    pub fn add_ok(&mut self, analysis_name: &str, subject: Option<&str>) {\r\n        self.add_result(analysis_name, \"ok\", subject);\r\n    }\r\n\r\n    pub fn add_warning(&mut self, analysis_name: &str, subject: Option<&str>) {\r\n        self.add_result(analysis_name, \"warning\", subject);\r\n    }\r\n\r\n    pub fn add_critical(&mut self, analysis_name: &str, subject: Option<&str>) {\r\n        self.add_result(analysis_name, \"critical\", subject);\r\n    }\r\n\r\n    pub fn add_notice(&mut self, analysis_name: &str, subject: Option<&str>) {\r\n        self.add_result(analysis_name, \"notice\", subject);\r\n    }\r\n\r\n    pub fn to_table_data(&self) -> Vec<HashMap<String, String>> {\r\n        let mut result = Vec::new();\r\n        for (analysis_name, counts) in &self.severity_counts_per_analysis {\r\n            let mut row = HashMap::new();\r\n            row.insert(\"analysisName\".to_string(), analysis_name.clone());\r\n            row.insert(\"ok\".to_string(), counts.ok.len().to_string());\r\n            row.insert(\"notice\".to_string(), counts.notice.len().to_string());\r\n            row.insert(\"warning\".to_string(), counts.warning.len().to_string());\r\n            row.insert(\"critical\".to_string(), counts.critical.len().to_string());\r\n            
result.push(row);\r\n        }\r\n        result\r\n    }\r\n\r\n    fn add_result(&mut self, analysis_name: &str, severity: &str, subject: Option<&str>) {\r\n        let counts = self\r\n            .severity_counts_per_analysis\r\n            .entry(analysis_name.to_string())\r\n            .or_default();\r\n\r\n        let subject_hash = subject.map(|s| {\r\n            use md5::{Digest, Md5};\r\n            let mut hasher = Md5::new();\r\n            hasher.update(s.trim().as_bytes());\r\n            let result = hasher.finalize();\r\n            format!(\"{:x}\", result)[..10].to_string()\r\n        });\r\n\r\n        let map = match severity {\r\n            \"ok\" => &mut counts.ok,\r\n            \"notice\" => &mut counts.notice,\r\n            \"warning\" => &mut counts.warning,\r\n            \"critical\" => &mut counts.critical,\r\n            _ => return,\r\n        };\r\n\r\n        if let Some(hash) = subject_hash {\r\n            map.insert(hash, true);\r\n        } else {\r\n            // Use a unique key based on current count\r\n            let key = format!(\"_auto_{}\", map.len());\r\n            map.insert(key, true);\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/analysis/result/dns_analysis_result.rs",
    "content": "// SiteOne Crawler - DnsAnalysisResult\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\n#[derive(Debug, Clone)]\r\npub struct DnsAnalysisResult {\r\n    pub dns_server_name: String,\r\n    pub dns_server_ip_address: String,\r\n    /// DNS resolved domain names (aliases) with all CNAMEs.\r\n    /// First is the original domain name and last is the final resolved domain name.\r\n    pub resolved_domains: Vec<String>,\r\n    /// Final resolved IPv4 addresses\r\n    pub ipv4_addresses: Vec<String>,\r\n    /// Final resolved IPv6 addresses (when available)\r\n    pub ipv6_addresses: Vec<String>,\r\n}\r\n\r\nimpl DnsAnalysisResult {\r\n    pub fn new(\r\n        dns_server_name: String,\r\n        dns_server_ip_address: String,\r\n        resolved_domains: Vec<String>,\r\n        ipv4_addresses: Vec<String>,\r\n        ipv6_addresses: Vec<String>,\r\n    ) -> Self {\r\n        Self {\r\n            dns_server_name,\r\n            dns_server_ip_address,\r\n            resolved_domains,\r\n            ipv4_addresses,\r\n            ipv6_addresses,\r\n        }\r\n    }\r\n\r\n    /// Get text description of DNS analysis result in format respecting the\r\n    /// hierarchy of resolved domains/CNAMEs and IPs.\r\n    pub fn get_txt_description(&self) -> String {\r\n        let mut result = String::new();\r\n\r\n        for (i, domain) in self.resolved_domains.iter().enumerate() {\r\n            result.push_str(&\"  \".repeat(i));\r\n            result.push_str(domain);\r\n            result.push('\\n');\r\n        }\r\n\r\n        let indent = \"  \".repeat(self.resolved_domains.len());\r\n        for ip in &self.ipv4_addresses {\r\n            result.push_str(&indent);\r\n            result.push_str(&format!(\"IPv4: {}\\n\", ip));\r\n        }\r\n        for ip in &self.ipv6_addresses {\r\n            result.push_str(&indent);\r\n            result.push_str(&format!(\"IPv6: {}\\n\", ip));\r\n        }\r\n\r\n        // Add DNS server info if available 
(0.0.0.0 means unknown, typical for CYGWIN)\r\n        if self.dns_server_ip_address != \"0.0.0.0\" {\r\n            if self.dns_server_name != self.dns_server_ip_address {\r\n                result.push_str(&format!(\r\n                    \"\\nDNS server: {} ({})\\n\",\r\n                    self.dns_server_name, self.dns_server_ip_address\r\n                ));\r\n            } else {\r\n                result.push_str(&format!(\"\\nDNS server: {}\\n\", self.dns_server_name));\r\n            }\r\n        }\r\n\r\n        result.trim().to_string()\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/analysis/result/header_stats.rs",
    "content": "// SiteOne Crawler - HeaderStats\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::utils;\n\nconst MAX_UNIQUE_VALUES: usize = 20;\n\n#[derive(Debug, Clone)]\npub struct HeaderStats {\n    pub header: String,\n    pub occurrences: usize,\n    pub unique_values: HashMap<String, usize>,\n    pub unique_values_limit_reached: bool,\n    pub min_date_value: Option<String>,\n    pub max_date_value: Option<String>,\n    pub min_int_value: Option<i64>,\n    pub max_int_value: Option<i64>,\n}\n\nimpl HeaderStats {\n    pub fn new(header: String) -> Self {\n        Self {\n            header,\n            occurrences: 0,\n            unique_values: HashMap::new(),\n            unique_values_limit_reached: false,\n            min_date_value: None,\n            max_date_value: None,\n            min_int_value: None,\n            max_int_value: None,\n        }\n    }\n\n    pub fn add_value(&mut self, value: &str) {\n        self.occurrences += 1;\n\n        if self.ignore_header_values(&self.header.clone()) {\n        } else if self.is_value_for_min_max_date(&self.header.clone()) {\n            self.add_value_for_min_max_date(value);\n        } else if self.is_value_for_min_max_int(&self.header.clone()) {\n            self.add_value_for_min_max_int(value);\n        } else {\n            if self.unique_values.len() >= MAX_UNIQUE_VALUES {\n                self.unique_values_limit_reached = true;\n                return;\n            }\n            *self.unique_values.entry(value.to_string()).or_insert(0) += 1;\n        }\n    }\n\n    pub fn get_sorted_unique_values(&self) -> Vec<(&String, &usize)> {\n        let mut sorted: Vec<_> = self.unique_values.iter().collect();\n        sorted.sort_by(|a, b| b.1.cmp(a.1));\n        sorted\n    }\n\n    pub fn get_formatted_header_name(&self) -> String {\n        let words: Vec<String> = self\n            .header\n            .split('-')\n            .map(|w| {\n                let 
mut chars = w.chars();\n                match chars.next() {\n                    Some(c) => format!(\"{}{}\", c.to_uppercase(), chars.as_str()),\n                    None => String::new(),\n                }\n            })\n            .collect();\n        words.join(\"-\").replace(\"Xss\", \"XSS\")\n    }\n\n    pub fn is_value_for_min_max_int(&self, header: &str) -> bool {\n        header == \"content-length\" || header == \"age\"\n    }\n\n    pub fn is_value_for_min_max_date(&self, header: &str) -> bool {\n        header == \"date\" || header == \"expires\" || header == \"last-modified\"\n    }\n\n    pub fn ignore_header_values(&self, header: &str) -> bool {\n        matches!(header, \"etag\" | \"cf-ray\" | \"set-cookie\" | \"content-disposition\")\n    }\n\n    pub fn get_min_value(&self) -> Option<String> {\n        self.min_int_value\n            .map(|v| v.to_string())\n            .or_else(|| self.min_date_value.clone())\n    }\n\n    pub fn get_max_value(&self) -> Option<String> {\n        self.max_int_value\n            .map(|v| v.to_string())\n            .or_else(|| self.max_date_value.clone())\n    }\n\n    pub fn get_values_preview(&self, max_length: usize) -> String {\n        if self.unique_values.len() == 1\n            && let Some(first_value) = self.unique_values.keys().next()\n        {\n            if first_value.chars().count() > max_length {\n                return utils::truncate_in_two_thirds(first_value, max_length, \"\\u{2026}\", None);\n            }\n            return first_value.clone();\n        }\n\n        let values_length: usize = self.unique_values.keys().map(|k| k.len()).sum();\n\n        if values_length < max_length.saturating_sub(10) {\n            let mut sorted: Vec<_> = self.unique_values.iter().collect();\n            sorted.sort_by(|a, b| b.1.cmp(a.1));\n\n            let mut result = String::new();\n            for (value, count) in sorted {\n                result.push_str(&format!(\"{} ({}) / \", value, 
count));\n            }\n\n            let trimmed = result.trim().trim_end_matches(\" /\").to_string();\n            if trimmed.is_empty() {\n                return \"[ignored generic values]\".to_string();\n            }\n\n            return utils::truncate_in_two_thirds(&trimmed, max_length, \"\\u{2026}\", None);\n        }\n\n        \"[see values below]\".to_string()\n    }\n\n    fn add_value_for_min_max_int(&mut self, value: &str) {\n        if let Ok(int_val) = value.parse::<i64>() {\n            match self.min_int_value {\n                None => self.min_int_value = Some(int_val),\n                Some(min) if int_val < min => self.min_int_value = Some(int_val),\n                _ => {}\n            }\n            match self.max_int_value {\n                None => self.max_int_value = Some(int_val),\n                Some(max) if int_val > max => self.max_int_value = Some(int_val),\n                _ => {}\n            }\n        }\n    }\n\n    fn add_value_for_min_max_date(&mut self, value: &str) {\n        // Try to parse HTTP date format into a simple YYYY-MM-DD string\n        if let Ok(dt) = chrono::DateTime::parse_from_rfc2822(value) {\n            let date = dt.format(\"%Y-%m-%d\").to_string();\n            match &self.min_date_value {\n                None => self.min_date_value = Some(date.clone()),\n                Some(min) if &date < min => self.min_date_value = Some(date.clone()),\n                _ => {}\n            }\n            match &self.max_date_value {\n                None => self.max_date_value = Some(date),\n                Some(max) if &date > max => self.max_date_value = Some(date),\n                _ => {}\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "src/analysis/result/heading_tree_item.rs",
    "content": "// SiteOne Crawler - HeadingTreeItem\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nfn html_escape(s: &str) -> String {\n    s.replace('&', \"&amp;\")\n        .replace('<', \"&lt;\")\n        .replace('>', \"&gt;\")\n        .replace('\"', \"&quot;\")\n        .replace('\\'', \"&#39;\")\n}\n\n#[derive(Debug, Clone)]\npub struct HeadingTreeItem {\n    /// Heading level (1-6)\n    pub level: i32,\n    /// Real heading level by heading structure in HTML\n    pub real_level: Option<i32>,\n    /// Heading text\n    pub text: String,\n    /// Heading ID attribute\n    pub id: Option<String>,\n    /// Children headings\n    pub children: Vec<HeadingTreeItem>,\n    /// Error text in case of error (typically multiple H1s or wrong heading level)\n    pub error_text: Option<String>,\n}\n\nimpl HeadingTreeItem {\n    pub fn new(level: i32, text: String, id: Option<String>) -> Self {\n        Self {\n            level,\n            real_level: None,\n            text,\n            id,\n            children: Vec::new(),\n            error_text: None,\n        }\n    }\n\n    pub fn has_error(&self) -> bool {\n        self.error_text.is_some()\n    }\n\n    /// Get heading tree as a plain text list\n    pub fn get_heading_tree_txt_list(items: &[HeadingTreeItem]) -> String {\n        let mut result = String::new();\n        for item in items {\n            result.push_str(&Self::get_heading_tree_txt(item, true));\n        }\n        // Collapse whitespace\n        let re = regex::Regex::new(r\"\\s+\").unwrap_or_else(|_| regex::Regex::new(\".^\").unwrap());\n        re.replace_all(&result, \" \").trim().to_string()\n    }\n\n    fn get_heading_tree_txt(item: &HeadingTreeItem, add_item: bool) -> String {\n        let mut result = String::new();\n        if add_item {\n            result.push_str(&format!(\"<h{}> {}\", item.level, item.text));\n            if let Some(ref id) = item.id {\n                result.push_str(&format!(\" [#{}]\", id));\n            }\n        
    result.push('\\n');\n        }\n        for child in &item.children {\n            result.push_str(&\"  \".repeat((child.level - 1) as usize));\n            result.push_str(&format!(\"<h{}> {}\", child.level, child.text));\n            if let Some(ref id) = child.id {\n                result.push_str(&format!(\" [#{}]\", id));\n            }\n            result.push('\\n');\n            result.push_str(&Self::get_heading_tree_txt(child, false));\n        }\n        result\n    }\n\n    /// Get heading tree as an HTML `<ul><li>` list.\n    pub fn get_heading_tree_ul_li_list(items: &[HeadingTreeItem]) -> String {\n        let mut result = String::from(\"<ul>\");\n        for item in items {\n            result.push_str(\"<li>\");\n            result.push_str(&Self::get_heading_tree_ul_li(item, true));\n            result.push_str(\"</li>\");\n        }\n        result.push_str(\"</ul>\");\n        result\n    }\n\n    fn get_heading_tree_ul_li(item: &HeadingTreeItem, add_item: bool) -> String {\n        let mut result = String::new();\n        if add_item {\n            let txt_row = format!(\n                \"&lt;h{}&gt; {}{}\",\n                item.level,\n                html_escape(&item.text),\n                item.id\n                    .as_ref()\n                    .map(|id| format!(\" [#{}]\", html_escape(id)))\n                    .unwrap_or_default()\n            );\n            if item.has_error() {\n                let error_text = html_escape(item.error_text.as_deref().unwrap_or(\"\"));\n                let colored = crate::utils::get_color_text(&txt_row, \"magenta\", false);\n                let colored_html = crate::utils::convert_bash_colors_in_text_to_html(&colored);\n                result.push_str(&format!(\n                    \"<span class=\\\"help\\\" title=\\\"{}\\\">{}</span>\",\n                    error_text, colored_html\n                ));\n            } else {\n                result.push_str(&txt_row);\n            }\n        
}\n\n        if !item.children.is_empty() {\n            result.push_str(\"<ul>\");\n            for child in &item.children {\n                result.push_str(\"<li>\");\n                let txt_row = format!(\n                    \"&lt;h{}&gt; {}{}\",\n                    child.level,\n                    html_escape(&child.text),\n                    child\n                        .id\n                        .as_ref()\n                        .map(|id| format!(\" [#{}]\", html_escape(id)))\n                        .unwrap_or_default()\n                );\n                if child.has_error() {\n                    let error_text = html_escape(child.error_text.as_deref().unwrap_or(\"\"));\n                    let colored = crate::utils::get_color_text(&txt_row, \"magenta\", false);\n                    let colored_html = crate::utils::convert_bash_colors_in_text_to_html(&colored);\n                    result.push_str(&format!(\n                        \"<span class=\\\"help\\\" title=\\\"{}\\\">{}</span>\",\n                        error_text, colored_html\n                    ));\n                } else {\n                    result.push_str(&txt_row);\n                }\n                result.push_str(&Self::get_heading_tree_ul_li(child, false));\n                result.push_str(\"</li>\");\n            }\n            result.push_str(\"</ul>\");\n        }\n        result\n    }\n\n    /// Count total headings in tree\n    pub fn get_headings_count(items: &[HeadingTreeItem]) -> usize {\n        let mut count = 0;\n        for item in items {\n            count += 1;\n            count += Self::get_headings_count(&item.children);\n        }\n        count\n    }\n\n    /// Count headings with errors in tree\n    pub fn get_headings_with_error_count(items: &[HeadingTreeItem]) -> usize {\n        let mut count = 0;\n        for item in items {\n            if item.has_error() {\n                count += 1;\n            }\n            count += 
Self::get_headings_with_error_count(&item.children);\n        }\n        count\n    }\n}\n"
  },
  {
    "path": "src/analysis/result/mod.rs",
    "content": "pub mod analyzer_stats;\r\npub mod dns_analysis_result;\r\npub mod header_stats;\r\npub mod heading_tree_item;\r\npub mod security_checked_header;\r\npub mod security_result;\r\npub mod seo_opengraph_result;\r\npub mod url_analysis_result;\r\n"
  },
  {
    "path": "src/analysis/result/security_checked_header.rs",
    "content": "// SiteOne Crawler - SecurityCheckedHeader\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\npub const SEVERITY_OK: i32 = 1;\npub const SEVERITY_NOTICE: i32 = 2;\npub const SEVERITY_WARNING: i32 = 3;\npub const SEVERITY_CRITICAL: i32 = 4;\n\n#[derive(Debug, Clone)]\npub struct SecurityCheckedHeader {\n    pub header: String,\n    pub highest_severity: Option<i32>,\n    /// severity -> count\n    pub count_per_severity: HashMap<i32, usize>,\n    /// All unique values of this header\n    pub values: Vec<String>,\n    pub recommendations: Vec<String>,\n}\n\nimpl SecurityCheckedHeader {\n    pub fn new(header: String) -> Self {\n        Self {\n            header,\n            highest_severity: None,\n            count_per_severity: HashMap::new(),\n            values: Vec::new(),\n            recommendations: Vec::new(),\n        }\n    }\n\n    pub fn set_finding(&mut self, value: Option<&str>, severity: i32, recommendation: Option<&str>) {\n        if let Some(val) = value\n            && !self.values.contains(&val.to_string())\n        {\n            self.values.push(val.to_string());\n        }\n        if let Some(rec) = recommendation\n            && !self.recommendations.contains(&rec.to_string())\n        {\n            self.recommendations.push(rec.to_string());\n        }\n        if self.highest_severity.is_none() || severity > self.highest_severity.unwrap_or(0) {\n            self.highest_severity = Some(severity);\n        }\n        *self.count_per_severity.entry(severity).or_insert(0) += 1;\n    }\n\n    pub fn get_formatted_header(&self) -> String {\n        let words: Vec<String> = self\n            .header\n            .split('-')\n            .map(|w| {\n                let mut chars = w.chars();\n                match chars.next() {\n                    Some(c) => format!(\"{}{}\", c.to_uppercase(), chars.as_str()),\n                    None => String::new(),\n                }\n            })\n           
 .collect();\n        words.join(\"-\").replace(\"Xss\", \"XSS\")\n    }\n\n    pub fn get_severity_name(&self) -> &'static str {\n        match self.highest_severity {\n            Some(SEVERITY_OK) => \"ok\",\n            Some(SEVERITY_NOTICE) => \"notice\",\n            Some(SEVERITY_WARNING) => \"warning\",\n            Some(SEVERITY_CRITICAL) => \"critical\",\n            _ => \"unknown\",\n        }\n    }\n}\n"
  },
  {
    "path": "src/analysis/result/security_result.rs",
    "content": "// SiteOne Crawler - SecurityResult\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse indexmap::IndexMap;\n\nuse super::security_checked_header::{SEVERITY_OK, SecurityCheckedHeader};\n\n#[derive(Debug, Clone, Default)]\npub struct SecurityResult {\n    pub checked_headers: IndexMap<String, SecurityCheckedHeader>,\n}\n\nimpl SecurityResult {\n    pub fn new() -> Self {\n        Self::default()\n    }\n\n    pub fn get_checked_header(&mut self, header: &str) -> &mut SecurityCheckedHeader {\n        self.checked_headers\n            .entry(header.to_string())\n            .or_insert_with(|| SecurityCheckedHeader::new(header.to_string()))\n    }\n\n    pub fn get_highest_severity(&self) -> i32 {\n        let mut highest = SEVERITY_OK;\n        for item in self.checked_headers.values() {\n            if let Some(sev) = item.highest_severity\n                && sev > highest\n            {\n                highest = sev;\n            }\n        }\n        highest\n    }\n}\n"
  },
  {
    "path": "src/analysis/result/seo_opengraph_result.rs",
    "content": "// SiteOne Crawler - SeoAndOpenGraphResult\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse super::heading_tree_item::HeadingTreeItem;\r\n\r\npub const ROBOTS_INDEX: i32 = 1;\r\npub const ROBOTS_NOINDEX: i32 = 0;\r\npub const ROBOTS_FOLLOW: i32 = 1;\r\npub const ROBOTS_NOFOLLOW: i32 = 2;\r\n\r\n#[derive(Debug, Clone)]\r\npub struct SeoAndOpenGraphResult {\r\n    pub url_uq_id: String,\r\n    pub url_path_and_query: String,\r\n\r\n    pub title: Option<String>,\r\n    pub description: Option<String>,\r\n    pub keywords: Option<String>,\r\n    pub h1: Option<String>,\r\n\r\n    pub robots_index: Option<i32>,\r\n    pub robots_follow: Option<i32>,\r\n    pub denied_by_robots_txt: bool,\r\n\r\n    pub og_title: Option<String>,\r\n    pub og_type: Option<String>,\r\n    pub og_image: Option<String>,\r\n    pub og_url: Option<String>,\r\n    pub og_description: Option<String>,\r\n    pub og_site_name: Option<String>,\r\n\r\n    pub twitter_card: Option<String>,\r\n    pub twitter_site: Option<String>,\r\n    pub twitter_creator: Option<String>,\r\n    pub twitter_title: Option<String>,\r\n    pub twitter_description: Option<String>,\r\n    pub twitter_image: Option<String>,\r\n\r\n    pub heading_tree_items: Vec<HeadingTreeItem>,\r\n    pub headings_count: usize,\r\n    pub headings_errors_count: usize,\r\n}\r\n\r\nimpl SeoAndOpenGraphResult {\r\n    pub fn new(url_uq_id: String, url_path_and_query: String) -> Self {\r\n        Self {\r\n            url_uq_id,\r\n            url_path_and_query,\r\n            title: None,\r\n            description: None,\r\n            keywords: None,\r\n            h1: None,\r\n            robots_index: None,\r\n            robots_follow: None,\r\n            denied_by_robots_txt: false,\r\n            og_title: None,\r\n            og_type: None,\r\n            og_image: None,\r\n            og_url: None,\r\n            og_description: None,\r\n            og_site_name: None,\r\n            twitter_card: 
None,\r\n            twitter_site: None,\r\n            twitter_creator: None,\r\n            twitter_title: None,\r\n            twitter_description: None,\r\n            twitter_image: None,\r\n            heading_tree_items: Vec::new(),\r\n            headings_count: 0,\r\n            headings_errors_count: 0,\r\n        }\r\n    }\r\n\r\n    /// Check if URL is denied by robots.txt\r\n    pub fn is_denied_by_robots_txt(url_path_and_query: &str, robots_txt_content: &str) -> bool {\r\n        if robots_txt_content.is_empty() {\r\n            return false;\r\n        }\r\n\r\n        // Remove query string from URL\r\n        let url_path = if let Some(pos) = url_path_and_query.find('?') {\r\n            &url_path_and_query[..pos]\r\n        } else {\r\n            url_path_and_query\r\n        };\r\n\r\n        // Remove scheme and host from URL if present\r\n        let url_path = if url_path.contains(\"://\") {\r\n            if let Ok(parsed) = url::Url::parse(url_path) {\r\n                parsed.path().to_string()\r\n            } else {\r\n                url_path.to_string()\r\n            }\r\n        } else {\r\n            url_path.to_string()\r\n        };\r\n\r\n        for line in robots_txt_content.lines() {\r\n            let line = line.trim();\r\n            if let Some(disallowed_path) = line.strip_prefix(\"Disallow:\") {\r\n                let disallowed_path = disallowed_path.trim();\r\n                if !disallowed_path.is_empty() && url_path.starts_with(disallowed_path) {\r\n                    return true;\r\n                }\r\n            }\r\n        }\r\n\r\n        false\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/analysis/result/url_analysis_result.rs",
    "content": "// SiteOne Crawler - UrlAnalysisResult\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::utils;\n\n#[derive(Debug, Clone, Default)]\npub struct UrlAnalysisResult {\n    ok: Vec<String>,\n    notice: Vec<String>,\n    warning: Vec<String>,\n    critical: Vec<String>,\n\n    ok_details: HashMap<String, Vec<String>>,\n    notice_details: HashMap<String, Vec<String>>,\n    warning_details: HashMap<String, Vec<String>>,\n    critical_details: HashMap<String, Vec<String>>,\n\n    /// Stats per analysis and severity: analysis_name -> severity -> count\n    stats_per_analysis: HashMap<String, HashMap<String, usize>>,\n}\n\nimpl UrlAnalysisResult {\n    pub fn new() -> Self {\n        Self::default()\n    }\n\n    pub fn add_ok(&mut self, message: String, analysis_name: &str, detail: Option<Vec<String>>) {\n        self.ok.push(message);\n        if let Some(d) = detail {\n            self.ok_details.entry(analysis_name.to_string()).or_default().extend(d);\n        }\n        *self\n            .stats_per_analysis\n            .entry(analysis_name.to_string())\n            .or_default()\n            .entry(\"ok\".to_string())\n            .or_insert(0) += 1;\n    }\n\n    pub fn add_notice(&mut self, message: String, analysis_name: &str, detail: Option<Vec<String>>) {\n        self.notice.push(message);\n        if let Some(d) = detail {\n            self.notice_details\n                .entry(analysis_name.to_string())\n                .or_default()\n                .extend(d);\n        }\n        *self\n            .stats_per_analysis\n            .entry(analysis_name.to_string())\n            .or_default()\n            .entry(\"notice\".to_string())\n            .or_insert(0) += 1;\n    }\n\n    pub fn add_warning(&mut self, message: String, analysis_name: &str, detail: Option<Vec<String>>) {\n        self.warning.push(message);\n        if let Some(d) = detail {\n            self.warning_details\n                
.entry(analysis_name.to_string())\n                .or_default()\n                .extend(d);\n        }\n        *self\n            .stats_per_analysis\n            .entry(analysis_name.to_string())\n            .or_default()\n            .entry(\"warning\".to_string())\n            .or_insert(0) += 1;\n    }\n\n    pub fn add_critical(&mut self, message: String, analysis_name: &str, detail: Option<Vec<String>>) {\n        self.critical.push(message);\n        if let Some(d) = detail {\n            self.critical_details\n                .entry(analysis_name.to_string())\n                .or_default()\n                .extend(d);\n        }\n        *self\n            .stats_per_analysis\n            .entry(analysis_name.to_string())\n            .or_default()\n            .entry(\"critical\".to_string())\n            .or_insert(0) += 1;\n    }\n\n    pub fn get_stats_per_analysis(&self) -> &HashMap<String, HashMap<String, usize>> {\n        &self.stats_per_analysis\n    }\n\n    pub fn get_ok(&self) -> &[String] {\n        &self.ok\n    }\n\n    pub fn get_notice(&self) -> &[String] {\n        &self.notice\n    }\n\n    pub fn get_warning(&self) -> &[String] {\n        &self.warning\n    }\n\n    pub fn get_critical(&self) -> &[String] {\n        &self.critical\n    }\n\n    pub fn get_ok_details(&self) -> &HashMap<String, Vec<String>> {\n        &self.ok_details\n    }\n\n    pub fn get_notice_details(&self) -> &HashMap<String, Vec<String>> {\n        &self.notice_details\n    }\n\n    pub fn get_warning_details(&self) -> &HashMap<String, Vec<String>> {\n        &self.warning_details\n    }\n\n    pub fn get_critical_details(&self) -> &HashMap<String, Vec<String>> {\n        &self.critical_details\n    }\n\n    pub fn get_all_count(&self) -> usize {\n        self.ok.len() + self.notice.len() + self.warning.len() + self.critical.len()\n    }\n\n    pub fn get_details_of_severity_and_analysis_name(&self, severity: &str, analysis_name: &str) -> Vec<String> {\n       
 match severity {\n            \"ok\" => self.ok_details.get(analysis_name).cloned().unwrap_or_default(),\n            \"notice\" => self.notice_details.get(analysis_name).cloned().unwrap_or_default(),\n            \"warning\" => self.warning_details.get(analysis_name).cloned().unwrap_or_default(),\n            \"critical\" => self.critical_details.get(analysis_name).cloned().unwrap_or_default(),\n            _ => Vec::new(),\n        }\n    }\n\n    pub fn to_icon_string(&self) -> String {\n        let mut result = String::new();\n\n        let count_critical = self.critical.len();\n        let count_warning = self.warning.len();\n        let count_notice = self.notice.len();\n        let count_ok = self.ok.len();\n\n        if count_critical > 0 {\n            result.push_str(&format!(\"{}\\u{26d4} \", count_critical));\n        }\n        if count_warning > 0 {\n            result.push_str(&format!(\"{}\\u{26a0} \", count_warning));\n        }\n        if count_notice > 0 {\n            result.push_str(&format!(\"{}\\u{2139}\\u{fe0f} \", count_notice));\n        }\n        if count_ok > 0 {\n            result.push_str(&format!(\"{}\\u{2705} \", count_ok));\n        }\n\n        result.trim().to_string()\n    }\n\n    pub fn to_colorized_string(&self, strip_whitespaces: bool) -> String {\n        let mut result = String::new();\n\n        let count_critical = self.critical.len();\n        let count_warning = self.warning.len();\n        let count_notice = self.notice.len();\n        let count_ok = self.ok.len();\n\n        if count_critical > 0 {\n            result.push_str(&utils::get_color_text(&count_critical.to_string(), \"red\", true));\n            result.push_str(\" / \");\n        }\n        if count_warning > 0 {\n            result.push_str(&utils::get_color_text(&count_warning.to_string(), \"magenta\", false));\n            result.push_str(\" / \");\n        }\n        if count_notice > 0 {\n            
result.push_str(&utils::get_color_text(&count_notice.to_string(), \"blue\", false));\n            result.push_str(\" / \");\n        }\n        if count_ok > 0 {\n            result.push_str(&utils::get_color_text(&count_ok.to_string(), \"green\", false));\n            result.push_str(\" / \");\n        }\n\n        let trimmed = result.trim_end_matches(\" / \").to_string();\n        if strip_whitespaces {\n            trimmed.replace(' ', \"\")\n        } else {\n            trimmed\n        }\n    }\n\n    pub fn to_not_colorized_string(&self, strip_whitespaces: bool) -> String {\n        let mut result = String::new();\n\n        let count_critical = self.critical.len();\n        let count_warning = self.warning.len();\n        let count_notice = self.notice.len();\n        let count_ok = self.ok.len();\n\n        if count_critical > 0 {\n            result.push_str(&format!(\"{} / \", count_critical));\n        }\n        if count_warning > 0 {\n            result.push_str(&format!(\"{} / \", count_warning));\n        }\n        if count_notice > 0 {\n            result.push_str(&format!(\"{} / \", count_notice));\n        }\n        if count_ok > 0 {\n            result.push_str(&format!(\"{} / \", count_ok));\n        }\n\n        let trimmed = result.trim_end_matches(\" / \").to_string();\n        if strip_whitespaces {\n            trimmed.replace(' ', \"\")\n        } else {\n            trimmed\n        }\n    }\n\n    pub fn get_all_details_for_analysis(&self, analysis_name: &str) -> HashMap<String, Vec<String>> {\n        let mut result = HashMap::new();\n        result.insert(\n            \"ok\".to_string(),\n            self.ok_details.get(analysis_name).cloned().unwrap_or_default(),\n        );\n        result.insert(\n            \"notice\".to_string(),\n            self.notice_details.get(analysis_name).cloned().unwrap_or_default(),\n        );\n        result.insert(\n            \"warning\".to_string(),\n            
self.warning_details.get(analysis_name).cloned().unwrap_or_default(),\n        );\n        result.insert(\n            \"critical\".to_string(),\n            self.critical_details.get(analysis_name).cloned().unwrap_or_default(),\n        );\n        result\n    }\n}\n\nimpl std::fmt::Display for UrlAnalysisResult {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.to_colorized_string(true))\n    }\n}\n"
  },
  {
    "path": "src/analysis/security_analyzer.rs",
    "content": "// SiteOne Crawler - SecurityAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\nuse std::time::Instant;\n\nuse regex::Regex;\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::analysis::result::security_checked_header::{\n    SEVERITY_CRITICAL, SEVERITY_NOTICE, SEVERITY_OK, SEVERITY_WARNING,\n};\nuse crate::analysis::result::security_result::SecurityResult;\nuse crate::analysis::result::url_analysis_result::UrlAnalysisResult;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::result::visited_url::VisitedUrl;\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\nconst SUPER_TABLE_SECURITY: &str = \"security\";\nconst ANALYSIS_HEADERS: &str = \"Security headers\";\n\nconst HEADER_ACCESS_CONTROL_ALLOW_ORIGIN: &str = \"access-control-allow-origin\";\nconst HEADER_STRICT_TRANSPORT_SECURITY: &str = \"strict-transport-security\";\nconst HEADER_X_FRAME_OPTIONS: &str = \"x-frame-options\";\nconst HEADER_X_XSS_PROTECTION: &str = \"x-xss-protection\";\nconst HEADER_X_CONTENT_TYPE_OPTIONS: &str = \"x-content-type-options\";\nconst HEADER_REFERRER_POLICY: &str = \"referrer-policy\";\nconst HEADER_CONTENT_SECURITY_POLICY: &str = \"content-security-policy\";\nconst HEADER_FEATURE_POLICY: &str = \"feature-policy\";\nconst HEADER_PERMISSIONS_POLICY: &str = \"permissions-policy\";\nconst HEADER_SERVER: &str = \"server\";\nconst HEADER_X_POWERED_BY: &str = \"x-powered-by\";\nconst HEADER_SET_COOKIE: &str = \"set-cookie\";\n\nconst CHECKED_HEADERS: &[&str] = &[\n    HEADER_ACCESS_CONTROL_ALLOW_ORIGIN,\n    HEADER_STRICT_TRANSPORT_SECURITY,\n    HEADER_X_FRAME_OPTIONS,\n    HEADER_X_XSS_PROTECTION,\n    HEADER_X_CONTENT_TYPE_OPTIONS,\n    HEADER_REFERRER_POLICY,\n    HEADER_CONTENT_SECURITY_POLICY,\n    HEADER_FEATURE_POLICY,\n    
HEADER_PERMISSIONS_POLICY,\n    HEADER_SERVER,\n    HEADER_X_POWERED_BY,\n    HEADER_SET_COOKIE,\n];\n\npub struct SecurityAnalyzer {\n    base: BaseAnalyzer,\n    result: SecurityResult,\n    pages_with_critical: usize,\n    pages_with_warning: usize,\n    pages_with_notice: usize,\n}\n\nimpl Default for SecurityAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl SecurityAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n            result: SecurityResult::new(),\n            pages_with_critical: 0,\n            pages_with_warning: 0,\n            pages_with_notice: 0,\n        }\n    }\n\n    fn check_headers(&mut self, headers: &HashMap<String, String>, is_https: bool, url_result: &mut UrlAnalysisResult) {\n        for &header in CHECKED_HEADERS {\n            match header {\n                HEADER_ACCESS_CONTROL_ALLOW_ORIGIN => {\n                    self.check_access_control_allow_origin(headers, url_result);\n                }\n                HEADER_STRICT_TRANSPORT_SECURITY => {\n                    if is_https {\n                        self.check_strict_transport_security(headers, url_result);\n                    }\n                }\n                HEADER_X_FRAME_OPTIONS => {\n                    self.check_x_frame_options(headers, url_result);\n                }\n                HEADER_X_XSS_PROTECTION => {\n                    self.check_x_xss_protection(headers, url_result);\n                }\n                HEADER_X_CONTENT_TYPE_OPTIONS => {\n                    self.check_x_content_type_options(headers, url_result);\n                }\n                HEADER_REFERRER_POLICY => {\n                    self.check_referrer_policy(headers, url_result);\n                }\n                HEADER_CONTENT_SECURITY_POLICY => {\n                    self.check_content_security_policy(headers, url_result);\n                }\n                HEADER_FEATURE_POLICY => {\n                   
 self.check_feature_policy(headers, url_result);\n                }\n                HEADER_PERMISSIONS_POLICY => {\n                    self.check_permissions_policy(headers, url_result);\n                }\n                HEADER_SERVER => {\n                    self.check_server(headers, url_result);\n                }\n                HEADER_X_POWERED_BY => {\n                    self.check_x_powered_by(headers, url_result);\n                }\n                HEADER_SET_COOKIE => {\n                    self.check_set_cookie(headers, is_https, url_result);\n                }\n                _ => {}\n            }\n        }\n    }\n\n    fn check_html_security(&mut self, html: &str, is_https: bool, url_result: &mut UrlAnalysisResult) {\n        if !is_https {\n            return;\n        }\n\n        use once_cell::sync::Lazy;\n        static RE_FORM_HTTP: Lazy<Regex> =\n            Lazy::new(|| Regex::new(r#\"(?i)<form[^>]*action=[\"']http://[^\"']+[\"'][^>]*>\"#).unwrap());\n        static RE_IFRAME_HTTP: Lazy<Regex> =\n            Lazy::new(|| Regex::new(r#\"(?i)<iframe[^>]*src=[\"']http://[^\"']+[\"'][^>]*>\"#).unwrap());\n\n        // Check for form actions over non-secure HTTP\n        for mat in RE_FORM_HTTP.find_iter(html) {\n            let finding = format!(\n                \"Form actions that send data over non-secure HTTP detected in {}\",\n                mat.as_str()\n            );\n            url_result.add_critical(finding.clone(), ANALYSIS_HEADERS, Some(vec![finding]));\n        }\n\n        // Check for iframes with non-secure HTTP\n        for mat in RE_IFRAME_HTTP.find_iter(html) {\n            let finding = format!(\"Iframe with non-secure HTTP detected in {}\", mat.as_str());\n            url_result.add_critical(finding.clone(), ANALYSIS_HEADERS, Some(vec![finding]));\n        }\n    }\n\n    fn get_header_value(headers: &HashMap<String, String>, header: &str) -> Option<String> {\n        headers.get(header).map(|s| s.to_string())\n   
 }\n\n    fn check_access_control_allow_origin(\n        &mut self,\n        headers: &HashMap<String, String>,\n        url_result: &mut UrlAnalysisResult,\n    ) {\n        let value = Self::get_header_value(headers, HEADER_ACCESS_CONTROL_ALLOW_ORIGIN);\n\n        let value_ref = value.as_deref();\n        match value_ref {\n            None => {}\n            Some(\"*\") => {\n                let rec = \"Access-Control-Allow-Origin is set to '*' which allows any origin to access the resource. This can be a security risk.\";\n                url_result.add_warning(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result\n                    .get_checked_header(HEADER_ACCESS_CONTROL_ALLOW_ORIGIN)\n                    .set_finding(value_ref, SEVERITY_WARNING, Some(rec));\n            }\n            Some(v) if v != \"same-origin\" && v != \"none\" => {\n                let rec = format!(\n                    \"Access-Control-Allow-Origin is set to '{}' which allows this origin to access the resource.\",\n                    v\n                );\n                url_result.add_notice(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n                self.result\n                    .get_checked_header(HEADER_ACCESS_CONTROL_ALLOW_ORIGIN)\n                    .set_finding(value_ref, SEVERITY_NOTICE, Some(&rec));\n            }\n            _ => {\n                self.result\n                    .get_checked_header(HEADER_ACCESS_CONTROL_ALLOW_ORIGIN)\n                    .set_finding(value_ref, SEVERITY_OK, None);\n            }\n        }\n    }\n\n    fn check_strict_transport_security(\n        &mut self,\n        headers: &HashMap<String, String>,\n        url_result: &mut UrlAnalysisResult,\n    ) {\n        let value = Self::get_header_value(headers, HEADER_STRICT_TRANSPORT_SECURITY);\n        let value_ref = value.as_deref();\n\n        match value_ref {\n            None => {\n                let rec = 
\"Strict-Transport-Security header is not set. It enforces secure connections and protects against MITM attacks.\";\n                url_result.add_critical(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result\n                    .get_checked_header(HEADER_STRICT_TRANSPORT_SECURITY)\n                    .set_finding(None, SEVERITY_CRITICAL, Some(rec));\n            }\n            Some(v) if v.contains(\"max-age=0\") => {\n                let rec = \"Strict-Transport-Security header is set to max-age=0 which disables HSTS. This can be a security risk.\";\n                url_result.add_critical(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result\n                    .get_checked_header(HEADER_STRICT_TRANSPORT_SECURITY)\n                    .set_finding(value_ref, SEVERITY_CRITICAL, Some(rec));\n            }\n            Some(v) => {\n                use once_cell::sync::Lazy;\n                static RE_MAX_AGE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)max-age=([0-9]+)\").unwrap());\n                if let Some(caps) = RE_MAX_AGE.captures(v)\n                    && let Some(age_str) = caps.get(1)\n                    && let Ok(age) = age_str.as_str().parse::<i64>()\n                    && age < 31 * 24 * 60 * 60\n                {\n                    let rec = format!(\n                        \"Strict-Transport-Security header is set to max-age={} which is less than 31 days. 
This can be a security risk.\",\n                        age\n                    );\n                    url_result.add_warning(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n                    self.result\n                        .get_checked_header(HEADER_STRICT_TRANSPORT_SECURITY)\n                        .set_finding(value_ref, SEVERITY_WARNING, Some(&rec));\n                    return;\n                }\n                self.result\n                    .get_checked_header(HEADER_STRICT_TRANSPORT_SECURITY)\n                    .set_finding(value_ref, SEVERITY_OK, None);\n            }\n        }\n    }\n\n    fn check_x_frame_options(&mut self, headers: &HashMap<String, String>, url_result: &mut UrlAnalysisResult) {\n        let value = Self::get_header_value(headers, HEADER_X_FRAME_OPTIONS);\n        let value_ref = value.as_deref();\n\n        match value_ref {\n            None => {\n                let rec = \"X-Frame-Options header is not set. It prevents clickjacking attacks when set to 'deny' or 'sameorigin'.\";\n                url_result.add_warning(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result\n                    .get_checked_header(HEADER_X_FRAME_OPTIONS)\n                    .set_finding(None, SEVERITY_WARNING, Some(rec));\n            }\n            Some(\"DENY\") => {\n                self.result\n                    .get_checked_header(HEADER_X_FRAME_OPTIONS)\n                    .set_finding(value_ref, SEVERITY_OK, None);\n            }\n            Some(\"SAMEORIGIN\") => {\n                let rec = \"X-Frame-Options header is set to SAMEORIGIN which allows this origin to embed the resource in a frame.\";\n                url_result.add_notice(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result.get_checked_header(HEADER_X_FRAME_OPTIONS).set_finding(\n                    value_ref,\n                    SEVERITY_NOTICE,\n                    
Some(rec),\n                );\n            }\n            Some(\"ALLOW-FROM\") => {\n                let rec = \"X-Frame-Options header is set to ALLOW-FROM which allows this origin to embed the resource in a frame.\";\n                url_result.add_notice(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result.get_checked_header(HEADER_X_FRAME_OPTIONS).set_finding(\n                    value_ref,\n                    SEVERITY_NOTICE,\n                    Some(rec),\n                );\n            }\n            Some(v) => {\n                let rec = format!(\n                    \"X-Frame-Options header is set to '{}' which allows this origin to embed the resource in a frame. This can be a security risk.\",\n                    v\n                );\n                url_result.add_warning(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n                self.result.get_checked_header(HEADER_X_FRAME_OPTIONS).set_finding(\n                    value_ref,\n                    SEVERITY_WARNING,\n                    Some(&rec),\n                );\n            }\n        }\n    }\n\n    fn check_x_xss_protection(&mut self, headers: &HashMap<String, String>, url_result: &mut UrlAnalysisResult) {\n        let value = Self::get_header_value(headers, HEADER_X_XSS_PROTECTION);\n        let value_ref = value.as_deref();\n\n        // X-XSS-Protection is deprecated (MDN) and non-standard. Modern browsers have removed\n        // XSS auditor support. 
The recommended approach is to use Content-Security-Policy instead.\n        // Not setting this header is the correct modern behavior.\n        match value_ref {\n            None | Some(\"0\") => {\n                // Not set or explicitly disabled — correct modern behavior\n                self.result\n                    .get_checked_header(HEADER_X_XSS_PROTECTION)\n                    .set_finding(value_ref, SEVERITY_OK, None);\n            }\n            Some(\"1\") | Some(\"1; mode=block\") | Some(\"1;mode=block\") => {\n                let rec = \"X-XSS-Protection header is set but deprecated. Consider removing it and using Content-Security-Policy instead.\";\n                url_result.add_notice(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result.get_checked_header(HEADER_X_XSS_PROTECTION).set_finding(\n                    value_ref,\n                    SEVERITY_NOTICE,\n                    Some(rec),\n                );\n            }\n            Some(v) => {\n                let rec = format!(\n                    \"X-XSS-Protection header is set to '{}'. This header is deprecated; use Content-Security-Policy instead.\",\n                    v\n                );\n                url_result.add_notice(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n                self.result.get_checked_header(HEADER_X_XSS_PROTECTION).set_finding(\n                    value_ref,\n                    SEVERITY_NOTICE,\n                    Some(&rec),\n                );\n            }\n        }\n    }\n\n    fn check_x_content_type_options(&mut self, headers: &HashMap<String, String>, url_result: &mut UrlAnalysisResult) {\n        let value = Self::get_header_value(headers, HEADER_X_CONTENT_TYPE_OPTIONS);\n        let value_ref = value.as_deref();\n\n        match value_ref {\n            None => {\n                let rec = \"X-Content-Type-Options header is not set. 
It stops MIME type sniffing and mitigates content type attacks.\";\n                url_result.add_warning(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result\n                    .get_checked_header(HEADER_X_CONTENT_TYPE_OPTIONS)\n                    .set_finding(None, SEVERITY_WARNING, Some(rec));\n            }\n            Some(\"nosniff\") => {\n                self.result\n                    .get_checked_header(HEADER_X_CONTENT_TYPE_OPTIONS)\n                    .set_finding(value_ref, SEVERITY_OK, None);\n            }\n            Some(v) => {\n                let rec = format!(\n                    \"X-Content-Type-Options header is set to '{}'. This can be a security risk.\",\n                    v\n                );\n                url_result.add_warning(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n                self.result\n                    .get_checked_header(HEADER_X_CONTENT_TYPE_OPTIONS)\n                    .set_finding(value_ref, SEVERITY_WARNING, Some(&rec));\n            }\n        }\n    }\n\n    fn check_referrer_policy(&mut self, headers: &HashMap<String, String>, url_result: &mut UrlAnalysisResult) {\n        let value = Self::get_header_value(headers, HEADER_REFERRER_POLICY);\n        let value_ref = value.as_deref();\n\n        let ok_values = [\n            \"no-referrer\",\n            \"no-referrer-when-downgrade\",\n            \"origin\",\n            \"origin-when-cross-origin\",\n            \"same-origin\",\n            \"strict-origin\",\n            \"strict-origin-when-cross-origin\",\n            \"unsafe-url\",\n        ];\n\n        match value_ref {\n            None => {\n                let rec = \"Referrer-Policy header is not set. 
It controls referrer header sharing and enhances privacy and security.\";\n                url_result.add_warning(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result\n                    .get_checked_header(HEADER_REFERRER_POLICY)\n                    .set_finding(None, SEVERITY_WARNING, Some(rec));\n            }\n            Some(v) if ok_values.contains(&v) => {\n                self.result\n                    .get_checked_header(HEADER_REFERRER_POLICY)\n                    .set_finding(value_ref, SEVERITY_OK, None);\n            }\n            Some(v) => {\n                let rec = format!(\"Referrer-Policy header is set to '{}'. This can be a security risk.\", v);\n                url_result.add_notice(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n                self.result.get_checked_header(HEADER_REFERRER_POLICY).set_finding(\n                    value_ref,\n                    SEVERITY_NOTICE,\n                    Some(&rec),\n                );\n            }\n        }\n    }\n\n    fn check_content_security_policy(&mut self, headers: &HashMap<String, String>, url_result: &mut UrlAnalysisResult) {\n        let value = Self::get_header_value(headers, HEADER_CONTENT_SECURITY_POLICY);\n        let value_ref = value.as_deref();\n\n        match value_ref {\n            None => {\n                let rec = \"Content-Security-Policy header is not set. 
It restricts resources the page can load and prevents XSS attacks.\";\n                url_result.add_critical(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result\n                    .get_checked_header(HEADER_CONTENT_SECURITY_POLICY)\n                    .set_finding(None, SEVERITY_CRITICAL, Some(rec));\n            }\n            _ => {\n                self.result\n                    .get_checked_header(HEADER_CONTENT_SECURITY_POLICY)\n                    .set_finding(value_ref, SEVERITY_OK, None);\n            }\n        }\n    }\n\n    fn check_feature_policy(&mut self, headers: &HashMap<String, String>, url_result: &mut UrlAnalysisResult) {\n        let value = Self::get_header_value(headers, HEADER_FEATURE_POLICY);\n        let value_ref = value.as_deref();\n\n        let has_permissions_policy = Self::get_header_value(headers, HEADER_PERMISSIONS_POLICY).is_some();\n\n        match value_ref {\n            None if has_permissions_policy => {\n                let rec = \"Feature-Policy header is not set but Permissions-Policy is set. That's enough.\";\n                url_result.add_notice(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result\n                    .get_checked_header(HEADER_FEATURE_POLICY)\n                    .set_finding(None, SEVERITY_NOTICE, Some(rec));\n            }\n            None => {\n                let rec = \"Feature-Policy header is not set. It allows enabling/disabling browser APIs and features for security. 
Not important if Permissions-Policy is set.\";\n                url_result.add_warning(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result\n                    .get_checked_header(HEADER_FEATURE_POLICY)\n                    .set_finding(None, SEVERITY_WARNING, Some(rec));\n            }\n            _ => {\n                self.result\n                    .get_checked_header(HEADER_FEATURE_POLICY)\n                    .set_finding(value_ref, SEVERITY_OK, None);\n            }\n        }\n    }\n\n    fn check_permissions_policy(&mut self, headers: &HashMap<String, String>, url_result: &mut UrlAnalysisResult) {\n        let value = Self::get_header_value(headers, HEADER_PERMISSIONS_POLICY);\n        let value_ref = value.as_deref();\n\n        let has_feature_policy = Self::get_header_value(headers, HEADER_FEATURE_POLICY).is_some();\n\n        match value_ref {\n            None if has_feature_policy => {\n                let rec = \"Permissions-Policy header is not set but Feature-Policy is. We recommend transforming it to this newer header.\";\n                url_result.add_warning(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result.get_checked_header(HEADER_PERMISSIONS_POLICY).set_finding(\n                    None,\n                    SEVERITY_WARNING,\n                    Some(rec),\n                );\n            }\n            None => {\n                let rec = \"Permissions-Policy header is not set. 
It allows enabling/disabling browser APIs and features for security.\";\n                url_result.add_warning(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n                self.result.get_checked_header(HEADER_PERMISSIONS_POLICY).set_finding(\n                    None,\n                    SEVERITY_WARNING,\n                    Some(rec),\n                );\n            }\n            _ => {\n                self.result\n                    .get_checked_header(HEADER_PERMISSIONS_POLICY)\n                    .set_finding(value_ref, SEVERITY_OK, None);\n            }\n        }\n    }\n\n    fn check_server(&mut self, headers: &HashMap<String, String>, url_result: &mut UrlAnalysisResult) {\n        let value = Self::get_header_value(headers, HEADER_SERVER);\n        let value_ref = value.as_deref();\n\n        let known_values = [\"Apache\", \"nginx\", \"Microsoft-IIS\"];\n\n        let check_for_known = |v: &str| -> bool {\n            known_values\n                .iter()\n                .any(|kv| v.to_lowercase().contains(&kv.to_lowercase()))\n        };\n\n        let is_empty_or_whitespace = value_ref\n            .map(|v| v.trim_matches(|c: char| \" /-.~:\".contains(c)).is_empty())\n            .unwrap_or(true);\n\n        if value_ref.is_none() || is_empty_or_whitespace {\n            let rec = \"Server header is not set or empty. This is recommended.\";\n            url_result.add_notice(rec.to_string(), ANALYSIS_HEADERS, Some(vec![rec.to_string()]));\n            self.result\n                .get_checked_header(HEADER_SERVER)\n                .set_finding(value_ref, SEVERITY_OK, Some(rec));\n        } else if let Some(v) = value_ref {\n            let has_version = v.chars().any(|c| c.is_ascii_digit());\n\n            if has_version {\n                let rec = format!(\n                    \"Server header is set to '{}'. 
It is better not to reveal the technologies used and especially their versions.\",\n                    v\n                );\n                url_result.add_critical(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n                self.result\n                    .get_checked_header(HEADER_SERVER)\n                    .set_finding(value_ref, SEVERITY_CRITICAL, Some(&rec));\n            } else if check_for_known(v) {\n                let rec = format!(\n                    \"Server header is set to known '{}'. It is better not to reveal used technologies.\",\n                    v\n                );\n                url_result.add_notice(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n                self.result\n                    .get_checked_header(HEADER_SERVER)\n                    .set_finding(value_ref, SEVERITY_WARNING, Some(&rec));\n            } else {\n                let rec = format!(\n                    \"Server header is set to '{}'. It is better not to reveal used technologies.\",\n                    v\n                );\n                url_result.add_notice(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n                self.result\n                    .get_checked_header(HEADER_SERVER)\n                    .set_finding(value_ref, SEVERITY_NOTICE, Some(&rec));\n            }\n        }\n    }\n\n    fn check_x_powered_by(&mut self, headers: &HashMap<String, String>, url_result: &mut UrlAnalysisResult) {\n        let value = Self::get_header_value(headers, HEADER_X_POWERED_BY);\n        let value_ref = value.as_deref();\n\n        if let Some(v) = value_ref {\n            let has_version = v.chars().any(|c| c.is_ascii_digit());\n\n            if has_version {\n                let rec = format!(\n                    \"X-Powered-By header is set to '{}'. 
It is better not to reveal the technologies used and especially their versions.\",\n                    v\n                );\n                url_result.add_critical(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n                self.result.get_checked_header(HEADER_X_POWERED_BY).set_finding(\n                    value_ref,\n                    SEVERITY_CRITICAL,\n                    Some(&rec),\n                );\n            } else {\n                let rec = format!(\n                    \"X-Powered-By header is set to '{}'. It is better not to reveal used technologies.\",\n                    v\n                );\n                url_result.add_warning(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n                self.result.get_checked_header(HEADER_X_POWERED_BY).set_finding(\n                    value_ref,\n                    SEVERITY_WARNING,\n                    Some(&rec),\n                );\n            }\n        }\n    }\n\n    fn check_set_cookie(\n        &mut self,\n        headers: &HashMap<String, String>,\n        is_https: bool,\n        url_result: &mut UrlAnalysisResult,\n    ) {\n        let value = match headers.get(HEADER_SET_COOKIE) {\n            Some(v) => v,\n            None => return,\n        };\n\n        // Multiple cookies may be separated by newlines or exist as a single value\n        for cookie in value.split('\\n') {\n            let cookie = cookie.trim();\n            if !cookie.is_empty() {\n                self.check_set_cookie_value(cookie, is_https, url_result);\n            }\n        }\n    }\n\n    fn check_set_cookie_value(&mut self, set_cookie: &str, is_https: bool, url_result: &mut UrlAnalysisResult) {\n        let mut severity = SEVERITY_OK;\n        let cookie_name = set_cookie.split('=').next().unwrap_or(\"unknown\");\n\n        let set_cookie_lower = set_cookie.to_lowercase();\n\n        if !set_cookie_lower.contains(\"samesite\") {\n            severity = SEVERITY_NOTICE;\n          
  let rec = format!(\n                \"Set-Cookie header for '{}' does not have 'SameSite' flag. Consider using 'SameSite=Strict' or 'SameSite=Lax'.\",\n                cookie_name\n            );\n            url_result.add_notice(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n        }\n        if !set_cookie_lower.contains(\"httponly\") {\n            severity = SEVERITY_WARNING;\n            let rec = format!(\n                \"Set-Cookie header for '{}' does not have 'HttpOnly' flag. Attacker can steal the cookie using XSS. Consider using 'HttpOnly' when cookie is not used by JavaScript.\",\n                cookie_name\n            );\n            url_result.add_warning(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n        }\n        if is_https && !set_cookie_lower.contains(\"secure\") {\n            severity = SEVERITY_CRITICAL;\n            let rec = format!(\n                \"Set-Cookie header for '{}' does not have 'Secure' flag. Attacker can steal the cookie over HTTP.\",\n                cookie_name\n            );\n            url_result.add_critical(rec.clone(), ANALYSIS_HEADERS, Some(vec![rec.clone()]));\n        }\n\n        self.result\n            .get_checked_header(HEADER_SET_COOKIE)\n            .set_finding(Some(cookie_name), severity, None);\n    }\n\n    fn set_findings_to_summary(&mut self, status: &Status) {\n        self.pages_with_critical = 0;\n        self.pages_with_warning = 0;\n        self.pages_with_notice = 0;\n\n        for header in self.result.checked_headers.values() {\n            self.pages_with_critical += header.count_per_severity.get(&SEVERITY_CRITICAL).copied().unwrap_or(0);\n            self.pages_with_warning += header.count_per_severity.get(&SEVERITY_WARNING).copied().unwrap_or(0);\n            self.pages_with_notice += header.count_per_severity.get(&SEVERITY_NOTICE).copied().unwrap_or(0);\n        }\n\n        if self.pages_with_critical > 0 {\n            status.add_critical_to_summary(\n  
              \"security\",\n                &format!(\n                    \"Security - {} page(s) with critical finding(s).\",\n                    self.pages_with_critical\n                ),\n            );\n        } else if self.pages_with_warning > 0 {\n            status.add_warning_to_summary(\n                \"security\",\n                &format!(\"Security - {} page(s) with warning(s).\", self.pages_with_warning),\n            );\n        } else if self.pages_with_notice > 0 {\n            status.add_notice_to_summary(\n                \"security\",\n                &format!(\"Security - {} page(s) with notice(s).\", self.pages_with_notice),\n            );\n        } else {\n            status.add_ok_to_summary(\"security\", \"Security - no findings.\");\n        }\n    }\n}\n\nimpl Analyzer for SecurityAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        let console_width = utils::get_console_width();\n        let recommendation_col_width = (console_width as i32 - 70).max(20);\n\n        let columns = vec![\n            SuperTableColumn::new(\n                \"header\".to_string(),\n                \"Header\".to_string(),\n                26,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"ok\".to_string(),\n                \"OK\".to_string(),\n                5,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<usize>()\n                        && v > 0\n                    {\n                        return utils::get_color_text(&v.to_string(), \"green\", false);\n                    }\n                    \"0\".to_string()\n                })),\n                None,\n                false,\n                false,\n                false,\n                
true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"notice\".to_string(),\n                \"Notice\".to_string(),\n                6,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<usize>()\n                        && v > 0\n                    {\n                        return utils::get_color_text(&v.to_string(), \"blue\", false);\n                    }\n                    \"0\".to_string()\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"warning\".to_string(),\n                \"Warning\".to_string(),\n                7,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<usize>()\n                        && v > 0\n                    {\n                        return utils::get_color_text(&v.to_string(), \"magenta\", true);\n                    }\n                    \"0\".to_string()\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"critical\".to_string(),\n                \"Critical\".to_string(),\n                8,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<usize>()\n                        && v > 0\n                    {\n                        return utils::get_color_text(&v.to_string(), \"red\", true);\n                    }\n                    \"0\".to_string()\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            
),\n            SuperTableColumn::new(\n                \"recommendation\".to_string(),\n                \"Recommendation\".to_string(),\n                recommendation_col_width,\n                None,\n                None,\n                true,\n                true,\n                false,\n                false,\n                None,\n            ),\n        ];\n\n        let mut data: Vec<HashMap<String, String>> = Vec::new();\n        for header in self.result.checked_headers.values() {\n            let mut row = HashMap::new();\n            row.insert(\"header\".to_string(), header.get_formatted_header());\n            row.insert(\n                \"highestSeverity\".to_string(),\n                header.highest_severity.unwrap_or(0).to_string(),\n            );\n            row.insert(\n                \"ok\".to_string(),\n                header\n                    .count_per_severity\n                    .get(&SEVERITY_OK)\n                    .copied()\n                    .unwrap_or(0)\n                    .to_string(),\n            );\n            row.insert(\n                \"notice\".to_string(),\n                header\n                    .count_per_severity\n                    .get(&SEVERITY_NOTICE)\n                    .copied()\n                    .unwrap_or(0)\n                    .to_string(),\n            );\n            row.insert(\n                \"warning\".to_string(),\n                header\n                    .count_per_severity\n                    .get(&SEVERITY_WARNING)\n                    .copied()\n                    .unwrap_or(0)\n                    .to_string(),\n            );\n            row.insert(\n                \"critical\".to_string(),\n                header\n                    .count_per_severity\n                    .get(&SEVERITY_CRITICAL)\n                    .copied()\n                    .unwrap_or(0)\n                    .to_string(),\n            );\n            
row.insert(\"recommendation\".to_string(), header.recommendations.join(\". \"));\n            data.push(row);\n        }\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_SECURITY.to_string(),\n            \"Security\".to_string(),\n            \"Nothing to report.\".to_string(),\n            columns,\n            true,\n            Some(\"highestSeverity\".to_string()),\n            \"DESC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_data(data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_end(super_table);\n\n        self.set_findings_to_summary(status);\n    }\n\n    fn analyze_visited_url(\n        &mut self,\n        visited_url: &VisitedUrl,\n        body: Option<&str>,\n        headers: Option<&HashMap<String, String>>,\n    ) -> Option<UrlAnalysisResult> {\n        if !visited_url.is_allowed_for_crawling\n            || visited_url.content_type != ContentTypeId::Html\n            || visited_url.looks_like_static_file_by_url()\n        {\n            return None;\n        }\n\n        let mut result = UrlAnalysisResult::new();\n\n        let start = Instant::now();\n        if let Some(hdrs) = headers {\n            self.check_headers(hdrs, visited_url.is_https(), &mut result);\n        }\n        self.base.measure_exec_time(\"SecurityAnalyzer\", \"checkHeaders\", start);\n\n        if let Some(html) = body\n            && !html.trim().is_empty()\n        {\n            let start2 = Instant::now();\n            self.check_html_security(html, visited_url.is_https(), &mut result);\n            self.base\n                .measure_exec_time(\"SecurityAnalyzer\", \"checkHtmlSecurity\", start2);\n        }\n\n        Some(result)\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        215\n    }\n\n    fn 
get_name(&self) -> &str {\n        \"SecurityAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n"
  },
  {
    "path": "src/analysis/seo_opengraph_analyzer.rs",
    "content": "// SiteOne Crawler - SeoAndOpenGraphAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\nuse std::time::Instant;\n\nuse scraper::{Html, Selector};\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::analysis::result::heading_tree_item::HeadingTreeItem;\nuse crate::analysis::result::seo_opengraph_result::{ROBOTS_NOINDEX, SeoAndOpenGraphResult};\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::result::visited_url::VisitedUrl;\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\nconst SUPER_TABLE_SEO: &str = \"seo\";\nconst SUPER_TABLE_OPEN_GRAPH: &str = \"open-graph\";\nconst SUPER_TABLE_SEO_HEADINGS: &str = \"seo-headings\";\n\npub struct SeoAndOpenGraphAnalyzer {\n    base: BaseAnalyzer,\n    max_heading_level: i32,\n    has_og_tags: bool,\n    has_twitter_tags: bool,\n}\n\nimpl Default for SeoAndOpenGraphAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl SeoAndOpenGraphAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n            max_heading_level: 3,\n            has_og_tags: false,\n            has_twitter_tags: false,\n        }\n    }\n\n    /// Set configuration from CoreOptions.\n    pub fn set_config(&mut self, max_heading_level: i32) {\n        self.max_heading_level = max_heading_level;\n    }\n\n    fn get_seo_and_opengraph_results(&self, status: &Status) -> Vec<SeoAndOpenGraphResult> {\n        let visited_urls = status.get_visited_urls();\n        let html_urls: Vec<&VisitedUrl> = visited_urls\n            .iter()\n            .filter(|u| u.status_code == 200 && u.is_allowed_for_crawling && u.content_type == ContentTypeId::Html)\n            .collect();\n\n        let mut results = Vec::new();\n\n        for visited_url in html_urls 
{\n            let html_body = match status.get_url_body_text(&visited_url.uq_id) {\n                Some(body) => body,\n                None => continue,\n            };\n\n            let url_path_and_query = get_url_path_and_query(&visited_url.url);\n            let mut url_result = SeoAndOpenGraphResult::new(visited_url.uq_id.clone(), url_path_and_query);\n\n            let document = Html::parse_document(&html_body);\n            extract_seo_metadata(&document, &mut url_result);\n            extract_opengraph_metadata(&document, &mut url_result);\n            extract_twitter_metadata(&document, &mut url_result);\n            build_heading_tree(&document, &mut url_result, self.max_heading_level);\n\n            results.push(url_result);\n        }\n\n        results\n    }\n\n    fn analyze_seo(&self, url_results: &[SeoAndOpenGraphResult], status: &Status, output: &mut dyn Output) {\n        let console_width = utils::get_console_width();\n        let url_col_width = 50;\n        let indexing_col_width = 20;\n        let common_col_count = 4;\n        let spaces_and_pipes = 6 * 3;\n        let common_col_width =\n            ((console_width as i32 - url_col_width - indexing_col_width - spaces_and_pipes) / common_col_count).max(10);\n\n        let columns = vec![\n            SuperTableColumn::new(\n                \"urlPathAndQuery\".to_string(),\n                \"URL\".to_string(),\n                url_col_width,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"indexing\".to_string(),\n                \"Indexing\".to_string(),\n                indexing_col_width,\n                None,\n                Some(Box::new(|row: &HashMap<String, String>, _render_into: &str| {\n                    let denied = row.get(\"deniedByRobotsTxt\").map(|v| v == \"true\").unwrap_or(false);\n   
                 let robots_index = row.get(\"robotsIndex\").and_then(|v| v.parse::<i32>().ok()).unwrap_or(1);\n\n                    if denied {\n                        utils::get_color_text(\"DENY (robots.txt)\", \"magenta\", false)\n                    } else if robots_index == ROBOTS_NOINDEX {\n                        utils::get_color_text(\"DENY (meta)\", \"magenta\", false)\n                    } else {\n                        \"Allowed\".to_string()\n                    }\n                })),\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"title\".to_string(),\n                \"Title\".to_string(),\n                common_col_width,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"h1\".to_string(),\n                \"H1\".to_string(),\n                common_col_width,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if value.is_empty() {\n                        utils::get_color_text(\"Missing H1\", \"red\", true)\n                    } else {\n                        value.to_string()\n                    }\n                })),\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"description\".to_string(),\n                \"Description\".to_string(),\n                common_col_width,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                
\"keywords\".to_string(),\n                \"Keywords\".to_string(),\n                common_col_width,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let data = seo_results_to_table_data(url_results);\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_SEO.to_string(),\n            \"SEO metadata\".to_string(),\n            \"No URLs.\".to_string(),\n            columns,\n            true,\n            Some(\"urlPathAndQuery\".to_string()),\n            \"ASC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_visibility_in_console(true, Some(10));\n        super_table.set_data(data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_beginning(super_table);\n    }\n\n    fn analyze_open_graph(&self, url_results: &[SeoAndOpenGraphResult], status: &Status, output: &mut dyn Output) {\n        let console_width = utils::get_console_width();\n        let url_col_width = 50;\n        let image_col_width = 18;\n        let image_col_count = (if self.has_og_tags { 1 } else { 0 }) + (if self.has_twitter_tags { 1 } else { 0 });\n        let common_col_count = (if self.has_og_tags { 2 } else { 0 }) + (if self.has_twitter_tags { 2 } else { 0 });\n        let spaces_and_pipes = (1 + image_col_count + common_col_count) * 3;\n        let common_col_width =\n            ((console_width as i32 - url_col_width - (image_col_count * image_col_width) - spaces_and_pipes)\n                / common_col_count.max(1))\n            .max(10);\n\n        let mut columns = vec![SuperTableColumn::new(\n            \"urlPathAndQuery\".to_string(),\n            \"URL\".to_string(),\n            url_col_width,\n            None,\n            None,\n            
true,\n            false,\n            false,\n            true,\n            None,\n        )];\n\n        if self.has_og_tags {\n            columns.push(SuperTableColumn::new(\n                \"ogTitle\".to_string(),\n                \"OG Title\".to_string(),\n                common_col_width,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ));\n            columns.push(SuperTableColumn::new(\n                \"ogDescription\".to_string(),\n                \"OG Description\".to_string(),\n                common_col_width,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ));\n            columns.push(SuperTableColumn::new(\n                \"ogImage\".to_string(),\n                \"OG Image\".to_string(),\n                image_col_width,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ));\n        }\n\n        if self.has_twitter_tags {\n            columns.push(SuperTableColumn::new(\n                \"twitterTitle\".to_string(),\n                \"Twitter Title\".to_string(),\n                common_col_width,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ));\n            columns.push(SuperTableColumn::new(\n                \"twitterDescription\".to_string(),\n                \"Twitter Description\".to_string(),\n                common_col_width,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ));\n            
columns.push(SuperTableColumn::new(\n                \"twitterImage\".to_string(),\n                \"Twitter Image\".to_string(),\n                image_col_width,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ));\n        }\n\n        let data = if self.has_og_tags || self.has_twitter_tags {\n            og_results_to_table_data(url_results)\n        } else {\n            Vec::new()\n        };\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_OPEN_GRAPH.to_string(),\n            \"OpenGraph metadata\".to_string(),\n            \"No URLs with OpenGraph data (og:* or twitter:* meta tags).\".to_string(),\n            columns,\n            true,\n            Some(\"urlPathAndQuery\".to_string()),\n            \"ASC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_visibility_in_console(true, Some(10));\n        super_table.set_data(data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_beginning(super_table);\n    }\n\n    fn analyze_headings(&self, url_results: &[SeoAndOpenGraphResult], status: &Status, output: &mut dyn Output) {\n        let console_width = utils::get_console_width();\n        let url_col_width = 30;\n        let heading_col_width = (console_width as i32 - url_col_width - 24).max(20);\n\n        let columns = vec![\n            SuperTableColumn::new(\n                \"headings\".to_string(),\n                \"Heading structure\".to_string(),\n                heading_col_width,\n                None,\n                Some(Box::new(|row: &HashMap<String, String>, render_into: &str| {\n                    if render_into == \"html\" {\n                        row.get(\"headingsHtml\").cloned().unwrap_or_default()\n                    
} else {\n                        row.get(\"headings\").cloned().unwrap_or_default()\n                    }\n                })),\n                true,\n                false,\n                false,\n                false,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"headingsCount\".to_string(),\n                \"Count\".to_string(),\n                5,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"headingsErrorsCount\".to_string(),\n                \"Errors\".to_string(),\n                6,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<usize>() {\n                        if v > 0 {\n                            return utils::get_color_text(&v.to_string(), \"red\", true);\n                        }\n                        return utils::get_color_text(&v.to_string(), \"green\", true);\n                    }\n                    value.to_string()\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"urlPathAndQuery\".to_string(),\n                \"URL\".to_string(),\n                url_col_width,\n                None,\n                None,\n                true,\n                false,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let data = headings_to_table_data(url_results);\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_SEO_HEADINGS.to_string(),\n            \"Heading structure\".to_string(),\n            \"No URLs to analyze heading structure.\".to_string(),\n            columns,\n     
       true,\n            Some(\"headingsErrorsCount\".to_string()),\n            \"DESC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_visibility_in_console(true, Some(10));\n        super_table.set_data(data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_beginning(super_table);\n    }\n}\n\nimpl Analyzer for SeoAndOpenGraphAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        let url_results = self.get_seo_and_opengraph_results(status);\n\n        // Check for OG and Twitter tags\n        for r in &url_results {\n            if self.has_og_tags && self.has_twitter_tags {\n                break;\n            }\n            if r.og_title.is_some() || r.og_description.is_some() || r.og_image.is_some() {\n                self.has_og_tags = true;\n            }\n            if r.twitter_card.is_some()\n                || r.twitter_title.is_some()\n                || r.twitter_description.is_some()\n                || r.twitter_image.is_some()\n            {\n                self.has_twitter_tags = true;\n            }\n        }\n\n        let s = Instant::now();\n        self.analyze_seo(&url_results, status, output);\n        self.base.measure_exec_time(\"SeoAndOpenGraphAnalyzer\", \"analyzeSeo\", s);\n\n        let s = Instant::now();\n        self.analyze_open_graph(&url_results, status, output);\n        self.base\n            .measure_exec_time(\"SeoAndOpenGraphAnalyzer\", \"analyzeOpenGraph\", s);\n\n        let s = Instant::now();\n        self.analyze_headings(&url_results, status, output);\n        self.base\n            .measure_exec_time(\"SeoAndOpenGraphAnalyzer\", \"analyzeHeadings\", s);\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        113\n    }\n\n    fn get_name(&self) -> 
&str {\n        \"SeoAndOpenGraphAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n\nfn get_url_path_and_query(url: &str) -> String {\n    if let Ok(parsed) = url::Url::parse(url) {\n        let path = parsed.path().to_string();\n        if let Some(query) = parsed.query() {\n            format!(\"{}?{}\", path, query)\n        } else {\n            path\n        }\n    } else {\n        url.to_string()\n    }\n}\n\nfn extract_seo_metadata(document: &Html, result: &mut SeoAndOpenGraphResult) {\n    // Title\n    if let Ok(sel) = Selector::parse(\"title\")\n        && let Some(el) = document.select(&sel).next()\n    {\n        let text = el.text().collect::<String>().trim().to_string();\n        if !text.is_empty() {\n            result.title = Some(text);\n        }\n    }\n\n    // Meta description\n    if let Ok(sel) = Selector::parse(\"meta[name='description']\")\n        && let Some(el) = document.select(&sel).next()\n        && let Some(content) = el.value().attr(\"content\")\n    {\n        result.description = Some(content.to_string());\n    }\n\n    // Meta keywords\n    if let Ok(sel) = Selector::parse(\"meta[name='keywords']\")\n        && let Some(el) = document.select(&sel).next()\n        && let Some(content) = el.value().attr(\"content\")\n    {\n        result.keywords = Some(content.to_string());\n    }\n\n    // H1\n    if let Ok(sel) = Selector::parse(\"h1\")\n        && let Some(el) = document.select(&sel).next()\n    {\n        let text = el.text().collect::<String>().trim().to_string();\n        if !text.is_empty() {\n            result.h1 = Some(text);\n        }\n    }\n\n    // Robots meta\n    if let Ok(sel) = Selector::parse(\"meta[name='robots']\")\n        && let Some(el) = document.select(&sel).next()\n        && let Some(content) = 
el.value().attr(\"content\")\n    {\n        let content_lower = content.to_lowercase();\n        if content_lower.contains(\"noindex\") {\n            result.robots_index = Some(ROBOTS_NOINDEX);\n        }\n        if content_lower.contains(\"nofollow\") {\n            result.robots_follow = Some(crate::analysis::result::seo_opengraph_result::ROBOTS_NOFOLLOW);\n        }\n    }\n}\n\nfn extract_opengraph_metadata(document: &Html, result: &mut SeoAndOpenGraphResult) {\n    // Extract OG tags\n    if let Ok(sel) = Selector::parse(\"meta[property='og:title']\")\n        && let Some(el) = document.select(&sel).next()\n    {\n        result.og_title = el.value().attr(\"content\").map(|s| s.to_string());\n    }\n    if let Ok(sel) = Selector::parse(\"meta[property='og:description']\")\n        && let Some(el) = document.select(&sel).next()\n    {\n        result.og_description = el.value().attr(\"content\").map(|s| s.to_string());\n    }\n    if let Ok(sel) = Selector::parse(\"meta[property='og:image']\")\n        && let Some(el) = document.select(&sel).next()\n    {\n        result.og_image = el.value().attr(\"content\").map(|s| s.to_string());\n    }\n    if let Ok(sel) = Selector::parse(\"meta[property='og:url']\")\n        && let Some(el) = document.select(&sel).next()\n    {\n        result.og_url = el.value().attr(\"content\").map(|s| s.to_string());\n    }\n    if let Ok(sel) = Selector::parse(\"meta[property='og:type']\")\n        && let Some(el) = document.select(&sel).next()\n    {\n        result.og_type = el.value().attr(\"content\").map(|s| s.to_string());\n    }\n    if let Ok(sel) = Selector::parse(\"meta[property='og:site_name']\")\n        && let Some(el) = document.select(&sel).next()\n    {\n        result.og_site_name = el.value().attr(\"content\").map(|s| s.to_string());\n    }\n}\n\nfn extract_twitter_metadata(document: &Html, result: &mut SeoAndOpenGraphResult) {\n    if let Ok(sel) = Selector::parse(\"meta[name='twitter:card']\")\n        && let 
Some(el) = document.select(&sel).next()\n    {\n        result.twitter_card = el.value().attr(\"content\").map(|s| s.to_string());\n    }\n    if let Ok(sel) = Selector::parse(\"meta[name='twitter:site']\")\n        && let Some(el) = document.select(&sel).next()\n    {\n        result.twitter_site = el.value().attr(\"content\").map(|s| s.to_string());\n    }\n    if let Ok(sel) = Selector::parse(\"meta[name='twitter:creator']\")\n        && let Some(el) = document.select(&sel).next()\n    {\n        result.twitter_creator = el.value().attr(\"content\").map(|s| s.to_string());\n    }\n    if let Ok(sel) = Selector::parse(\"meta[name='twitter:title']\")\n        && let Some(el) = document.select(&sel).next()\n    {\n        result.twitter_title = el.value().attr(\"content\").map(|s| s.to_string());\n    }\n    if let Ok(sel) = Selector::parse(\"meta[name='twitter:description']\")\n        && let Some(el) = document.select(&sel).next()\n    {\n        result.twitter_description = el.value().attr(\"content\").map(|s| s.to_string());\n    }\n    if let Ok(sel) = Selector::parse(\"meta[name='twitter:image']\")\n        && let Some(el) = document.select(&sel).next()\n    {\n        result.twitter_image = el.value().attr(\"content\").map(|s| s.to_string());\n    }\n}\n\nfn build_heading_tree(document: &Html, result: &mut SeoAndOpenGraphResult, max_level: i32) {\n    let selector = match Selector::parse(\"h1, h2, h3, h4, h5, h6\") {\n        Ok(s) => s,\n        Err(_) => return,\n    };\n\n    let headings: Vec<(i32, String, Option<String>)> = document\n        .select(&selector)\n        .filter_map(|el| {\n            let tag = el.value().name();\n            let level = tag.strip_prefix('h').and_then(|s| s.parse::<i32>().ok())?;\n            if level > max_level {\n                return None;\n            }\n            let text = el.text().collect::<String>().trim().to_string();\n            // Strip JS from text\n            let text = text.split('\\n').map(|l| 
l.trim()).collect::<Vec<_>>().join(\" \");\n            use once_cell::sync::Lazy;\n            static RE_WS: Lazy<regex::Regex> = Lazy::new(|| regex::Regex::new(r\"\\s+\").unwrap());\n            let text = RE_WS.replace_all(&text, \" \").trim().to_string();\n            let id = el.value().attr(\"id\").map(|s| s.to_string());\n            Some((level, text, id))\n        })\n        .collect();\n\n    if headings.is_empty() {\n        return;\n    }\n\n    // Build tree structure: use a root node at level 0 and insert children based on heading levels\n\n    let mut items: Vec<Option<HeadingTreeItem>> = headings\n        .iter()\n        .map(|(level, text, id)| Some(HeadingTreeItem::new(*level, text.clone(), id.clone())))\n        .collect();\n\n    // Compute parent relationships using a stack\n    let headings_ref: Vec<(i32, Option<usize>)> = {\n        let mut result_vec = Vec::new();\n        let mut stack2: Vec<(i32, usize)> = Vec::new(); // (level, index)\n        for (idx, (level, _text, _id)) in headings.iter().enumerate() {\n            while let Some(&(top_level, _)) = stack2.last() {\n                if top_level >= *level {\n                    stack2.pop();\n                } else {\n                    break;\n                }\n            }\n            let parent_idx = stack2.last().map(|&(_, idx)| idx);\n            result_vec.push((*level, parent_idx));\n            stack2.push((*level, idx));\n        }\n        result_vec\n    };\n\n    // Build tree bottom-up\n    for idx in (0..items.len()).rev() {\n        if let Some(parent_idx) = headings_ref[idx].1\n            && let Some(child) = items[idx].take()\n            && let Some(ref mut parent) = items[parent_idx]\n        {\n            parent.children.insert(0, child);\n        }\n    }\n\n    // Collect root items (those without parents)\n    let mut root_children: Vec<HeadingTreeItem> = items\n        .into_iter()\n        .enumerate()\n        .filter(|(idx, _)| 
headings_ref[*idx].1.is_none())\n        .filter_map(|(_, item)| item)\n        .collect();\n\n    // Set error for multiple H1s\n    let h1_count = Selector::parse(\"h1\").map(|s| document.select(&s).count()).unwrap_or(0);\n    if h1_count > 1 {\n        fn mark_h1_errors(items: &mut [HeadingTreeItem], h1_count: usize) {\n            for item in items.iter_mut() {\n                if item.level == 1 {\n                    item.error_text = Some(format!(\"Multiple H1s ({}) found.\", h1_count));\n                }\n                mark_h1_errors(&mut item.children, h1_count);\n            }\n        }\n        mark_h1_errors(&mut root_children, h1_count);\n    }\n\n    // Set real_level and check for level mismatches\n    fn fix_real_levels(items: &mut [HeadingTreeItem], real_level: i32) {\n        for item in items.iter_mut() {\n            item.real_level = Some(real_level);\n            if item.level != real_level && item.error_text.is_none() {\n                item.error_text = Some(format!(\n                    \"Heading level {} is not correct. 
Should be {}.\",\n                    item.level, real_level\n                ));\n            }\n            fix_real_levels(&mut item.children, real_level + 1);\n        }\n    }\n    fix_real_levels(&mut root_children, 1);\n\n    let total_count = HeadingTreeItem::get_headings_count(&root_children);\n    let errors_count = HeadingTreeItem::get_headings_with_error_count(&root_children);\n\n    result.heading_tree_items = root_children;\n    result.headings_count = total_count;\n    result.headings_errors_count = errors_count;\n}\n\nfn seo_results_to_table_data(results: &[SeoAndOpenGraphResult]) -> Vec<HashMap<String, String>> {\n    results\n        .iter()\n        .map(|r| {\n            let mut row = HashMap::new();\n            row.insert(\"urlPathAndQuery\".to_string(), r.url_path_and_query.clone());\n            row.insert(\"title\".to_string(), r.title.clone().unwrap_or_default());\n            row.insert(\"h1\".to_string(), r.h1.clone().unwrap_or_default());\n            row.insert(\"description\".to_string(), r.description.clone().unwrap_or_default());\n            row.insert(\"keywords\".to_string(), r.keywords.clone().unwrap_or_default());\n            row.insert(\"deniedByRobotsTxt\".to_string(), r.denied_by_robots_txt.to_string());\n            row.insert(\"robotsIndex\".to_string(), r.robots_index.unwrap_or(1).to_string());\n            row.insert(\n                \"indexing\".to_string(),\n                String::new(), // Will be rendered by renderer\n            );\n            row\n        })\n        .collect()\n}\n\nfn og_results_to_table_data(results: &[SeoAndOpenGraphResult]) -> Vec<HashMap<String, String>> {\n    results\n        .iter()\n        .map(|r| {\n            let mut row = HashMap::new();\n            row.insert(\"urlPathAndQuery\".to_string(), r.url_path_and_query.clone());\n            row.insert(\"ogTitle\".to_string(), r.og_title.clone().unwrap_or_default());\n            row.insert(\n                
\"ogDescription\".to_string(),\n                r.og_description.clone().unwrap_or_default(),\n            );\n            row.insert(\"ogImage\".to_string(), r.og_image.clone().unwrap_or_default());\n            row.insert(\"twitterTitle\".to_string(), r.twitter_title.clone().unwrap_or_default());\n            row.insert(\n                \"twitterDescription\".to_string(),\n                r.twitter_description.clone().unwrap_or_default(),\n            );\n            row.insert(\"twitterImage\".to_string(), r.twitter_image.clone().unwrap_or_default());\n            row\n        })\n        .collect()\n}\n\nfn headings_to_table_data(results: &[SeoAndOpenGraphResult]) -> Vec<HashMap<String, String>> {\n    results\n        .iter()\n        .map(|r| {\n            let mut row = HashMap::new();\n            row.insert(\"urlPathAndQuery\".to_string(), r.url_path_and_query.clone());\n            row.insert(\n                \"headings\".to_string(),\n                HeadingTreeItem::get_heading_tree_txt_list(&r.heading_tree_items),\n            );\n            row.insert(\n                \"headingsHtml\".to_string(),\n                HeadingTreeItem::get_heading_tree_ul_li_list(&r.heading_tree_items),\n            );\n            row.insert(\"headingsCount\".to_string(), r.headings_count.to_string());\n            row.insert(\"headingsErrorsCount\".to_string(), r.headings_errors_count.to_string());\n            row\n        })\n        .collect()\n}\n"
  },
  {
    "path": "src/analysis/skipped_urls_analyzer.rs",
    "content": "// SiteOne Crawler - SkippedUrlsAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::result::visited_url::VisitedUrl;\nuse crate::types::SkippedReason;\n\nconst SUPER_TABLE_SKIPPED_SUMMARY: &str = \"skipped-summary\";\nconst SUPER_TABLE_SKIPPED: &str = \"skipped\";\n\npub struct SkippedUrlsAnalyzer {\n    base: BaseAnalyzer,\n}\n\nimpl Default for SkippedUrlsAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl SkippedUrlsAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n        }\n    }\n\n    fn get_reason_label(reason: &SkippedReason) -> &'static str {\n        match reason {\n            SkippedReason::NotAllowedHost => \"Not allowed host\",\n            SkippedReason::RobotsTxt => \"Robots.txt\",\n            SkippedReason::ExceedsMaxDepth => \"Max depth\",\n        }\n    }\n\n    fn get_source_short_name(source_attr: i32) -> &'static str {\n        match source_attr {\n            5 => \"Initial URL\",\n            10 => \"<a href>\",\n            20 => \"<img src>\",\n            21 => \"<img srcset>\",\n            22 => \"<input src>\",\n            23 => \"<source src>\",\n            24 => \"<video src>\",\n            25 => \"<audio src>\",\n            30 => \"<script src>\",\n            40 => \"inline <script src>\",\n            50 => \"<link href>\",\n            60 => \"css url()\",\n            70 => \"js url\",\n            80 => \"redirect\",\n            90 => \"sitemap\",\n            _ => \"unknown\",\n        }\n    }\n}\n\nimpl Analyzer for SkippedUrlsAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n       
 let skipped_entries = status.get_skipped_urls();\n\n        // Get initial host and scheme from the first visited URL\n        let visited = status.get_visited_urls();\n        let (initial_host, initial_scheme) = visited\n            .first()\n            .and_then(|v| url::Url::parse(&v.url).ok())\n            .map(|parsed| {\n                (\n                    Some(parsed.host_str().unwrap_or(\"\").to_string()),\n                    Some(parsed.scheme().to_string()),\n                )\n            })\n            .unwrap_or((None, None));\n\n        // Build summary: group by reason + domain\n        let mut summary_map: HashMap<(String, String), usize> = HashMap::new();\n        for entry in &skipped_entries {\n            let reason_label = Self::get_reason_label(&entry.reason).to_string();\n            let domain = url::Url::parse(&entry.url)\n                .ok()\n                .and_then(|u| u.host_str().map(|h| h.to_string()))\n                .unwrap_or_else(|| {\n                    // For relative URLs, extract domain from path\n                    let visited = status.get_visited_urls();\n                    visited\n                        .first()\n                        .and_then(|v| v.get_host())\n                        .unwrap_or_else(|| \"unknown\".to_string())\n                });\n            *summary_map.entry((reason_label, domain)).or_insert(0) += 1;\n        }\n\n        let mut skipped_urls_summary: Vec<HashMap<String, String>> = summary_map\n            .iter()\n            .map(|((reason, domain), count)| {\n                let mut row = HashMap::new();\n                row.insert(\"reason\".to_string(), reason.clone());\n                row.insert(\"domain\".to_string(), domain.clone());\n                row.insert(\"count\".to_string(), count.to_string());\n                row\n            })\n            .collect();\n        skipped_urls_summary.sort_by(|a, b| {\n            let count_a: usize = a.get(\"count\").and_then(|c| 
c.parse().ok()).unwrap_or(0);\n            let count_b: usize = b.get(\"count\").and_then(|c| c.parse().ok()).unwrap_or(0);\n            count_b.cmp(&count_a)\n        });\n\n        // Build detail: each skipped URL as a row\n        let visited_urls = status.get_visited_urls();\n        let visited_map: HashMap<String, &VisitedUrl> = visited_urls.iter().map(|v| (v.uq_id.clone(), v)).collect();\n\n        let mut skipped_urls: Vec<HashMap<String, String>> = skipped_entries\n            .iter()\n            .map(|entry| {\n                let mut row = HashMap::new();\n                row.insert(\"reason\".to_string(), Self::get_reason_label(&entry.reason).to_string());\n\n                // Strip scheme and host only for same-domain URLs\n                let skipped_url = crate::utils::get_url_without_scheme_and_host(\n                    &entry.url,\n                    initial_host.as_deref(),\n                    initial_scheme.as_deref(),\n                );\n                row.insert(\"url\".to_string(), skipped_url);\n                row.insert(\n                    \"sourceAttr\".to_string(),\n                    Self::get_source_short_name(entry.source_attr).to_string(),\n                );\n\n                // Resolve source URL from source_uq_id\n                let source_url = visited_map\n                    .get(&entry.source_uq_id)\n                    .map(|v| {\n                        crate::utils::get_url_without_scheme_and_host(\n                            &v.url,\n                            initial_host.as_deref(),\n                            initial_scheme.as_deref(),\n                        )\n                    })\n                    .unwrap_or_default();\n                row.insert(\"sourceUqId\".to_string(), source_url);\n                row\n            })\n            .collect();\n        skipped_urls.sort_by(|a, b| {\n            let url_a = a.get(\"url\").map(|s| s.as_str()).unwrap_or(\"\");\n            let url_b = 
b.get(\"url\").map(|s| s.as_str()).unwrap_or(\"\");\n            url_a.cmp(url_b)\n        });\n\n        let url_column_width = 60;\n\n        // Skipped URLs summary table\n        let summary_columns = vec![\n            SuperTableColumn::new(\n                \"reason\".to_string(),\n                \"Reason\".to_string(),\n                18,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"domain\".to_string(),\n                \"Domain\".to_string(),\n                -1, // AUTO_WIDTH\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"count\".to_string(),\n                \"Unique URLs\".to_string(),\n                11,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let mut super_table_summary = SuperTable::new(\n            SUPER_TABLE_SKIPPED_SUMMARY.to_string(),\n            \"Skipped URLs Summary\".to_string(),\n            \"No skipped URLs found.\".to_string(),\n            summary_columns,\n            true,\n            Some(\"count\".to_string()),\n            \"DESC\".to_string(),\n            None,\n            None,\n            Some(\"Skipped URLs\".to_string()),\n        );\n\n        super_table_summary.set_data(skipped_urls_summary);\n        status.configure_super_table_url_stripping(&mut super_table_summary);\n        output.add_super_table(&super_table_summary);\n        status.add_super_table_at_beginning(super_table_summary);\n\n        // Skipped URLs table\n        let detail_columns = vec![\n            
SuperTableColumn::new(\n                \"reason\".to_string(),\n                \"Reason\".to_string(),\n                18,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"url\".to_string(),\n                \"Skipped URL\".to_string(),\n                url_column_width,\n                None,\n                None,\n                true,\n                true,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"sourceAttr\".to_string(),\n                \"Source\".to_string(),\n                19,\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"sourceUqId\".to_string(),\n                \"Found at URL\".to_string(),\n                url_column_width,\n                None,\n                None,\n                true,\n                true,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let count_skipped = skipped_urls.len();\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_SKIPPED.to_string(),\n            \"Skipped URLs\".to_string(),\n            \"No skipped URLs found.\".to_string(),\n            detail_columns,\n            true,\n            Some(\"url\".to_string()),\n            \"ASC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_data(skipped_urls);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_beginning(super_table);\n\n        
status.add_summary_item_by_ranges(\n            \"skipped\",\n            count_skipped as f64,\n            &[(0.0, 0.0), (1.0, 2.0), (3.0, 9.0), (10.0, f64::MAX)],\n            &[\n                \"Skipped URLs - no skipped URLs found\",\n                \"Skipped URLs - {} skipped URLs found\",\n                \"Skipped URLs - {} skipped URLs found\",\n                \"Skipped URLs - {} skipped URLs found\",\n            ],\n        );\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        6\n    }\n\n    fn get_name(&self) -> &str {\n        \"SkippedUrlsAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n"
  },
  {
    "path": "src/analysis/slowest_analyzer.rs",
    "content": "// SiteOne Crawler - SlowestAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\nconst SUPER_TABLE_SLOWEST_URLS: &str = \"slowest-urls\";\n\npub struct SlowestAnalyzer {\n    base: BaseAnalyzer,\n    slowest_top_limit: usize,\n    slowest_min_time: f64,\n    slowest_max_time: f64,\n}\n\nimpl Default for SlowestAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl SlowestAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n            slowest_top_limit: 20,\n            slowest_min_time: 0.01,\n            slowest_max_time: 3.0,\n        }\n    }\n\n    /// Set configuration from CoreOptions.\n    pub fn set_config(&mut self, slowest_top_limit: usize, slowest_min_time: f64, slowest_max_time: f64) {\n        self.slowest_top_limit = slowest_top_limit;\n        self.slowest_min_time = slowest_min_time;\n        self.slowest_max_time = slowest_max_time;\n    }\n}\n\nimpl Analyzer for SlowestAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        let visited_urls = status.get_visited_urls();\n\n        let mut slow_urls: Vec<_> = visited_urls\n            .iter()\n            .filter(|u| {\n                u.is_allowed_for_crawling\n                    && u.content_type == ContentTypeId::Html\n                    && u.request_time >= self.slowest_min_time\n            })\n            .cloned()\n            .collect();\n\n        slow_urls.sort_by(|a, b| {\n            b.request_time\n                .partial_cmp(&a.request_time)\n                .unwrap_or(std::cmp::Ordering::Equal)\n        
});\n        slow_urls.truncate(self.slowest_top_limit);\n\n        let console_width = utils::get_console_width();\n        let url_column_width = (console_width as i32 - 25).max(20);\n\n        let columns = vec![\n            SuperTableColumn::new(\n                \"requestTime\".to_string(),\n                \"Time\".to_string(),\n                6,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<f64>() {\n                        utils::get_colored_request_time(v, 6)\n                    } else {\n                        value.to_string()\n                    }\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"statusCode\".to_string(),\n                \"Status\".to_string(),\n                6,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<i32>() {\n                        utils::get_colored_status_code(v, 6)\n                    } else {\n                        value.to_string()\n                    }\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"url\".to_string(),\n                \"Slow URL\".to_string(),\n                url_column_width,\n                None,\n                None,\n                true,\n                true,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let data: Vec<HashMap<String, String>> = slow_urls\n            .iter()\n            .map(|u| {\n                let mut row = HashMap::new();\n                row.insert(\"requestTime\".to_string(), format!(\"{:.4}\", 
u.request_time));\n                row.insert(\"statusCode\".to_string(), u.status_code.to_string());\n                row.insert(\"url\".to_string(), u.url.clone());\n                row\n            })\n            .collect();\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_SLOWEST_URLS.to_string(),\n            \"TOP slowest URLs\".to_string(),\n            format!(\"No slow URLs slower than {} second(s) found.\", self.slowest_min_time),\n            columns,\n            true,\n            Some(\"requestTime\".to_string()),\n            \"DESC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_data(data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_beginning(super_table);\n\n        // Summary for very slow URLs\n        let very_slow_count = visited_urls\n            .iter()\n            .filter(|u| u.content_type == ContentTypeId::Html && u.request_time >= self.slowest_max_time)\n            .count();\n\n        status.add_summary_item_by_ranges(\n            \"slowUrls\",\n            very_slow_count as f64,\n            &[(0.0, 0.0), (1.0, 2.0), (3.0, 5.0), (6.0, f64::MAX)],\n            &[\n                &format!(\n                    \"Performance OK - all non-media URLs are faster than {} seconds\",\n                    self.slowest_max_time\n                ),\n                &format!(\n                    \"Performance NOTICE - {{}} slow non-media URL(s) found (slower than {} seconds)\",\n                    self.slowest_max_time\n                ),\n                &format!(\n                    \"Performance WARNING - {{}} slow non-media URLs found (slower than {} seconds)\",\n                    self.slowest_max_time\n                ),\n                &format!(\n                    \"Performance CRITICAL - {{}} slow non-media URLs found (slower than {} 
seconds)\",\n                    self.slowest_max_time\n                ),\n            ],\n        );\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        110\n    }\n\n    fn get_name(&self) -> &str {\n        \"SlowestAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n"
  },
  {
    "path": "src/analysis/source_domains_analyzer.rs",
    "content": "// SiteOne Crawler - SourceDomainsAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\nconst SUPER_TABLE_SOURCE_DOMAINS: &str = \"source-domains\";\n\npub struct SourceDomainsAnalyzer {\n    base: BaseAnalyzer,\n}\n\nimpl Default for SourceDomainsAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl SourceDomainsAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n        }\n    }\n}\n\nimpl Analyzer for SourceDomainsAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        let visited_urls = status.get_visited_urls();\n        let content_type_ids = get_all_content_type_ids();\n\n        // Gather stats per domain and content type\n        let mut stats: HashMap<String, HashMap<String, DomainContentTypeStat>> = HashMap::new();\n\n        for visited_url in &visited_urls {\n            if visited_url.has_error_status_code() {\n                continue;\n            }\n            let url_host = visited_url.get_host().unwrap_or_else(|| \"unknown\".to_string());\n\n            let host_stats = stats.entry(url_host.clone()).or_default();\n            let content_type_id = visited_url.content_type;\n            let key = format!(\"{:?}\", content_type_id);\n\n            let stat = host_stats.entry(key).or_insert_with(|| DomainContentTypeStat {\n                count: 0,\n                total_size: 0,\n                total_exec_time: 0.0,\n            });\n\n            stat.count += 1;\n            stat.total_size += visited_url.size.unwrap_or(0);\n            stat.total_exec_time += 
visited_url.request_time;\n        }\n\n        // Convert stats to data rows\n        let delimiter = utils::get_color_text(\"/\", \"dark-gray\", false);\n        let mut data: Vec<HashMap<String, String>> = Vec::new();\n        let mut used_content_types: Vec<String> = Vec::new();\n\n        for (domain, host_stats) in &stats {\n            let mut row = HashMap::new();\n            row.insert(\"domain\".to_string(), domain.clone());\n\n            let mut total_count: usize = 0;\n            let mut total_size: i64 = 0;\n            let mut total_time: f64 = 0.0;\n\n            for ct_id in &content_type_ids {\n                let key = format!(\"{:?}\", ct_id);\n                let ct_name = ct_id.name().to_string();\n\n                if let Some(stat) = host_stats.get(&key) {\n                    total_count += stat.count;\n                    total_size += stat.total_size;\n                    total_time += stat.total_exec_time;\n\n                    let value = format!(\n                        \"{}/{}/{}\",\n                        stat.count,\n                        utils::get_formatted_size(stat.total_size, 0).replace(' ', \"\"),\n                        utils::get_formatted_duration(stat.total_exec_time).replace(' ', \"\"),\n                    );\n                    row.insert(ct_name.clone(), value);\n\n                    if !used_content_types.contains(&ct_name) {\n                        used_content_types.push(ct_name);\n                    }\n                } else {\n                    row.insert(ct_name, String::new());\n                }\n            }\n\n            row.insert(\n                \"totals\".to_string(),\n                format!(\n                    \"{}/{}/{}\",\n                    total_count,\n                    utils::get_formatted_size(total_size, 0).replace(' ', \"\"),\n                    utils::get_formatted_duration(total_time).replace(' ', \"\"),\n                ),\n            );\n            
row.insert(\"totalCount\".to_string(), total_count.to_string());\n            data.push(row);\n        }\n\n        // Build columns\n        let mut columns = vec![\n            SuperTableColumn::new(\n                \"domain\".to_string(),\n                \"Domain\".to_string(),\n                -1, // AUTO_WIDTH\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"totals\".to_string(),\n                \"Totals\".to_string(),\n                -1, // AUTO_WIDTH\n                Some(Box::new({\n                    let delim = delimiter.clone();\n                    move |value: &str, render_into: &str| {\n                        if render_into == \"html\" {\n                            value.replace('/', &format!(\" {} \", delim))\n                        } else {\n                            value.replace('/', &delim)\n                        }\n                    }\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        for ct_name in &used_content_types {\n            let delim = delimiter.clone();\n            columns.push(SuperTableColumn::new(\n                ct_name.clone(),\n                ct_name.clone(),\n                -1, // AUTO_WIDTH\n                Some(Box::new(move |value: &str, render_into: &str| {\n                    if render_into == \"html\" {\n                        value.replace('/', &format!(\" {} \", delim))\n                    } else {\n                        value.replace('/', &delim)\n                    }\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ));\n        }\n\n       
 let mut super_table = SuperTable::new(\n            SUPER_TABLE_SOURCE_DOMAINS.to_string(),\n            \"Source domains\".to_string(),\n            \"No source domains found.\".to_string(),\n            columns,\n            false,\n            Some(\"totalCount\".to_string()),\n            \"DESC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_data(data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_beginning(super_table);\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        205\n    }\n\n    fn get_name(&self) -> &str {\n        \"SourceDomainsAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n\nstruct DomainContentTypeStat {\n    count: usize,\n    total_size: i64,\n    total_exec_time: f64,\n}\n\nfn get_all_content_type_ids() -> Vec<ContentTypeId> {\n    vec![\n        ContentTypeId::Html,\n        ContentTypeId::Image,\n        ContentTypeId::Script,\n        ContentTypeId::Stylesheet,\n        ContentTypeId::Font,\n        ContentTypeId::Document,\n        ContentTypeId::Audio,\n        ContentTypeId::Video,\n        ContentTypeId::Json,\n        ContentTypeId::Xml,\n        ContentTypeId::Redirect,\n        ContentTypeId::Other,\n    ]\n}\n"
  },
  {
    "path": "src/analysis/ssl_tls_analyzer.rs",
    "content": "// SiteOne Crawler - SslTlsAnalyzer\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\nuse std::net::TcpStream;\nuse std::process::Command;\nuse std::sync::Arc;\nuse std::time::Instant;\n\nuse rustls::pki_types::ServerName;\nuse x509_parser::prelude::*;\n\nuse crate::analysis::analyzer::Analyzer;\nuse crate::analysis::base_analyzer::BaseAnalyzer;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::utils;\n\nconst SUPER_TABLE_CERTIFICATE_INFO: &str = \"certificate-info\";\n\npub struct SslTlsAnalyzer {\n    base: BaseAnalyzer,\n}\n\nimpl Default for SslTlsAnalyzer {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl SslTlsAnalyzer {\n    pub fn new() -> Self {\n        Self {\n            base: BaseAnalyzer::new(),\n        }\n    }\n\n    fn get_tls_certificate_info(&self, hostname: &str, port: u16, status: &Status) -> HashMap<String, String> {\n        if !is_hostname_shell_safe(hostname) {\n            let mut result = HashMap::new();\n            let error = format!(\"Hostname '{}' contains unsafe characters for shell commands.\", hostname);\n            status.add_critical_to_summary(\"ssl-hostname-unsafe\", &error);\n            result.insert(\"Errors\".to_string(), error);\n            return result;\n        }\n\n        let mut result = HashMap::new();\n        let mut errors: Vec<String> = Vec::new();\n\n        // Build a TLS config that captures the certificate\n        let mut root_store = rustls::RootCertStore::empty();\n\n        // Add webpki roots\n        for cert in rustls_native_certs::load_native_certs().certs {\n            let _ = root_store.add(cert);\n        }\n\n        let config = rustls::ClientConfig::builder()\n            .with_root_certificates(root_store)\n            .with_no_client_auth();\n\n        let server_name = match 
ServerName::try_from(hostname.to_string()) {\n            Ok(sn) => sn,\n            Err(e) => {\n                let error = format!(\"Invalid hostname '{}': {}\", hostname, e);\n                status.add_critical_to_summary(\"ssl-certificate-connect\", &error);\n                errors.push(error);\n                result.insert(\"Errors\".to_string(), errors.join(\", \"));\n                return result;\n            }\n        };\n\n        let mut conn = match rustls::ClientConnection::new(Arc::new(config), server_name) {\n            Ok(c) => c,\n            Err(e) => {\n                let error = format!(\"Unable to create TLS connection to {}:{}: {}\", hostname, port, e);\n                status.add_critical_to_summary(\"ssl-certificate-connect\", &error);\n                errors.push(error);\n                result.insert(\"Errors\".to_string(), errors.join(\", \"));\n                return result;\n            }\n        };\n\n        let addr = format!(\"{}:{}\", hostname, port);\n        let mut sock = match TcpStream::connect(&addr) {\n            Ok(s) => s,\n            Err(e) => {\n                let error = format!(\"Unable to connect to {}:{}: {}\", hostname, port, e);\n                status.add_critical_to_summary(\"ssl-certificate-connect\", &error);\n                errors.push(error);\n                result.insert(\"Errors\".to_string(), errors.join(\", \"));\n                return result;\n            }\n        };\n\n        // Set a short timeout - we only need the TLS handshake, not data\n        let _ = sock.set_read_timeout(Some(std::time::Duration::from_secs(5)));\n        let _ = sock.set_write_timeout(Some(std::time::Duration::from_secs(5)));\n\n        // Complete the TLS handshake\n        loop {\n            if conn.is_handshaking() {\n                match conn.complete_io(&mut sock) {\n                    Ok(_) => {}\n                    Err(_) => break,\n                }\n            } else {\n                break;\n      
      }\n        }\n\n        // Extract peer certificates\n        let peer_certs = match conn.peer_certificates() {\n            Some(certs) if !certs.is_empty() => certs.to_vec(),\n            _ => {\n                let error = \"No certificate found.\".to_string();\n                status.add_critical_to_summary(\"ssl-certificate-missing\", &error);\n                errors.push(error);\n                result.insert(\"Errors\".to_string(), errors.join(\", \"));\n                return result;\n            }\n        };\n\n        // Parse the first (leaf) certificate\n        let leaf_cert = &peer_certs[0];\n        let (_, cert) = match X509Certificate::from_der(leaf_cert.as_ref()) {\n            Ok(parsed) => parsed,\n            Err(e) => {\n                let error = format!(\"Unable to parse certificate: {}\", e);\n                status.add_critical_to_summary(\"ssl-certificate-parse\", &error);\n                errors.push(error);\n                result.insert(\"Errors\".to_string(), errors.join(\", \"));\n                return result;\n            }\n        };\n\n        // Issuer - add spaces around '='\n        let issuer = add_spaces_around_equals(&cert.issuer().to_string());\n        result.insert(\"Issuer\".to_string(), issuer.clone());\n\n        // Subject - add spaces around '='\n        let subject = add_spaces_around_equals(&cert.subject().to_string());\n        result.insert(\"Subject\".to_string(), subject.clone());\n\n        // Valid from\n        let not_before = cert.validity().not_before;\n        let valid_from_str = format_asn1_time(&not_before);\n        let now = chrono::Utc::now();\n\n        if let Some(nb_dt) = asn1_time_to_datetime(&not_before) {\n            if now < nb_dt {\n                let diff = (nb_dt - now).num_seconds().unsigned_abs() as i64;\n                let error = format!(\n                    \"SSL/TLS certificate is not yet valid, it will be in {}.\",\n                    utils::get_formatted_age(diff)\n 
               );\n                status.add_critical_to_summary(\"ssl-certificate-valid-from\", &error);\n                errors.push(error);\n                result.insert(\"Valid from\".to_string(), format!(\"{} (NOT YET VALID)\", valid_from_str));\n            } else {\n                let diff = (now - nb_dt).num_seconds().unsigned_abs() as i64;\n                result.insert(\n                    \"Valid from\".to_string(),\n                    format!(\"{} (VALID already {})\", valid_from_str, utils::get_formatted_age(diff)),\n                );\n            }\n        } else {\n            result.insert(\"Valid from\".to_string(), valid_from_str);\n        }\n\n        // Valid to\n        let not_after = cert.validity().not_after;\n        let valid_to_str = format_asn1_time(&not_after);\n        let valid_to_orig = valid_to_str.clone();\n\n        if let Some(na_dt) = asn1_time_to_datetime(&not_after) {\n            if now > na_dt {\n                let diff = (now - na_dt).num_seconds().unsigned_abs() as i64;\n                let expired_ago = format!(\"{} ago\", utils::get_formatted_age(diff));\n                let error = format!(\"SSL/TLS certificate expired {}.\", expired_ago);\n                status.add_critical_to_summary(\"ssl-certificate-valid-to\", &error);\n                errors.push(error);\n                result.insert(\n                    \"Valid to\".to_string(),\n                    format!(\"{} (EXPIRED {})\", valid_to_str, expired_ago),\n                );\n            } else {\n                let diff = (na_dt - now).num_seconds().unsigned_abs() as i64;\n                result.insert(\n                    \"Valid to\".to_string(),\n                    format!(\"{} (VALID still for {})\", valid_to_str, utils::get_formatted_age(diff)),\n                );\n            }\n        } else {\n            result.insert(\"Valid to\".to_string(), valid_to_str);\n        }\n\n        // RAW certificate output - get via openssl command\n     
   let certificate_output = Command::new(\"sh\")\n            .arg(\"-c\")\n            .arg(format!(\n                \"timeout 3s sh -c \\\"echo | openssl s_client -connect {}:{} -servername {} 2>/dev/null | openssl x509 -text -noout\\\"\",\n                hostname, port, hostname\n            ))\n            .output()\n            .map(|o| {\n                let stdout = String::from_utf8_lossy(&o.stdout).to_string();\n                if stdout.trim().is_empty() {\n                    // Fallback to stderr if stdout is empty\n                    String::from_utf8_lossy(&o.stderr).to_string()\n                } else {\n                    stdout\n                }\n            })\n            .unwrap_or_default();\n\n        if !certificate_output.trim().is_empty() {\n            result.insert(\"RAW certificate output\".to_string(), certificate_output);\n        }\n\n        // Supported protocols - test each protocol via openssl s_client\n        let protocols = [\n            (\"ssl2\", \"SSLv2\"),\n            (\"ssl3\", \"SSLv3\"),\n            (\"tls1\", \"TLSv1.0\"),\n            (\"tls1_1\", \"TLSv1.1\"),\n            (\"tls1_2\", \"TLSv1.2\"),\n            (\"tls1_3\", \"TLSv1.3\"),\n        ];\n        let unsafe_protocols = [\"ssl2\", \"ssl3\", \"tls1\", \"tls1_1\"];\n        let mut supported_protocols: Vec<String> = Vec::new();\n        let mut protocols_output = String::new();\n\n        for (protocol_code, protocol_name) in &protocols {\n            let output = Command::new(\"sh\")\n                .arg(\"-c\")\n                .arg(format!(\n                    \"timeout 3s sh -c \\\"echo 'Q' | openssl s_client -connect {}:{} -servername {} -{} 2>&1\\\"\",\n                    hostname, port, hostname, protocol_code\n                ))\n                .output();\n\n            let output_str = match output {\n                Ok(o) => String::from_utf8_lossy(&o.stdout).to_string() + &String::from_utf8_lossy(&o.stderr),\n                Err(_) => 
String::new(),\n            };\n\n            protocols_output.push_str(&format!(\"\\n=== {} ===\\n{}\", protocol_code, output_str));\n\n            if output_str.contains(\"Certificate chain\") {\n                supported_protocols.push(protocol_name.to_string());\n                if unsafe_protocols.contains(protocol_code) {\n                    status.add_critical_to_summary(\n                        \"ssl-protocol-unsafe\",\n                        &format!(\"SSL/TLS protocol {} is unsafe.\", protocol_name),\n                    );\n                }\n            }\n        }\n\n        if !supported_protocols.is_empty() {\n            result.insert(\"Supported protocols\".to_string(), supported_protocols.join(\", \"));\n        } else {\n            // Fallback to rustls-detected protocol if openssl is not available\n            let protocol_version = conn\n                .protocol_version()\n                .map(|v| {\n                    let raw = format!(\"{:?}\", v);\n                    raw.replace('_', \".\")\n                })\n                .unwrap_or_else(|| \"Unknown\".to_string());\n            result.insert(\"Supported protocols\".to_string(), protocol_version.clone());\n        }\n\n        // Add TLSv1.3 support warning\n        let has_tls13 = supported_protocols.iter().any(|p| p.contains(\"1.3\"));\n        let has_tls12 = supported_protocols.iter().any(|p| p.contains(\"1.2\"));\n        if !has_tls13 {\n            if !has_tls12 {\n                status.add_critical_to_summary(\n                    \"ssl-protocol-hint\",\n                    \"SSL/TLS protocol TLSv1.2 is not supported. Ask your admin/provider to add TLSv1.2 support.\",\n                );\n            } else {\n                status.add_warning_to_summary(\n                    \"ssl-protocol-hint\",\n                    \"Latest SSL/TLS protocol TLSv1.3 is not supported. 
Ask your admin/provider to add TLSv1.3 support.\",\n                );\n            }\n        }\n\n        if !protocols_output.is_empty() {\n            result.insert(\"RAW protocols output\".to_string(), protocols_output);\n        }\n\n        // Set summary based on errors\n        if errors.is_empty() && !issuer.is_empty() {\n            status.add_ok_to_summary(\n                \"ssl-certificate-valid\",\n                &format!(\n                    \"SSL/TLS certificate is valid until {}. Issued by {}. Subject is {}.\",\n                    valid_to_orig, issuer, subject\n                ),\n            );\n            status.add_ok_to_summary(\n                \"certificate-info\",\n                &format!(\"SSL/TLS certificate issued by '{}'.\", issuer),\n            );\n        } else if !errors.is_empty() {\n            result.insert(\"Errors\".to_string(), errors.join(\", \"));\n        }\n\n        if issuer.is_empty() && errors.is_empty() {\n            status.add_critical_to_summary(\"certificate-info\", \"SSL/TLS: unable to load certificate info\");\n        }\n\n        result\n    }\n}\n\nimpl Analyzer for SslTlsAnalyzer {\n    fn analyze(&mut self, status: &Status, output: &mut dyn Output) {\n        // Find the initial URL from visited URLs (the one with SOURCE_INIT_URL source_attr)\n        let visited_urls = status.get_visited_urls();\n        let initial_url = visited_urls\n            .iter()\n            .find(|u| u.source_attr == crate::result::visited_url::SOURCE_INIT_URL)\n            .map(|u| u.url.clone())\n            .or_else(|| visited_urls.first().map(|u| u.url.clone()));\n\n        let initial_url = match initial_url {\n            Some(url) => url,\n            None => return,\n        };\n\n        if !initial_url.starts_with(\"https://\") {\n            status.add_notice_to_summary(\"ssl-tls-analyzer\", \"SSL/TLS not supported, analyzer skipped.\");\n            return;\n        }\n\n        // Extract hostname from URL\n  
      let hostname = match url::Url::parse(&initial_url) {\n            Ok(parsed) => parsed.host_str().unwrap_or(\"\").to_string(),\n            Err(_) => {\n                status.add_critical_to_summary(\"ssl-tls-analyzer\", \"SSL/TLS: unable to parse initial URL\");\n                return;\n            }\n        };\n\n        if hostname.is_empty() {\n            return;\n        }\n\n        let s = Instant::now();\n        let cert_info = self.get_tls_certificate_info(&hostname, 443, status);\n        self.base\n            .measure_exec_time(\"SslTlsAnalyzer\", \"getTLSandSSLCertificateInfo\", s);\n\n        let console_width = utils::get_console_width();\n\n        let mut table_data: Vec<HashMap<String, String>> = Vec::new();\n        let display_order = [\n            \"Issuer\",\n            \"Subject\",\n            \"Valid from\",\n            \"Valid to\",\n            \"Supported protocols\",\n            \"Errors\",\n            \"RAW certificate output\",\n            \"RAW protocols output\",\n        ];\n\n        for key in &display_order {\n            if let Some(value) = cert_info.get(*key)\n                && !value.is_empty()\n            {\n                let mut row = HashMap::new();\n                row.insert(\"info\".to_string(), key.to_string());\n                row.insert(\"value\".to_string(), value.clone());\n                table_data.push(row);\n            }\n        }\n\n        let columns = vec![\n            SuperTableColumn::new(\n                \"info\".to_string(),\n                \"Info\".to_string(),\n                -1, // AUTO_WIDTH\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"value\".to_string(),\n                \"Text\".to_string(),\n                (console_width as i32 - 30).max(20),\n                
Some(Box::new(|value: &str, render_into: &str| {\n                    if render_into == \"html\" {\n                        value.replace(' ', \"&nbsp;\").replace('\\n', \"<br>\")\n                    } else {\n                        value.to_string()\n                    }\n                })),\n                None,\n                true,\n                true,\n                false,\n                false,\n                None,\n            ),\n        ];\n\n        let mut super_table = SuperTable::new(\n            SUPER_TABLE_CERTIFICATE_INFO.to_string(),\n            \"SSL/TLS info\".to_string(),\n            \"No SSL/TLS info.\".to_string(),\n            columns,\n            true,\n            None,\n            \"ASC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        super_table.set_data(table_data);\n        status.configure_super_table_url_stripping(&mut super_table);\n        output.add_super_table(&super_table);\n        status.add_super_table_at_beginning(super_table);\n    }\n\n    fn should_be_activated(&self) -> bool {\n        true\n    }\n\n    fn get_order(&self) -> i32 {\n        20\n    }\n\n    fn get_name(&self) -> &str {\n        \"SslTlsAnalyzer\"\n    }\n\n    fn get_exec_times(&self) -> &HashMap<String, f64> {\n        self.base.get_exec_times()\n    }\n\n    fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        self.base.get_exec_counts()\n    }\n}\n\nfn format_asn1_time(time: &ASN1Time) -> String {\n    // ASN1Time implements Display, but we replace \"+00:00\" with \"GMT\"\n    format!(\"{}\", time).replace(\"+00:00\", \"GMT\")\n}\n\nfn add_spaces_around_equals(s: &str) -> String {\n    use once_cell::sync::Lazy;\n    static RE_EQUALS: Lazy<regex::Regex> = Lazy::new(|| regex::Regex::new(r\"(\\w)=(\\S)\").unwrap());\n    RE_EQUALS.replace_all(s, \"$1 = $2\").to_string()\n}\n\nfn asn1_time_to_datetime(time: &ASN1Time) -> Option<chrono::DateTime<chrono::Utc>> {\n    // ASN1Time has 
a timestamp() method that gives epoch seconds\n    let epoch = time.timestamp();\n    chrono::DateTime::from_timestamp(epoch, 0)\n}\n\n/// Validate that a hostname is safe to use in shell commands.\n/// Only allows alphanumeric chars, dots, and hyphens to prevent command injection.\nfn is_hostname_shell_safe(hostname: &str) -> bool {\n    !hostname.is_empty()\n        && hostname\n            .chars()\n            .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-')\n}\n"
  },
  {
    "path": "src/components/mod.rs",
    "content": "pub mod summary;\r\npub mod super_table;\r\npub mod super_table_column;\r\n"
  },
  {
    "path": "src/components/summary/item.rs",
    "content": "// SiteOne Crawler - Summary Item\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse serde::{Deserialize, Serialize};\n\nuse crate::components::summary::item_status::ItemStatus;\nuse crate::utils;\n\n#[derive(Debug, Clone, Serialize, Deserialize)]\n#[serde(rename_all = \"camelCase\")]\npub struct Item {\n    pub apl_code: String,\n    pub text: String,\n    pub status: ItemStatus,\n}\n\nimpl Item {\n    pub fn new(apl_code: String, text: String, status: ItemStatus) -> Self {\n        Self { apl_code, text, status }\n    }\n\n    pub fn get_as_html(&self) -> String {\n        let icon = match self.status {\n            ItemStatus::Ok => \"\\u{2705}\",              // checkmark\n            ItemStatus::Notice => \"\\u{23E9}\",          // fast forward\n            ItemStatus::Warning => \"\\u{26A0}\\u{FE0F}\", // warning\n            ItemStatus::Critical => \"\\u{26D4}\",        // no entry\n            ItemStatus::Info => \"\\u{1F4CC}\",           // pushpin\n        };\n\n        let clean_text = utils::remove_ansi_colors(&self.text);\n        let escaped = html_escape(&clean_text);\n        let trimmed = escaped.trim_end_matches(['.', ' ']);\n        format!(\"{} {}.\", icon, trimmed)\n    }\n\n    pub fn get_as_console_text(&self) -> String {\n        let icon = match self.status {\n            ItemStatus::Ok => \"\\u{2705}\",\n            ItemStatus::Notice => \"\\u{23E9}\",\n            ItemStatus::Warning => \"\\u{26A0}\\u{FE0F}\",\n            ItemStatus::Critical => \"\\u{26D4}\",\n            ItemStatus::Info => \"\\u{1F4CC}\",\n        };\n\n        let trimmed = self.text.trim_end_matches(['.', ' ']);\n        format!(\"{} {}.\", icon, trimmed)\n    }\n}\n\nfn html_escape(s: &str) -> String {\n    s.replace('&', \"&amp;\")\n        .replace('<', \"&lt;\")\n        .replace('>', \"&gt;\")\n        .replace('\"', \"&quot;\")\n        .replace('\\'', \"&#39;\")\n}\n"
  },
  {
    "path": "src/components/summary/item_status.rs",
    "content": "// SiteOne Crawler - Summary ItemStatus\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse serde::{Deserialize, Serialize};\r\n\r\nuse crate::error::CrawlerError;\r\n\r\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]\r\n#[serde(rename_all = \"UPPERCASE\")]\r\npub enum ItemStatus {\r\n    Ok,\r\n    Notice,\r\n    Warning,\r\n    Critical,\r\n    Info,\r\n}\r\n\r\nimpl ItemStatus {\r\n    pub fn from_range_id(range_id: i32) -> Result<Self, CrawlerError> {\r\n        match range_id {\r\n            0 => Ok(ItemStatus::Ok),\r\n            1 => Ok(ItemStatus::Notice),\r\n            2 => Ok(ItemStatus::Warning),\r\n            3 => Ok(ItemStatus::Critical),\r\n            4 => Ok(ItemStatus::Info),\r\n            _ => Err(CrawlerError::Parse(format!(\r\n                \"ItemStatus::from_range_id: Unknown range ID '{}'\",\r\n                range_id\r\n            ))),\r\n        }\r\n    }\r\n\r\n    pub fn from_text(text: &str) -> Result<Self, CrawlerError> {\r\n        match text.to_uppercase().as_str() {\r\n            \"OK\" => Ok(ItemStatus::Ok),\r\n            \"NOTICE\" => Ok(ItemStatus::Notice),\r\n            \"WARNING\" => Ok(ItemStatus::Warning),\r\n            \"CRITICAL\" => Ok(ItemStatus::Critical),\r\n            \"INFO\" => Ok(ItemStatus::Info),\r\n            _ => Err(CrawlerError::Parse(format!(\r\n                \"ItemStatus::from_text: Unknown status '{}'\",\r\n                text\r\n            ))),\r\n        }\r\n    }\r\n\r\n    pub fn sort_order(&self) -> i32 {\r\n        match self {\r\n            ItemStatus::Critical => 1,\r\n            ItemStatus::Warning => 2,\r\n            ItemStatus::Notice => 3,\r\n            ItemStatus::Ok => 4,\r\n            ItemStatus::Info => 5,\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/components/summary/mod.rs",
    "content": "pub mod item;\r\npub mod item_status;\r\n#[allow(clippy::module_inception)]\r\npub mod summary;\r\n"
  },
  {
    "path": "src/components/summary/summary.rs",
    "content": "// SiteOne Crawler - Summary\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse serde::{Deserialize, Serialize};\n\nuse crate::components::summary::item::Item;\nuse crate::components::summary::item_status::ItemStatus;\nuse crate::utils;\n\n#[derive(Debug, Clone, Default, Serialize, Deserialize)]\npub struct Summary {\n    items: Vec<Item>,\n}\n\nimpl Summary {\n    pub fn new() -> Self {\n        Self { items: Vec::new() }\n    }\n\n    pub fn add_item(&mut self, item: Item) {\n        self.items.push(item);\n    }\n\n    pub fn get_items(&self) -> &[Item] {\n        &self.items\n    }\n\n    fn sort_items(&mut self) {\n        self.items.sort_by_key(|item| item.status.sort_order());\n    }\n\n    pub fn get_as_html(&mut self) -> String {\n        let mut result = String::from(\"<ul>\\n\");\n        self.sort_items();\n        for item in &self.items {\n            result.push_str(&format!(\"    <li>{}</li>\\n\", item.get_as_html()));\n        }\n        result.push_str(\"</ul>\");\n        result\n    }\n\n    pub fn get_as_console_text(&mut self) -> String {\n        let title = \"Summary\";\n        let title_output = format!(\"{}\\n{}\\n\\n\", title, \"-\".repeat(title.len()));\n        let mut result = utils::get_color_text(&title_output, \"blue\", false);\n\n        self.sort_items();\n        for item in &self.items {\n            result.push_str(&item.get_as_console_text());\n            result.push('\\n');\n        }\n        result\n    }\n\n    pub fn get_count_by_item_status(&self, status: ItemStatus) -> usize {\n        self.items.iter().filter(|item| item.status == status).count()\n    }\n}\n"
  },
  {
    "path": "src/components/super_table.rs",
    "content": "// SiteOne Crawler - SuperTable\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\nuse std::sync::RwLock;\n\nuse once_cell::sync::Lazy;\nuse regex::Regex;\nuse serde::Serialize;\n\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::utils;\n\nstatic RE_RELATIVE_URL_PATH: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)^/[a-z0-9\\-_./?\\&#+=%%@()|]*$\").unwrap());\n\npub const POSITION_BEFORE_URL_TABLE: &str = \"before-url-table\";\npub const POSITION_AFTER_URL_TABLE: &str = \"after-url-table\";\n\npub const RENDER_INTO_HTML: &str = \"html\";\npub const RENDER_INTO_CONSOLE: &str = \"console\";\n\nstatic HARD_ROWS_LIMIT: RwLock<usize> = RwLock::new(200);\n\n#[derive(Debug, Serialize)]\npub struct SuperTable {\n    pub apl_code: String,\n    pub title: String,\n    pub description: Option<String>,\n    pub max_rows: Option<usize>,\n    pub forced_tab_label: Option<String>,\n\n    #[serde(skip)]\n    visible_in_html: bool,\n    #[serde(skip)]\n    visible_in_json: bool,\n    #[serde(skip)]\n    visible_in_console: bool,\n    #[serde(skip)]\n    visible_in_console_rows_limit: Option<usize>,\n    #[serde(skip)]\n    show_only_columns_with_values: bool,\n\n    #[serde(skip)]\n    columns: Vec<SuperTableColumn>,\n    #[serde(skip)]\n    position_before_url_table: bool,\n    #[serde(skip)]\n    data: Vec<HashMap<String, String>>,\n    #[serde(skip)]\n    empty_table_message: String,\n    #[serde(skip)]\n    current_order_column: Option<String>,\n    #[serde(skip)]\n    current_order_direction: String,\n    #[serde(skip)]\n    unique_id: String,\n    #[serde(skip)]\n    host_to_strip_from_urls: Option<String>,\n    #[serde(skip)]\n    scheme_of_host_to_strip_from_urls: Option<String>,\n    #[serde(skip)]\n    initial_url: Option<String>,\n    #[serde(skip)]\n    fulltext_enabled: bool,\n    #[serde(skip)]\n    min_rows_for_fulltext: usize,\n    #[serde(skip)]\n    ignore_hard_rows_limit: bool,\n    #[serde(skip)]\n   
 max_hard_rows_limit_reached: bool,\n}\n\nimpl SuperTable {\n    #[allow(clippy::too_many_arguments)]\n    pub fn new(\n        apl_code: String,\n        title: String,\n        empty_table_message: String,\n        columns: Vec<SuperTableColumn>,\n        position_before_url_table: bool,\n        current_order_column: Option<String>,\n        current_order_direction: String,\n        description: Option<String>,\n        max_rows: Option<usize>,\n        forced_tab_label: Option<String>,\n    ) -> Self {\n        let unique_id = generate_unique_id();\n\n        Self {\n            apl_code,\n            title,\n            empty_table_message,\n            columns,\n            position_before_url_table,\n            current_order_column,\n            current_order_direction,\n            description,\n            max_rows,\n            forced_tab_label,\n            unique_id,\n            visible_in_html: true,\n            visible_in_json: true,\n            visible_in_console: true,\n            visible_in_console_rows_limit: None,\n            show_only_columns_with_values: false,\n            data: Vec::new(),\n            host_to_strip_from_urls: None,\n            scheme_of_host_to_strip_from_urls: None,\n            initial_url: None,\n            fulltext_enabled: true,\n            min_rows_for_fulltext: 10,\n            ignore_hard_rows_limit: false,\n            max_hard_rows_limit_reached: false,\n        }\n    }\n\n    pub fn set_data(&mut self, data: Vec<HashMap<String, String>>) {\n        self.data = data;\n        if let Some(ref col) = self.current_order_column.clone() {\n            let dir = self.current_order_direction.clone();\n            self.sort_data(col, &dir);\n        }\n        self.apply_hard_rows_limit();\n        self.remove_columns_with_empty_data();\n    }\n\n    pub fn get_html_output(&self) -> String {\n        if !self.visible_in_html {\n            return String::new();\n        }\n\n        let mut output = 
format!(\"<h2>{}</h2>\", html_escape(&self.title));\n\n        if self.data.is_empty() {\n            output.push_str(&format!(\"<p>{}</p>\", html_escape(&self.empty_table_message)));\n            return output;\n        } else if let Some(ref desc) = self.description {\n            output.push_str(desc);\n            output.push_str(\"<br>\");\n        }\n\n        if self.is_fulltext_enabled() {\n            output.push_str(\"<div class=\\\"fulltext-container\\\">\");\n            output.push_str(&format!(\n                \"    <input type=\\\"text\\\" class=\\\"fulltext\\\" data-uq-id=\\\"{}\\\" style=\\\"width: 300px;\\\" placeholder=\\\"Fulltext search\\\">\",\n                html_escape(&self.unique_id)\n            ));\n            output.push_str(&format!(\n                \"    <span id=\\\"foundRows_{}\\\" class=\\\"found-rows\\\">Found {} row(s).</span>\",\n                html_escape(&self.unique_id),\n                self.data.len()\n            ));\n            output.push_str(\"</div>\");\n        }\n\n        let show_more = self.data.len() > 20;\n\n        let mut extra_classes = vec![self.apl_code.clone()];\n        if show_more {\n            extra_classes.push(\"table-with-show-more\".to_string());\n        }\n\n        output.push_str(&format!(\n            \"<div class='table-container-top{}'>\",\n            if show_more { \" show-more\" } else { \"\" }\n        ));\n        if show_more {\n            output.push_str(&format!(\n                \"<input id='showMore_{}' name='showMore' class='show-more-checkbox' type='checkbox' />\",\n                html_escape(&self.unique_id)\n            ));\n        }\n        output.push_str(&format!(\n            \"<div class='table-container{}'>\",\n            if show_more { \" show-more\" } else { \"\" }\n        ));\n        output.push_str(&format!(\n            \"<table id='{}' border='1' class='table table-bordered table-hover table-sortable {}' style='border-collapse: collapse;'>\",\n         
   html_escape(&self.unique_id),\n            extra_classes.join(\" \")\n        ));\n\n        // thead\n        output.push_str(\"<thead>\");\n        for column in &self.columns {\n            let direction = if self.current_order_column.as_deref() == Some(&column.apl_code)\n                && self.current_order_direction == \"ASC\"\n            {\n                \"DESC\"\n            } else {\n                \"ASC\"\n            };\n\n            let arrow = if self.current_order_column.as_deref() == Some(&column.apl_code) {\n                if self.current_order_direction == \"ASC\" {\n                    \"&nbsp;&#128316;\"\n                } else {\n                    \"&nbsp;&#128317;\"\n                }\n            } else {\n                \"\"\n            };\n\n            let data_type = column.forced_data_type.as_deref().unwrap_or_else(|| {\n                if let Some(first_row) = self.data.first()\n                    && let Some(val) = first_row.get(&column.apl_code)\n                    && val.parse::<f64>().is_ok()\n                {\n                    return \"number\";\n                }\n                \"string\"\n            });\n\n            output.push_str(&format!(\n                \"<th class='sortable-th' data-key='{}' data-type='{}' data-direction='{}' data-label='{}' data-uq-id='{}'>{}{}</th>\",\n                column.apl_code,\n                data_type,\n                direction,\n                html_escape(&column.name),\n                html_escape(&self.unique_id),\n                html_escape(&column.name),\n                arrow\n            ));\n        }\n\n        let initial_root_url = self.initial_url.as_ref().and_then(|url| {\n            let re = regex::Regex::new(r\"^(https?://[^/]+).*$\").ok()?;\n            re.captures(url)\n                .and_then(|caps| caps.get(1))\n                .map(|m| m.as_str().to_string())\n        });\n\n        output.push_str(\"</thead>\");\n        
output.push_str(\"<tbody>\");\n\n        let mut counter = 1usize;\n        let mut max_rows_reached = false;\n\n        for row in &self.data {\n            if let Some(max) = self.max_rows\n                && counter > max\n            {\n                max_rows_reached = true;\n                break;\n            }\n\n            output.push_str(\"<tr>\");\n            for column in &self.columns {\n                let value = row.get(&column.apl_code).cloned().unwrap_or_default();\n                let mut formatted_value = value.clone();\n\n                if let Some(ref fmt) = column.formatter {\n                    formatted_value = fmt(&value, RENDER_INTO_HTML);\n                } else if let Some(ref rend) = column.renderer {\n                    formatted_value = rend(row, RENDER_INTO_HTML);\n                }\n\n                if column.escape_output_html {\n                    formatted_value = html_escape(&formatted_value);\n                }\n\n                if column.non_breaking_spaces {\n                    formatted_value = formatted_value\n                        .replace(' ', \"&nbsp;\")\n                        .replace('\\t', \"&nbsp;&nbsp;&nbsp;&nbsp;\");\n                }\n\n                // colored text\n                if formatted_value.contains(\"[0;\") || formatted_value.contains(\"[1;\") || formatted_value.contains(\"[0m\")\n                {\n                    formatted_value = crate::utils::convert_bash_colors_in_text_to_html(&formatted_value);\n                }\n\n                // full URL in value — skip if a renderer/formatter already produced custom HTML\n                let has_custom_formatter = column.formatter.is_some() || column.renderer.is_some();\n                if !has_custom_formatter && value.starts_with(\"http\") {\n                    let truncated = utils::truncate_url(\n                        &value,\n                        100,\n                        \"\\u{2026}\",\n                        
self.host_to_strip_from_urls.as_deref(),\n                        self.scheme_of_host_to_strip_from_urls.as_deref(),\n                        Some(false),\n                    );\n                    formatted_value = format!(\"<a href='{}' target='_blank'>{}</a>\", html_escape(&value), truncated);\n                } else if !has_custom_formatter && formatted_value.starts_with(\"http\") {\n                    let truncated = utils::truncate_url(\n                        &formatted_value,\n                        100,\n                        \"\\u{2026}\",\n                        self.host_to_strip_from_urls.as_deref(),\n                        self.scheme_of_host_to_strip_from_urls.as_deref(),\n                        Some(false),\n                    );\n                    formatted_value = format!(\n                        \"<a href='{}' target='_blank'>{}</a>\",\n                        html_escape(&formatted_value),\n                        truncated\n                    );\n                } else if !has_custom_formatter\n                    && let Some(ref root_url) = initial_root_url\n                    && formatted_value.starts_with('/')\n                    && RE_RELATIVE_URL_PATH.is_match(&formatted_value)\n                {\n                    let final_url = format!(\"{}{}\", root_url, formatted_value);\n                    let truncated = utils::truncate_url(\n                        &formatted_value,\n                        100,\n                        \"\\u{2026}\",\n                        self.host_to_strip_from_urls.as_deref(),\n                        self.scheme_of_host_to_strip_from_urls.as_deref(),\n                        Some(false),\n                    );\n                    formatted_value = format!(\n                        \"<a href='{}' target='_blank'>{}</a>\",\n                        html_escape(&final_url),\n                        truncated\n                    );\n                }\n\n                let data_value = if 
column.get_data_value_callback.is_some() {\n                    column.get_data_value(row)\n                } else if value.len() < 200 {\n                    value.clone()\n                } else if formatted_value.len() < 50 {\n                    formatted_value.clone()\n                } else {\n                    \"complex-data\".to_string()\n                };\n\n                output.push_str(&format!(\n                    \"<td data-value='{}' class='{}'>{}</td>\",\n                    html_escape(&data_value),\n                    html_escape(&column.apl_code),\n                    formatted_value\n                ));\n            }\n            output.push_str(\"</tr>\");\n            counter += 1;\n        }\n\n        if self.data.is_empty() {\n            output.push_str(&format!(\n                \"<tr><td colspan='{}' class='warning'>{}</td></tr>\",\n                self.columns.len(),\n                html_escape(&self.empty_table_message)\n            ));\n        } else if max_rows_reached {\n            output.push_str(&format!(\n                \"<tr><td colspan='{}' class='warning'>You have reached the limit of {} rows as a protection against very large output or exhausted memory.</td></tr>\",\n                self.columns.len(),\n                self.max_rows.unwrap_or(0)\n            ));\n        } else if self.max_hard_rows_limit_reached {\n            let limit = HARD_ROWS_LIMIT.read().map(|v| *v).unwrap_or(200);\n            output.push_str(&format!(\n                \"<tr><td colspan='{}' class='warning'>You have reached the hard limit of {} rows as a protection against very large output or exhausted memory. 
You can change this with <code>--rows-limit</code>.</td></tr>\",\n                self.columns.len(),\n                limit\n            ));\n        }\n\n        output.push_str(\"</tbody>\");\n\n        if self.is_fulltext_enabled() {\n            output.push_str(\"<tfoot>\");\n            output.push_str(&format!(\n                \"  <tr class='empty-fulltext'><td colspan='{}' class='warning'>No rows found, please edit your search term.</td></tr>\",\n                self.columns.len()\n            ));\n            output.push_str(\"</tfoot>\");\n        }\n\n        output.push_str(\"</table></div>\");\n\n        if show_more {\n            output.push_str(&format!(\n                \"<label for='showMore_{}' class='show-more-label'>(+) Show entire table</label>\",\n                html_escape(&self.unique_id)\n            ));\n        }\n        output.push_str(\"</div>\");\n\n        output\n    }\n\n    pub fn get_console_output(&self) -> String {\n        let title_output = format!(\"{}\\n{}\\n\\n\", self.title, \"-\".repeat(self.title.chars().count()));\n        let mut output = utils::get_color_text(&title_output, \"blue\", false);\n\n        let data = &self.data;\n\n        if data.is_empty() {\n            output.push_str(&utils::get_color_text(&self.empty_table_message, \"gray\", false));\n            output.push_str(\"\\n\\n\");\n            return output;\n        } else if !self.visible_in_console {\n            output.push_str(&utils::get_color_text(\n                \"This table contains large data. 
To see them, use output to HTML using `--output-html-report=tmp/myreport.html`.\",\n                \"yellow\",\n                false,\n            ));\n            output.push_str(\"\\n\\n\");\n            return output;\n        }\n\n        let display_data: &[HashMap<String, String>] = if let Some(limit) = self.visible_in_console_rows_limit {\n            output.push_str(&utils::get_color_text(\n                    &format!(\n                        \"This table contains large data and shows max {} rows. To see them all, use output to HTML using `--output-html-report=tmp/myreport.html`.\",\n                        limit\n                    ),\n                    \"yellow\",\n                    false,\n                ));\n            output.push_str(\"\\n\\n\");\n            &data[..limit.min(data.len())]\n        } else {\n            data\n        };\n\n        // Calculate column widths\n        let column_widths: Vec<usize> = self\n            .columns\n            .iter()\n            .map(|col| {\n                if col.width == super::super_table_column::AUTO_WIDTH {\n                    col.get_auto_width_by_data(&self.data)\n                } else {\n                    col.width as usize\n                }\n            })\n            .collect();\n\n        // Headers\n        let headers: Vec<String> = self\n            .columns\n            .iter()\n            .enumerate()\n            .map(|(i, col)| utils::mb_str_pad(&col.name, column_widths[i], ' '))\n            .collect();\n        output.push_str(&utils::get_color_text(&headers.join(\" | \"), \"gray\", false));\n        output.push('\\n');\n\n        // Separator\n        let total_width: usize = column_widths.iter().sum::<usize>() + (self.columns.len() * 3) - 1;\n        output.push_str(&\"-\".repeat(total_width));\n        output.push('\\n');\n\n        // Rows\n        for row in display_data {\n            let mut row_data = Vec::new();\n            for (i, column) in 
self.columns.iter().enumerate() {\n                let value = row.get(&column.apl_code).cloned().unwrap_or_default();\n                let col_width = column_widths[i];\n\n                let mut display_value = if let Some(ref fmt) = column.formatter {\n                    fmt(&value, RENDER_INTO_CONSOLE)\n                } else if let Some(ref rend) = column.renderer {\n                    rend(row, RENDER_INTO_CONSOLE)\n                } else {\n                    value\n                };\n\n                // Strip protocol+domain from same-domain URLs in console output\n                if display_value.starts_with(\"http\") {\n                    display_value = utils::truncate_url(\n                        &display_value,\n                        col_width,\n                        \"\\u{2026}\",\n                        self.host_to_strip_from_urls.as_deref(),\n                        self.scheme_of_host_to_strip_from_urls.as_deref(),\n                        None,\n                    );\n                }\n\n                if column.truncate_if_longer && display_value.chars().count() > col_width {\n                    display_value = utils::truncate_in_two_thirds(&display_value, col_width, \"\\u{2026}\", None);\n                }\n\n                // Always use ANSI-aware padding: truncation may add colored \"…\" to any column\n                let stripped_len = utils::remove_ansi_colors(&display_value).chars().count();\n                let padding = col_width.saturating_sub(stripped_len);\n                row_data.push(format!(\"{}{}\", display_value, \" \".repeat(padding)));\n            }\n            output.push_str(&row_data.join(\" | \"));\n            output.push('\\n');\n        }\n        output.push('\\n');\n\n        output\n    }\n\n    pub fn get_json_output(&self) -> Option<serde_json::Value> {\n        if !self.visible_in_json {\n            return None;\n        }\n\n        // Build columns as a dict keyed by aplCode\n        let mut 
columns_map = serde_json::Map::new();\n        for col in &self.columns {\n            let col_json = serde_json::json!({\n                \"aplCode\": col.apl_code,\n                \"name\": col.name,\n                \"width\": col.width,\n                \"formatter\": if col.formatter.is_some() { serde_json::json!({}) } else { serde_json::Value::Null },\n                \"renderer\": if col.renderer.is_some() { serde_json::json!({}) } else { serde_json::Value::Null },\n                \"truncateIfLonger\": col.truncate_if_longer,\n                \"formatterWillChangeValueLength\": col.formatter_will_change_value_length,\n                \"nonBreakingSpaces\": col.non_breaking_spaces,\n                \"escapeOutputHtml\": col.escape_output_html,\n                \"getDataValueCallback\": if col.get_data_value_callback.is_some() { serde_json::json!({}) } else { serde_json::Value::Null },\n                \"forcedDataType\": col.forced_data_type,\n            });\n            columns_map.insert(col.apl_code.clone(), col_json);\n        }\n\n        Some(serde_json::json!({\n            \"aplCode\": self.apl_code,\n            \"title\": self.title,\n            \"columns\": columns_map,\n            \"rows\": self.data,\n            \"position\": if self.position_before_url_table { POSITION_BEFORE_URL_TABLE } else { POSITION_AFTER_URL_TABLE },\n        }))\n    }\n\n    pub fn is_position_before_url_table(&self) -> bool {\n        self.position_before_url_table\n    }\n\n    pub fn get_data(&self) -> &[HashMap<String, String>] {\n        &self.data\n    }\n\n    pub fn get_total_rows(&self) -> usize {\n        self.data.len()\n    }\n\n    pub fn set_host_to_strip_from_urls(&mut self, host: Option<String>, scheme: Option<String>) {\n        self.host_to_strip_from_urls = host;\n        self.scheme_of_host_to_strip_from_urls = scheme;\n    }\n\n    pub fn set_initial_url(&mut self, url: Option<String>) {\n        self.initial_url = url;\n    }\n\n    pub fn 
set_visibility_in_html(&mut self, visible: bool) {\n        self.visible_in_html = visible;\n    }\n\n    pub fn set_visibility_in_console(&mut self, visible: bool, rows_limit: Option<usize>) {\n        self.visible_in_console = visible;\n        self.visible_in_console_rows_limit = rows_limit;\n    }\n\n    pub fn set_visibility_in_json(&mut self, visible: bool) {\n        self.visible_in_json = visible;\n    }\n\n    pub fn is_visible_in_html(&self) -> bool {\n        self.visible_in_html\n    }\n\n    pub fn is_visible_in_console(&self) -> bool {\n        self.visible_in_console\n    }\n\n    pub fn is_visible_in_json(&self) -> bool {\n        self.visible_in_json\n    }\n\n    pub fn disable_fulltext(&mut self) {\n        self.fulltext_enabled = false;\n    }\n\n    pub fn set_show_only_columns_with_values(&mut self, show_only: bool) {\n        self.show_only_columns_with_values = show_only;\n    }\n\n    pub fn get_columns(&self) -> &[SuperTableColumn] {\n        &self.columns\n    }\n\n    pub fn set_hard_rows_limit(limit: usize) {\n        if let Ok(mut v) = HARD_ROWS_LIMIT.write() {\n            *v = limit;\n        }\n    }\n\n    pub fn set_ignore_hard_rows_limit(&mut self, ignore: bool) {\n        self.ignore_hard_rows_limit = ignore;\n    }\n\n    fn sort_data(&mut self, column_key: &str, direction: &str) {\n        let dir_upper = direction.to_uppercase();\n        let key = column_key.to_string();\n        self.data.sort_by(|a, b| {\n            let a_val = a.get(&key).cloned().unwrap_or_default();\n            let b_val = b.get(&key).cloned().unwrap_or_default();\n\n            // Try numeric comparison first\n            let cmp = match (a_val.parse::<f64>(), b_val.parse::<f64>()) {\n                (Ok(a_num), Ok(b_num)) => a_num.partial_cmp(&b_num).unwrap_or(std::cmp::Ordering::Equal),\n                _ => a_val.cmp(&b_val),\n            };\n\n            if dir_upper == \"ASC\" { cmp } else { cmp.reverse() }\n        });\n    }\n\n    fn 
is_fulltext_enabled(&self) -> bool {\n        self.fulltext_enabled && self.data.len() >= self.min_rows_for_fulltext\n    }\n\n    fn remove_columns_with_empty_data(&mut self) {\n        if !self.show_only_columns_with_values {\n            return;\n        }\n\n        let columns_to_remove: Vec<String> = self\n            .columns\n            .iter()\n            .filter(|col| {\n                !self.data.iter().any(|row| {\n                    let value = row.get(&col.apl_code).cloned().unwrap_or_default();\n                    let trimmed = value.trim().trim_matches(|c: char| c == '0' || c == '.' || c == ',');\n                    !trimmed.is_empty()\n                })\n            })\n            .map(|col| col.apl_code.clone())\n            .collect();\n\n        self.columns.retain(|col| !columns_to_remove.contains(&col.apl_code));\n\n        for row in &mut self.data {\n            for key in &columns_to_remove {\n                row.remove(key);\n            }\n        }\n    }\n\n    fn apply_hard_rows_limit(&mut self) {\n        let limit = HARD_ROWS_LIMIT.read().map(|v| *v).unwrap_or(200);\n        if limit > 0 && !self.ignore_hard_rows_limit && self.data.len() > limit {\n            self.data.truncate(limit);\n            self.max_hard_rows_limit_reached = true;\n        }\n    }\n}\n\nfn html_escape(s: &str) -> String {\n    s.replace('&', \"&amp;\")\n        .replace('<', \"&lt;\")\n        .replace('>', \"&gt;\")\n        .replace('\"', \"&quot;\")\n        .replace('\\'', \"&#39;\")\n}\n\nfn generate_unique_id() -> String {\n    use std::time::SystemTime;\n    let nanos = SystemTime::now()\n        .duration_since(SystemTime::UNIX_EPOCH)\n        .map(|d| d.as_nanos())\n        .unwrap_or(42);\n\n    use ::md5::{Digest, Md5};\n    let mut hasher = Md5::new();\n    hasher.update(nanos.to_string().as_bytes());\n    let result = hasher.finalize();\n    format!(\"t{}\", &format!(\"{:x}\", result)[..6])\n}\n"
  },
  {
    "path": "src/components/super_table_column.rs",
    "content": "// SiteOne Crawler - SuperTableColumn\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse serde::Serialize;\nuse std::collections::HashMap;\n\npub const AUTO_WIDTH: i32 = -1;\n\npub type FormatterFn = Box<dyn Fn(&str, &str) -> String + Send + Sync>;\npub type RendererFn = Box<dyn Fn(&HashMap<String, String>, &str) -> String + Send + Sync>;\npub type DataValueCallbackFn = Box<dyn Fn(&HashMap<String, String>) -> String + Send + Sync>;\n\n#[derive(Serialize)]\npub struct SuperTableColumn {\n    pub apl_code: String,\n    pub name: String,\n    pub width: i32,\n    #[serde(skip)]\n    pub formatter: Option<FormatterFn>,\n    #[serde(skip)]\n    pub renderer: Option<RendererFn>,\n    pub truncate_if_longer: bool,\n    pub formatter_will_change_value_length: bool,\n    pub non_breaking_spaces: bool,\n    pub escape_output_html: bool,\n    #[serde(skip)]\n    pub get_data_value_callback: Option<DataValueCallbackFn>,\n    pub forced_data_type: Option<String>,\n}\n\nimpl std::fmt::Debug for SuperTableColumn {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"SuperTableColumn\")\n            .field(\"apl_code\", &self.apl_code)\n            .field(\"name\", &self.name)\n            .field(\"width\", &self.width)\n            .field(\"truncate_if_longer\", &self.truncate_if_longer)\n            .field(\n                \"formatter_will_change_value_length\",\n                &self.formatter_will_change_value_length,\n            )\n            .field(\"non_breaking_spaces\", &self.non_breaking_spaces)\n            .field(\"escape_output_html\", &self.escape_output_html)\n            .field(\"forced_data_type\", &self.forced_data_type)\n            .finish()\n    }\n}\n\nimpl SuperTableColumn {\n    #[allow(clippy::too_many_arguments)]\n    pub fn new(\n        apl_code: String,\n        name: String,\n        width: i32,\n        formatter: Option<FormatterFn>,\n        renderer: Option<RendererFn>,\n        
truncate_if_longer: bool,\n        formatter_will_change_value_length: bool,\n        non_breaking_spaces: bool,\n        escape_output_html: bool,\n        get_data_value_callback: Option<DataValueCallbackFn>,\n    ) -> Self {\n        Self {\n            apl_code,\n            name,\n            width,\n            formatter,\n            renderer,\n            truncate_if_longer,\n            formatter_will_change_value_length,\n            non_breaking_spaces,\n            escape_output_html,\n            get_data_value_callback,\n            forced_data_type: None,\n        }\n    }\n\n    pub fn get_width_px(&self) -> i32 {\n        self.width * 8\n    }\n\n    pub fn get_auto_width_by_data(&self, data: &[HashMap<String, String>]) -> usize {\n        let mut max_width = self.name.chars().count();\n\n        for row in data {\n            let value = row.get(&self.apl_code);\n            match value {\n                None => continue,\n                Some(v) if v.is_empty() => continue,\n                Some(v) => {\n                    if self.formatter.is_some() && self.formatter_will_change_value_length {\n                        if let Some(ref fmt) = self.formatter {\n                            let formatted = fmt(v, \"console\");\n                            max_width = max_width.max(formatted.chars().count());\n                        }\n                    } else {\n                        max_width = max_width.max(v.chars().count());\n                    }\n                }\n            }\n        }\n\n        max_width.min(1000)\n    }\n\n    pub fn get_data_value(&self, row: &HashMap<String, String>) -> String {\n        if let Some(ref callback) = self.get_data_value_callback {\n            return callback(row);\n        }\n        row.get(&self.apl_code).cloned().unwrap_or_default()\n    }\n}\n"
  },
  {
    "path": "src/content_processor/astro_processor.rs",
    "content": "// SiteOne Crawler - AstroProcessor\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Handles Astro specific patterns - extracts component-url and renderer-url,\n// and inlines modules for offline version (CORS blocking with file:// protocol).\n\nuse std::collections::HashSet;\n\nuse md5::{Digest, Md5};\nuse once_cell::sync::Lazy;\nuse regex::Regex;\n\nuse crate::content_processor::base_processor::{ProcessorConfig, is_relevant};\nuse crate::content_processor::content_processor::ContentProcessor;\nuse crate::engine::found_url::{FoundUrl, UrlSource};\nuse crate::engine::found_urls::FoundUrls;\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::types::ContentTypeId;\n\nstatic RE_ASTRO_URLS: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?i)(component-url|renderer-url)=[\"']([^\"']+)[\"']\"#).unwrap());\n\n// For offline version - match <script type=\"module\" src=\"...\"> tags\nstatic RE_MODULE_SCRIPT_SRC_FIRST: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?im)<script[^>]+type=\"module\"[^>]+src=\"([^\"]+)\"[^>]*>\\s*</script>\"#).unwrap());\n\nstatic RE_MODULE_SCRIPT_SRC_SECOND: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?im)<script[^>]+src=\"([^\"]+)\"[^>]+type=\"module\"[^>]*>\\s*</script>\"#).unwrap());\n\nstatic RE_IMPORT_STATEMENT: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?i)import\\s*[\"']([^\"']+)[\"']\\s*;?\"#).unwrap());\n\npub struct AstroProcessor {\n    #[allow(dead_code)]\n    config: ProcessorConfig,\n    debug_mode: bool,\n    relevant_content_types: Vec<ContentTypeId>,\n}\n\nimpl AstroProcessor {\n    pub fn new(config: ProcessorConfig) -> Self {\n        Self {\n            config,\n            debug_mode: false,\n            relevant_content_types: vec![ContentTypeId::Html, ContentTypeId::Script],\n        }\n    }\n}\n\nimpl AstroProcessor {\n    /// Recursively detect and inline imported modules.\n    #[allow(clippy::only_used_in_recursion)]\n    fn detect_and_include_other_modules(\n        &self,\n        
module_content: &str,\n        module_url: &ParsedUrl,\n        inline_modules: &mut Vec<String>,\n        content_loader: &dyn Fn(&str) -> Option<String>,\n        depth: u32,\n    ) -> String {\n        if depth > 10 {\n            return module_content.to_string();\n        }\n\n        RE_IMPORT_STATEMENT\n            .replace_all(module_content, |caps: &regex::Captures| {\n                let src = caps.get(1).map_or(\"\", |m| m.as_str()).trim();\n                let src_parsed_url = ParsedUrl::parse(src, Some(module_url));\n                let src_full_url = src_parsed_url.get_full_url(true, false);\n\n                if let Some(mut src_content) = content_loader(&src_full_url) {\n                    if src_content.contains(\"import\") {\n                        src_content = self.detect_and_include_other_modules(\n                            &src_content,\n                            &src_parsed_url,\n                            inline_modules,\n                            content_loader,\n                            depth + 1,\n                        );\n                    }\n                    inline_modules.push(src_content);\n\n                    if depth == 0 {\n                        \"/* SiteOne Crawler: imported as inline modules recursively */\".to_string()\n                    } else {\n                        src.to_string()\n                    }\n                } else {\n                    // Module not found in storage, keep original import\n                    caps[0].to_string()\n                }\n            })\n            .to_string()\n    }\n\n    /// Replace module script tag with inlined content.\n    fn inline_module_script(\n        &self,\n        src: &str,\n        url: &ParsedUrl,\n        already_included: &mut HashSet<String>,\n        content_loader: &dyn Fn(&str) -> Option<String>,\n    ) -> String {\n        let src_parsed_url = ParsedUrl::parse(src, Some(url));\n        let src_full_url = 
src_parsed_url.get_full_url(true, false);\n\n        if let Some(src_content) = content_loader(&src_full_url) {\n            let mut inline_modules: Vec<String> = Vec::new();\n            let processed_content = self.detect_and_include_other_modules(\n                &src_content,\n                &src_parsed_url,\n                &mut inline_modules,\n                content_loader,\n                0,\n            );\n\n            let mut result = String::new();\n            for inline_module in &inline_modules {\n                let mut hasher = Md5::new();\n                hasher.update(inline_module.as_bytes());\n                let module_md5 = format!(\"{:x}\", hasher.finalize());\n                if already_included.contains(&module_md5) {\n                    continue;\n                }\n                result.push_str(&format!(\"<script type=\\\"module\\\">{}</script>\\n\", inline_module));\n                already_included.insert(module_md5);\n            }\n\n            result.push_str(&format!(\"<script type=\\\"module\\\">{}</script>\", processed_content));\n            result\n        } else {\n            // Module not found - keep script tag but remove type=\"module\" for offline compatibility\n            format!(\"<script src=\\\"{}\\\"></script>\", src)\n        }\n    }\n}\n\nimpl ContentProcessor for AstroProcessor {\n    fn find_urls(&self, content: &str, source_url: &ParsedUrl) -> Option<FoundUrls> {\n        // Only process content containing \"astro\"\n        if !content.contains(\"astro\") {\n            return None;\n        }\n\n        let source_url_str = source_url.get_full_url(true, false);\n        let mut found_urls = FoundUrls::new();\n\n        for caps in RE_ASTRO_URLS.captures_iter(content) {\n            if let Some(m) = caps.get(2) {\n                let parsed = ParsedUrl::parse(m.as_str(), Some(source_url));\n                found_urls.add_url(FoundUrl::new(\n                    &parsed.get_full_url(true, false),\n     
               &source_url_str,\n                    UrlSource::JsUrl,\n                ));\n            }\n        }\n\n        if found_urls.get_count() > 0 {\n            Some(found_urls)\n        } else {\n            None\n        }\n    }\n\n    fn apply_content_changes_before_url_parsing(\n        &self,\n        _content: &mut String,\n        _content_type: ContentTypeId,\n        _url: &ParsedUrl,\n    ) {\n        // No changes needed before URL parsing in AstroProcessor\n    }\n\n    fn apply_content_changes_for_offline_version(\n        &self,\n        content: &mut String,\n        _content_type: ContentTypeId,\n        _url: &ParsedUrl,\n        _remove_unwanted_code: bool,\n    ) {\n        // Without a content loader, we can only remove type=\"module\" for offline compatibility.\n        // Full module inlining happens in apply_content_changes_for_offline_version_with_loader.\n        if !content.contains(\"astro\") || self.config.disable_astro_inline_modules {\n            return;\n        }\n\n        *content = RE_MODULE_SCRIPT_SRC_FIRST\n            .replace_all(content, |caps: &regex::Captures| {\n                let src = caps.get(1).map_or(\"\", |m| m.as_str());\n                format!(\"<script src=\\\"{}\\\"></script>\", src)\n            })\n            .to_string();\n\n        *content = RE_MODULE_SCRIPT_SRC_SECOND\n            .replace_all(content, |caps: &regex::Captures| {\n                let src = caps.get(1).map_or(\"\", |m| m.as_str());\n                format!(\"<script src=\\\"{}\\\"></script>\", src)\n            })\n            .to_string();\n    }\n\n    fn apply_content_changes_for_offline_version_with_loader(\n        &self,\n        content: &mut String,\n        _content_type: ContentTypeId,\n        url: &ParsedUrl,\n        _remove_unwanted_code: bool,\n        content_loader: &dyn Fn(&str) -> Option<String>,\n    ) {\n        if !content.contains(\"astro\") || self.config.disable_astro_inline_modules {\n            
return;\n        }\n\n        let mut already_included: HashSet<String> = HashSet::new();\n\n        // Inline module scripts - pattern 1: <script type=\"module\" src=\"...\">\n        *content = RE_MODULE_SCRIPT_SRC_FIRST\n            .replace_all(content, |caps: &regex::Captures| {\n                let src = caps.get(1).map_or(\"\", |m| m.as_str());\n                self.inline_module_script(src, url, &mut already_included, content_loader)\n            })\n            .to_string();\n\n        // Inline module scripts - pattern 2: <script src=\"...\" type=\"module\">\n        *content = RE_MODULE_SCRIPT_SRC_SECOND\n            .replace_all(content, |caps: &regex::Captures| {\n                let src = caps.get(1).map_or(\"\", |m| m.as_str());\n                self.inline_module_script(src, url, &mut already_included, content_loader)\n            })\n            .to_string();\n    }\n\n    fn is_content_type_relevant(&self, content_type: ContentTypeId) -> bool {\n        is_relevant(content_type, &self.relevant_content_types)\n    }\n\n    fn get_name(&self) -> &str {\n        \"AstroProcessor\"\n    }\n\n    fn set_debug_mode(&mut self, debug_mode: bool) {\n        self.debug_mode = debug_mode;\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    fn make_config() -> ProcessorConfig {\n        ProcessorConfig::new(ParsedUrl::parse(\"https://example.com/\", None))\n    }\n\n    #[test]\n    fn test_find_astro_urls() {\n        let processor = AstroProcessor::new(make_config());\n        let html = r#\"<astro-island component-url=\"/_astro/TestSlider.fb32dc5a.js\" component-export=\"default\" renderer-url=\"/_astro/client.c4e17359.js\">\"#;\n        let source = ParsedUrl::parse(\"https://example.com/page\", None);\n        let result = processor.find_urls(html, &source);\n        assert!(result.is_some());\n        assert_eq!(result.unwrap().get_count(), 2);\n    }\n\n    #[test]\n    fn test_no_astro_content() {\n        let processor = 
AstroProcessor::new(make_config());\n        let html = r#\"<html><body>Regular page</body></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/page\", None);\n        let result = processor.find_urls(html, &source);\n        assert!(result.is_none());\n    }\n\n    #[test]\n    fn test_module_inlining_with_loader() {\n        let processor = AstroProcessor::new(make_config());\n        let mut content =\n            r#\"<html><head><!-- astro --><script type=\"module\" src=\"/_astro/app.js\"></script></head></html>\"#\n                .to_string();\n        let url = ParsedUrl::parse(\"https://example.com/page\", None);\n\n        let content_loader = |url_str: &str| -> Option<String> {\n            if url_str.contains(\"app.js\") {\n                Some(\"console.log('hello');\".to_string())\n            } else {\n                None\n            }\n        };\n\n        processor.apply_content_changes_for_offline_version_with_loader(\n            &mut content,\n            ContentTypeId::Html,\n            &url,\n            false,\n            &content_loader,\n        );\n\n        // Should have inlined the module content\n        assert!(content.contains(\"console.log('hello');\"));\n        // Should not have the original src attribute anymore\n        assert!(!content.contains(r#\"src=\"/_astro/app.js\"\"#));\n    }\n\n    #[test]\n    fn test_module_inlining_without_loader_falls_back() {\n        let processor = AstroProcessor::new(make_config());\n        let mut content =\n            r#\"<html><head><!-- astro --><script type=\"module\" src=\"/_astro/app.js\"></script></head></html>\"#\n                .to_string();\n        let url = ParsedUrl::parse(\"https://example.com/page\", None);\n\n        processor.apply_content_changes_for_offline_version(&mut content, ContentTypeId::Html, &url, false);\n\n        // Without loader, should remove type=\"module\" but keep src\n        assert!(content.contains(r#\"<script 
src=\"/_astro/app.js\"></script>\"#));\n        assert!(!content.contains(\"type=\\\"module\\\"\"));\n    }\n}\n"
  },
  {
    "path": "src/content_processor/base_processor.rs",
    "content": "// SiteOne Crawler - BaseProcessor shared utilities\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Provides shared utility methods used by all content processors.\n\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::export::utils::offline_url_converter::OfflineUrlConverter;\nuse crate::types::ContentTypeId;\n\n/// Configuration extracted from CoreOptions, shared across processors.\n/// This avoids each processor needing a reference to the full crawler.\n#[derive(Debug, Clone)]\npub struct ProcessorConfig {\n    pub single_page: bool,\n    pub single_foreign_page: bool,\n    pub max_depth: i64,\n    pub files_enabled: bool,\n    pub images_enabled: bool,\n    pub scripts_enabled: bool,\n    pub styles_enabled: bool,\n    pub fonts_enabled: bool,\n    pub disable_javascript: bool,\n    pub remove_all_anchor_listeners: bool,\n    pub ignore_regex: Vec<String>,\n    /// Pre-compiled ignore regexes for hot path usage\n    pub compiled_ignore_regex: Vec<regex::Regex>,\n    pub disable_astro_inline_modules: bool,\n    pub offline_export_preserve_urls: bool,\n    pub initial_url: ParsedUrl,\n}\n\nimpl ProcessorConfig {\n    pub fn new(initial_url: ParsedUrl) -> Self {\n        Self {\n            single_page: false,\n            single_foreign_page: false,\n            max_depth: 0,\n            files_enabled: true,\n            images_enabled: true,\n            scripts_enabled: true,\n            styles_enabled: true,\n            fonts_enabled: true,\n            disable_javascript: false,\n            remove_all_anchor_listeners: false,\n            ignore_regex: Vec::new(),\n            compiled_ignore_regex: Vec::new(),\n            disable_astro_inline_modules: false,\n            offline_export_preserve_urls: false,\n            initial_url,\n        }\n    }\n\n    /// Compile ignore_regex patterns into Regex objects for hot path usage.\n    /// Call this after setting ignore_regex.\n    pub fn compile_ignore_regex(&mut self) {\n        
self.compiled_ignore_regex = self\n            .ignore_regex\n            .iter()\n            .filter_map(|pattern| regex::Regex::new(pattern).ok())\n            .collect();\n    }\n}\n\n/// Check if a content type is in the list of relevant types\npub fn is_relevant(content_type: ContentTypeId, relevant_types: &[ContentTypeId]) -> bool {\n    relevant_types.contains(&content_type)\n}\n\n/// Normalize a URL path by resolving `.` and `..` segments.\nfn normalize_path(path: &str) -> String {\n    let mut segments: Vec<&str> = Vec::new();\n    for segment in path.split('/') {\n        match segment {\n            \".\" => {}\n            \"..\" => {\n                segments.pop();\n            }\n            s => segments.push(s),\n        }\n    }\n    let result = segments.join(\"/\");\n    if result.starts_with('/') {\n        result\n    } else {\n        format!(\"/{}\", result)\n    }\n}\n\n/// Convert a URL to a relative path for offline use.\n/// When `preserve_urls` is true, same-domain links become root-relative and cross-domain links stay absolute.\npub fn convert_url_to_relative(\n    base_url: &ParsedUrl,\n    target_url: &str,\n    initial_url: &ParsedUrl,\n    attribute: Option<&str>,\n    preserve_urls: bool,\n) -> String {\n    // If it's a data URI, anchor, or non-http scheme, return as-is\n    if target_url.starts_with(\"data:\")\n        || target_url.starts_with(\"javascript:\")\n        || target_url.starts_with(\"mailto:\")\n        || target_url.starts_with(\"tel:\")\n    {\n        return target_url.to_string();\n    }\n\n    // Normalize HTML entities in URL before parsing so it matches what FoundUrl stored.\n    // Only decode entities (not full normalize_url which also trims trailing &, quotes, etc.\n    // — those transformations are for discovery, not for offline conversion of already-parsed URLs).\n    let normalized = target_url.replace(\"&#38;\", \"&\").replace(\"&amp;\", \"&\");\n    let parsed_target = ParsedUrl::parse(&normalized, 
Some(base_url));\n\n    if preserve_urls {\n        let target_host = parsed_target.host.as_deref().unwrap_or(\"\");\n        let initial_host = initial_url.host.as_deref().unwrap_or(\"\");\n        if target_host.is_empty() || target_host == initial_host {\n            // Same domain → root-relative (path + query + fragment)\n            // Normalize path segments (resolve .. and .)\n            let normalized_path = normalize_path(&parsed_target.path);\n            let mut result = normalized_path;\n            if let Some(ref q) = parsed_target.query {\n                result.push('?');\n                result.push_str(q);\n            }\n            if let Some(ref f) = parsed_target.fragment {\n                result.push('#');\n                result.push_str(f);\n            }\n            return result;\n        } else {\n            // Cross domain → full absolute URL\n            return parsed_target.get_full_url(true, true);\n        }\n    }\n\n    let mut converter = OfflineUrlConverter::new(\n        initial_url.clone(),\n        base_url.clone(),\n        parsed_target,\n        None,\n        None,\n        attribute,\n    );\n\n    converter.convert_url_to_relative(true)\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    fn initial_url() -> ParsedUrl {\n        ParsedUrl::parse(\"https://example.com/\", None)\n    }\n\n    #[test]\n    fn decode_amp_entity_before_offline_conversion() {\n        let base = ParsedUrl::parse(\"https://example.com/blog/\", None);\n        let result = convert_url_to_relative(&base, \"/style.css?v=1&amp;t=2\", &initial_url(), Some(\"href\"), false);\n        // &amp; must be decoded to & so the query hash matches what FoundUrl stored\n        assert!(\n            !result.contains(\"&amp;\"),\n            \"HTML entity &amp; should be decoded before conversion\"\n        );\n    }\n\n    #[test]\n    fn decode_numeric_entity_before_offline_conversion() {\n        let base = 
ParsedUrl::parse(\"https://example.com/\", None);\n        let result = convert_url_to_relative(&base, \"/page?a=1&#38;b=2\", &initial_url(), Some(\"href\"), false);\n        assert!(\n            !result.contains(\"&#38;\"),\n            \"HTML entity &#38; should be decoded before conversion\"\n        );\n    }\n\n    #[test]\n    fn preserve_trailing_ampersand() {\n        // Trailing & in a query string should NOT be stripped (unlike in FoundUrl discovery)\n        let base = ParsedUrl::parse(\"https://example.com/\", None);\n        let a = convert_url_to_relative(&base, \"/page?a=1&\", &initial_url(), Some(\"href\"), false);\n        let b = convert_url_to_relative(&base, \"/page?a=1&b=\", &initial_url(), Some(\"href\"), false);\n        // Both should produce different results (trailing & matters for hash)\n        assert_ne!(a, b, \"trailing & should be preserved, not stripped\");\n    }\n\n    #[test]\n    fn skip_data_uri() {\n        let base = ParsedUrl::parse(\"https://example.com/\", None);\n        let result = convert_url_to_relative(&base, \"data:image/png;base64,abc\", &initial_url(), None, false);\n        assert_eq!(result, \"data:image/png;base64,abc\");\n    }\n\n    #[test]\n    fn skip_javascript_uri() {\n        let base = ParsedUrl::parse(\"https://example.com/\", None);\n        let result = convert_url_to_relative(&base, \"javascript:void(0)\", &initial_url(), None, false);\n        assert_eq!(result, \"javascript:void(0)\");\n    }\n\n    // --- preserve_urls tests ---\n\n    #[test]\n    fn preserve_urls_same_domain_absolute() {\n        let base = ParsedUrl::parse(\"https://example.com/blog/post\", None);\n        let result = convert_url_to_relative(\n            &base,\n            \"https://example.com/designy/classic\",\n            &initial_url(),\n            Some(\"href\"),\n            true,\n        );\n        assert_eq!(result, \"/designy/classic\");\n    }\n\n    #[test]\n    fn preserve_urls_same_domain_root_relative() 
{\n        let base = ParsedUrl::parse(\"https://example.com/blog/post\", None);\n        let result = convert_url_to_relative(&base, \"/about\", &initial_url(), Some(\"href\"), true);\n        assert_eq!(result, \"/about\");\n    }\n\n    #[test]\n    fn preserve_urls_same_domain_relative() {\n        let base = ParsedUrl::parse(\"https://example.com/blog/post\", None);\n        let result = convert_url_to_relative(&base, \"../images/logo.png\", &initial_url(), Some(\"src\"), true);\n        assert_eq!(result, \"/images/logo.png\");\n    }\n\n    #[test]\n    fn preserve_urls_cross_domain() {\n        let base = ParsedUrl::parse(\"https://example.com/page\", None);\n        let result = convert_url_to_relative(\n            &base,\n            \"https://cdn.other.com/style.css\",\n            &initial_url(),\n            Some(\"href\"),\n            true,\n        );\n        assert_eq!(result, \"https://cdn.other.com/style.css\");\n    }\n\n    #[test]\n    fn preserve_urls_with_query_and_fragment() {\n        let base = ParsedUrl::parse(\"https://example.com/\", None);\n        let result = convert_url_to_relative(&base, \"/page?key=val#section\", &initial_url(), Some(\"href\"), true);\n        assert_eq!(result, \"/page?key=val#section\");\n    }\n\n    #[test]\n    fn preserve_urls_data_uri_unchanged() {\n        let base = ParsedUrl::parse(\"https://example.com/\", None);\n        let result = convert_url_to_relative(&base, \"data:image/png;base64,abc\", &initial_url(), None, true);\n        assert_eq!(result, \"data:image/png;base64,abc\");\n    }\n\n    #[test]\n    fn preserve_urls_mailto_unchanged() {\n        let base = ParsedUrl::parse(\"https://example.com/\", None);\n        let result = convert_url_to_relative(&base, \"mailto:test@example.com\", &initial_url(), None, true);\n        assert_eq!(result, \"mailto:test@example.com\");\n    }\n}\n"
  },
  {
    "path": "src/content_processor/content_processor.rs",
    "content": "// SiteOne Crawler - ContentProcessor trait\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse crate::engine::found_urls::FoundUrls;\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::types::ContentTypeId;\n\n/// Trait for content processors that extract URLs and modify content\n/// for offline versions.\npub trait ContentProcessor: Send + Sync {\n    /// Parse and find framework specific URLs in HTML/CSS/JS\n    fn find_urls(&self, content: &str, source_url: &ParsedUrl) -> Option<FoundUrls>;\n\n    /// Apply content changes for HTML/CSS/JS before URL parsing,\n    /// directly modifying the content string.\n    /// Called by manager only if is_content_type_relevant() returns true.\n    fn apply_content_changes_before_url_parsing(\n        &self,\n        content: &mut String,\n        content_type: ContentTypeId,\n        url: &ParsedUrl,\n    );\n\n    /// Apply content changes for offline version of the file,\n    /// directly modifying the content (HTML/CSS/JS) string.\n    /// Called by manager only if is_content_type_relevant() returns true.\n    fn apply_content_changes_for_offline_version(\n        &self,\n        content: &mut String,\n        content_type: ContentTypeId,\n        url: &ParsedUrl,\n        remove_unwanted_code: bool,\n    );\n\n    /// Apply content changes for offline version with a content loader callback.\n    /// The loader takes a URL string and returns its body text if available.\n    /// Default implementation delegates to apply_content_changes_for_offline_version.\n    /// Only AstroProcessor overrides this to inline modules from storage.\n    fn apply_content_changes_for_offline_version_with_loader(\n        &self,\n        content: &mut String,\n        content_type: ContentTypeId,\n        url: &ParsedUrl,\n        remove_unwanted_code: bool,\n        _content_loader: &dyn Fn(&str) -> Option<String>,\n    ) {\n        self.apply_content_changes_for_offline_version(content, content_type, url, 
remove_unwanted_code);\n    }\n\n    /// Check if this ContentProcessor is relevant for given content type\n    fn is_content_type_relevant(&self, content_type: ContentTypeId) -> bool;\n\n    /// Get the name of this processor (used for stats/logging)\n    fn get_name(&self) -> &str;\n\n    /// Enable/disable debug mode\n    fn set_debug_mode(&mut self, debug_mode: bool);\n}\n"
  },
  {
    "path": "src/content_processor/css_processor.rs",
    "content": "// SiteOne Crawler - CssProcessor\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Extracts URLs from CSS url() and @import, and converts for offline use.\n\nuse once_cell::sync::Lazy;\nuse regex::Regex;\n\nuse crate::content_processor::base_processor::{ProcessorConfig, convert_url_to_relative, is_relevant};\nuse crate::content_processor::content_processor::ContentProcessor;\nuse crate::engine::found_url::UrlSource;\nuse crate::engine::found_urls::FoundUrls;\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\nstatic RE_CSS_URL: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?im)url\\s*\\(\\s*[\"']?([^\"')]+)[\"']?\\s*\\)\"#).unwrap());\n\nstatic RE_IS_IMAGE: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r\"(?i)\\.(jpg|jpeg|png|gif|webp|avif|svg|ico|tif|bmp)(\\?.*|#.*)?$\").unwrap());\n\nstatic RE_IS_FONT: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)\\.(eot|ttf|woff2|woff|otf)(\\?.*|#.*)?$\").unwrap());\n\nstatic RE_IS_CSS: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)\\.css(\\?.*|#.*)?$\").unwrap());\n\nstatic RE_CSS_URL_OFFLINE: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?i)url\\((['\"]?)((?:[^'\")\\s]|\\([^)]*\\))+)['\"]?\\)\"#).unwrap());\n\npub struct CssProcessor {\n    config: ProcessorConfig,\n    debug_mode: bool,\n    relevant_content_types: Vec<ContentTypeId>,\n}\n\nimpl CssProcessor {\n    pub fn new(config: ProcessorConfig) -> Self {\n        Self {\n            config,\n            debug_mode: false,\n            relevant_content_types: vec![ContentTypeId::Html, ContentTypeId::Stylesheet],\n        }\n    }\n\n    /// Remove unwanted code from CSS based on disable options\n    fn remove_unwanted_code_from_css(&self, css: &str) -> String {\n        let mut result = css.to_string();\n\n        if !self.config.fonts_enabled {\n            result = utils::strip_fonts(&result);\n        }\n        if !self.config.images_enabled {\n            result = utils::strip_images(&result, None);\n        
}\n\n        result\n    }\n}\n\nimpl ContentProcessor for CssProcessor {\n    fn find_urls(&self, content: &str, source_url: &ParsedUrl) -> Option<FoundUrls> {\n        let source_url_str = source_url.get_full_url(true, false);\n\n        // Find all url() references in CSS\n        let mut url_texts: Vec<&str> = Vec::new();\n        for caps in RE_CSS_URL.captures_iter(content) {\n            if let Some(m) = caps.get(1) {\n                let url = m.as_str();\n                let is_image = RE_IS_IMAGE.is_match(url);\n                let is_font = RE_IS_FONT.is_match(url);\n                let is_css = RE_IS_CSS.is_match(url);\n\n                if (self.config.images_enabled && is_image)\n                    || (self.config.fonts_enabled && is_font)\n                    || (self.config.styles_enabled && is_css)\n                {\n                    url_texts.push(url);\n                }\n            }\n        }\n\n        let mut found_urls = FoundUrls::new();\n        found_urls.add_urls_from_text_array(&url_texts, &source_url_str, UrlSource::CssUrl);\n\n        if found_urls.get_count() > 0 {\n            Some(found_urls)\n        } else {\n            None\n        }\n    }\n\n    fn apply_content_changes_before_url_parsing(\n        &self,\n        _content: &mut String,\n        _content_type: ContentTypeId,\n        _url: &ParsedUrl,\n    ) {\n        // No changes needed before URL parsing in CssProcessor\n    }\n\n    fn apply_content_changes_for_offline_version(\n        &self,\n        content: &mut String,\n        _content_type: ContentTypeId,\n        url: &ParsedUrl,\n        _remove_unwanted_code: bool,\n    ) {\n        let initial_url = &self.config.initial_url;\n\n        *content = RE_CSS_URL_OFFLINE\n            .replace_all(content, |caps: &regex::Captures| {\n                let quote = caps.get(1).map_or(\"\", |m| m.as_str());\n                let found_url = caps.get(2).map_or(\"\", |m| m.as_str());\n\n                // If data 
URI, anchor, or non-requestable resource, skip\n                if !utils::is_href_for_requestable_resource(found_url) || found_url.starts_with('#') {\n                    return caps.get(0).map_or(\"\", |m| m.as_str()).to_string();\n                }\n\n                let relative_url = convert_url_to_relative(\n                    url,\n                    found_url,\n                    initial_url,\n                    None,\n                    self.config.offline_export_preserve_urls,\n                );\n                format!(\"url({}{}{})\", quote, relative_url, quote)\n            })\n            .to_string();\n\n        *content = self.remove_unwanted_code_from_css(content);\n    }\n\n    fn is_content_type_relevant(&self, content_type: ContentTypeId) -> bool {\n        is_relevant(content_type, &self.relevant_content_types)\n    }\n\n    fn get_name(&self) -> &str {\n        \"CssProcessor\"\n    }\n\n    fn set_debug_mode(&mut self, debug_mode: bool) {\n        self.debug_mode = debug_mode;\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    fn make_config() -> ProcessorConfig {\n        ProcessorConfig::new(ParsedUrl::parse(\"https://example.com/\", None))\n    }\n\n    #[test]\n    fn test_find_css_urls() {\n        let processor = CssProcessor::new(make_config());\n        let css = r#\"\n            body { background: url('/img/bg.jpg'); }\n            @font-face { src: url('/fonts/custom.woff2'); }\n        \"#;\n        let source = ParsedUrl::parse(\"https://example.com/style.css\", None);\n        let result = processor.find_urls(css, &source);\n        assert!(result.is_some());\n        assert!(result.unwrap().get_count() >= 2);\n    }\n\n    #[test]\n    fn test_find_css_urls_disabled_images() {\n        let mut config = make_config();\n        config.images_enabled = false;\n        let processor = CssProcessor::new(config);\n        let css = r#\"body { background: url('/img/bg.jpg'); }\"#;\n        let source = 
ParsedUrl::parse(\"https://example.com/style.css\", None);\n        let result = processor.find_urls(css, &source);\n        // Should be None because images are disabled\n        assert!(result.is_none());\n    }\n}\n"
  },
  {
    "path": "src/content_processor/html_processor.rs",
    "content": "// SiteOne Crawler - HtmlProcessor\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Extracts URLs from HTML content and applies offline conversion changes.\n\nuse once_cell::sync::Lazy;\nuse regex::Regex;\n\nuse crate::content_processor::base_processor::{ProcessorConfig, convert_url_to_relative, is_relevant};\nuse crate::content_processor::content_processor::ContentProcessor;\nuse crate::engine::found_url::UrlSource;\nuse crate::engine::found_urls::FoundUrls;\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\npub const JS_VARIABLE_NAME_URL_DEPTH: &str = \"_SiteOneUrlDepth\";\n\npub const HTML_PAGES_EXTENSIONS: &[&str] = &[\n    \"htm\", \"html\", \"shtml\", \"php\", \"phtml\", \"ashx\", \"xhtml\", \"asp\", \"aspx\", \"jsp\", \"jspx\", \"do\", \"cfm\", \"cgi\", \"pl\",\n];\n\nstatic HTML_EXT_REGEX: Lazy<Regex> = Lazy::new(|| {\n    let pattern = format!(r\"(?i)\\.({})\", HTML_PAGES_EXTENSIONS.join(\"|\"));\n    Regex::new(&pattern).unwrap()\n});\n\nstatic RE_A_HREF: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?is)<a[^>]*\\shref=(?:[\"']([^\"'#][^\"']*)[\"']|([^\\s>\"'#][^\\s>\"']*))[^>]*>\"#).unwrap());\n\nstatic RE_ESCAPED_HREF: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?i)href\\\\[\"'][:=]\\\\[\"'](https?://[^\"'\\\\]+)\\\\[\"']\"#).unwrap());\n\nstatic RE_FONT_URL: Lazy<Regex> = Lazy::new(|| {\n    Regex::new(r#\"(?is)url\\s*\\(\\s*['\"]?([^'\"\\s>]+\\.(eot|ttf|woff2|woff|otf)[^'\")\\s]*)['\"]?\\s*\\)\"#).unwrap()\n});\n\nstatic RE_FONT_LINK: Lazy<Regex> = Lazy::new(|| {\n    Regex::new(r#\"(?is)<link\\s+[^>]*href=(?:[\"']([^\"']+\\.(?:eot|ttf|woff2|woff|otf)[^\"']*)[\"']|([^\\s>\"']+\\.(?:eot|ttf|woff2|woff|otf)[^\\s>\"']*))[^>]*>\"#).unwrap()\n});\n\nstatic RE_IMG_SRC: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?is)<img\\s+[^>]*?src=(?:[\"']([^\"']+)[\"']|([^\\s>\"']+))[^>]*>\"#).unwrap());\n\nstatic RE_IMG_DATA_SRC: Lazy<Regex> =\n    Lazy::new(|| 
Regex::new(r#\"(?is)<img\\s+[^>]*?data-src=(?:[\"']([^\"']+)[\"']|([^\\s>\"']+))[^>]*>\"#).unwrap());\n\nstatic RE_INPUT_SRC: Lazy<Regex> = Lazy::new(|| {\n    Regex::new(r#\"(?is)<input\\s+[^>]*?src=(?:[\"']([^\"']+\\.[a-z0-9]{1,10})[\"']|([^\\s>\"']+\\.[a-z0-9]{1,10}))[^>]*>\"#)\n        .unwrap()\n});\n\nstatic RE_LINK_IMAGE: Lazy<Regex> = Lazy::new(|| {\n    Regex::new(r#\"(?is)<link\\s+[^>]*?href=(?:[\"']([^\"']+\\.(?:png|gif|jpg|jpeg|webp|avif|tif|bmp|svg|ico)(?:\\?[^\"']*)?)[\"']|([^\\s>\"']+\\.(?:png|gif|jpg|jpeg|webp|avif|tif|bmp|svg|ico)(?:\\?[^\\s>\"']*)?))[^>]*>\"#).unwrap()\n});\n\nstatic RE_SOURCE_SRC: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?is)<source\\s+[^>]*?src=[\"']([^\"'>]+)[\"'][^>]*>\"#).unwrap());\n\nstatic RE_CSS_URL_IMAGE: Lazy<Regex> = Lazy::new(|| {\n    Regex::new(r#\"(?is)url\\s*\\(\\s*['\"]?([^'\")\\s]+\\.(jpg|jpeg|png|gif|bmp|tif|webp|avif)[^'\")\\s]*)['\"]?\\s*\\)\"#).unwrap()\n});\n\nstatic RE_SOURCE_SRCSET: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?is)<source\\s+[^>]*?srcset=[\"']([^\"'>]+)[\"'][^>]*>\"#).unwrap());\n\nstatic RE_IMG_SRCSET: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?is)<img[^>]+srcset=[\"']([^\"']+)[\"']\"#).unwrap());\n\nstatic RE_IMAGESRCSET: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?is)<[a-z]+[^>]+imagesrcset=[\"']([^\"']+)[\"']\"#).unwrap());\n\nstatic RE_AUDIO_SRC: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?is)<audio\\s+[^>]*?src=(?:[\"']([^\"']+)[\"']|([^\\s>\"']+))[^>]*>\"#).unwrap());\n\nstatic RE_VIDEO_SRC: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?is)<video\\s+[^>]*?src=(?:[\"']([^\"']+)[\"']|([^\\s>\"']+))[^>]*>\"#).unwrap());\n\nstatic RE_SCRIPT_SRC: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?is)<script\\s+[^>]*?src=(?:[\"']([^\"']+)[\"']|([^\\s>\"']+))[^>]*>\"#).unwrap());\n\nstatic RE_LINK_JS: Lazy<Regex> = Lazy::new(|| {\n    
Regex::new(r#\"(?is)<link\\s+[^>]*href=(?:[\"']([^\"']+\\.(?:json|js)(?:\\?[^\"']*)?)[\"']|([^\\s>\"']+\\.(?:json|js)(?:\\?[^\\s>\"']*)?))[^>]*>\"#).unwrap()\n});\n\nstatic RE_DOT_SRC: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?is)\\.src\\s*=\\s*[\"']([^\"']+)[\"']\"#).unwrap());\n\nstatic RE_NEXTJS_CHUNKS: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?is):([a-z0-9/._\\-\\[\\]]+chunks[a-z0-9/._\\-\\[\\]]+\\.js)\"#).unwrap());\n\nstatic RE_LINK_STYLESHEET: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?is)<link\\s+[^>]*?href=[\"']([^\"']+)[\"'][^>]*>\"#).unwrap());\n\nstatic RE_FILE_EXTENSION: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)\\.[a-z0-9]{1,10}(\\?.*)?$\").unwrap());\n\n// Offline version regexes\nstatic RE_HREF_SRC: Lazy<Regex> = Lazy::new(|| {\n    Regex::new(r#\"(?is)(\\.|<[a-z0-9]{1,10}[^>]*\\s+)(href|src|component-url)\\s*(=)\\s*(['\"]?)([^'\">]+)['\"]?([^>]*)\"#)\n        .unwrap()\n});\n\nstatic RE_SRCSET_ATTR: Lazy<Regex> = Lazy::new(|| {\n    Regex::new(\n        r#\"(?is)(\\.|<[a-z0-9]{1,10}[^>]*\\s+)(imagesrcset|srcset|renderer-url)\\s*(=)\\s*(['\"]?)([^'\">]+)['\"]?([^>]*)\"#,\n    )\n    .unwrap()\n});\n\nstatic RE_META_URL: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?im)(<meta[^>]*)(url)\\s*(=)\\s*(['\"]?)([^'\">]+)['\"]?(\")\"#).unwrap());\n\nstatic RE_ESCAPED_HREF_SRC: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?is)(.)(href\\\\[\"']|src\\\\[\"'])([:=])(\\\\[\"'])([^\"'\\\\]+)\\\\[\"'](.)\"#).unwrap());\n\nstatic RE_META_REFRESH: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?i)(<meta[^>]*url=)([^\"']+)([\"'][^>]*>)\"#).unwrap());\n\nstatic RE_PORT_NORMALIZE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)((https?:)?//[a-z0-9._-]+):[0-9]+\").unwrap());\n\nstatic RE_CLOSE_HEAD: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)<\\s*/\\s*head\\s*>\").unwrap());\n\nstatic RE_CLOSE_BODY: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)<\\s*/\\s*body\\s*>\").unwrap());\n\nstatic RE_NON_HTTP_SCHEME: Lazy<Regex> = Lazy::new(|| 
Regex::new(r\"(?i)^[a-z]+:[a-z0-9+]\").unwrap());\n\nstatic RE_EXTERNAL_SCRIPT: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?is)<script[^>]*src=[\"']?(.*?)[\"']?[^>]*>.*?</script>\"#).unwrap());\n\nstatic RE_EXTERNAL_URL: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)^(https?:)?//\").unwrap());\n\nstatic RE_CROSSORIGIN_LINK: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?i)(<link[^>]+)\\s*crossorigin(\\s*=\\s*[\"']?.*?[\"']?)?(\\s*[^>]*>)\"#).unwrap());\n\nstatic RE_CROSSORIGIN_SCRIPT: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?i)(<script[^>]+)\\s*crossorigin(\\s*=\\s*[\"']?.*?[\"']?)?(\\s*[^>]*>)\"#).unwrap());\n\nstatic RE_SCRIPT_BLOCK: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?is)<script[^>]*>(.*?)</script>\").unwrap());\n\nstatic RE_SOCNET_IFRAME: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r\"(?is)<iframe[^>]*(facebook\\.com|twitter\\.com|linkedin\\.com)[^>]*>.*?</iframe>\").unwrap());\n\npub struct HtmlProcessor {\n    config: ProcessorConfig,\n    debug_mode: bool,\n    relevant_content_types: Vec<ContentTypeId>,\n}\n\nimpl HtmlProcessor {\n    pub fn new(config: ProcessorConfig) -> Self {\n        Self {\n            config,\n            debug_mode: false,\n            relevant_content_types: vec![ContentTypeId::Html, ContentTypeId::Redirect],\n        }\n    }\n\n    /// Find <a href> URLs\n    fn find_href_urls(&self, html: &str, source_url: &ParsedUrl, found_urls: &mut FoundUrls) {\n        let source_url_str = source_url.get_full_url(true, false);\n\n        // Standard <a href=\"...\"> links\n        let mut urls: Vec<String> = Vec::new();\n        for caps in RE_A_HREF.captures_iter(html) {\n            if let Some(m) = caps.get(1).or_else(|| caps.get(2)) {\n                urls.push(m.as_str().to_string());\n            }\n        }\n\n        // Escaped href URLs (e.g., href\\\":\\\")\n        for caps in RE_ESCAPED_HREF.captures_iter(html) {\n            if let Some(m) = caps.get(1) {\n                urls.push(m.as_str().to_string());\n 
           }\n        }\n\n        // If single_foreign_page is set and source is on a different 2nd-level domain, skip\n        if self.config.single_foreign_page && source_url.domain_2nd_level != self.config.initial_url.domain_2nd_level {\n            return;\n        }\n\n        // Filter by max depth\n        if self.config.max_depth > 0 {\n            urls.retain(|url_str| {\n                let parsed = ParsedUrl::parse(url_str, Some(source_url));\n                parsed.get_depth() <= self.config.max_depth as usize\n            });\n        }\n\n        // Filter out files if files are disabled\n        if !self.config.files_enabled {\n            urls.retain(|url_str| !RE_FILE_EXTENSION.is_match(url_str) || HTML_EXT_REGEX.is_match(url_str));\n        }\n\n        let url_refs: Vec<&str> = urls.iter().map(|s| s.as_str()).collect();\n        found_urls.add_urls_from_text_array(&url_refs, &source_url_str, UrlSource::AHref);\n    }\n\n    /// Find font URLs in CSS and link tags\n    fn find_fonts(&self, html: &str, source_url: &ParsedUrl, found_urls: &mut FoundUrls) {\n        let source_url_str = source_url.get_full_url(true, false);\n\n        // CSS @font-face url()\n        let font_urls: Vec<&str> = RE_FONT_URL\n            .captures_iter(html)\n            .filter_map(|caps| caps.get(1).map(|m| m.as_str()))\n            .collect();\n        found_urls.add_urls_from_text_array(&font_urls, &source_url_str, UrlSource::CssUrl);\n\n        // <link href=\"...(font extensions)\"\n        let link_fonts: Vec<&str> = RE_FONT_LINK\n            .captures_iter(html)\n            .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str()))\n            .collect();\n        found_urls.add_urls_from_text_array(&link_fonts, &source_url_str, UrlSource::LinkHref);\n    }\n\n    /// Find image URLs from various sources\n    fn find_images(&self, html: &str, source_url: &ParsedUrl, found_urls: &mut FoundUrls) {\n        let source_url_str = 
source_url.get_full_url(true, false);\n\n        // <img src=\"...\"\n        let img_srcs: Vec<&str> = RE_IMG_SRC\n            .captures_iter(html)\n            .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str()))\n            .collect();\n        found_urls.add_urls_from_text_array(&img_srcs, &source_url_str, UrlSource::ImgSrc);\n\n        // <img data-src=\"...\" (lazy loading)\n        let data_srcs: Vec<&str> = RE_IMG_DATA_SRC\n            .captures_iter(html)\n            .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str()))\n            .collect();\n        found_urls.add_urls_from_text_array(&data_srcs, &source_url_str, UrlSource::ImgSrc);\n\n        // <input src=\"...\"\n        let input_srcs: Vec<&str> = RE_INPUT_SRC\n            .captures_iter(html)\n            .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str()))\n            .collect();\n        found_urls.add_urls_from_text_array(&input_srcs, &source_url_str, UrlSource::InputSrc);\n\n        // <link href=\"...(image extensions)\"\n        let link_imgs: Vec<&str> = RE_LINK_IMAGE\n            .captures_iter(html)\n            .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str()))\n            .collect();\n        found_urls.add_urls_from_text_array(&link_imgs, &source_url_str, UrlSource::LinkHref);\n\n        // <source src=\"...\"\n        let source_srcs: Vec<&str> = RE_SOURCE_SRC\n            .captures_iter(html)\n            .filter_map(|caps| caps.get(1).map(|m| m.as_str()))\n            .collect();\n        found_urls.add_urls_from_text_array(&source_srcs, &source_url_str, UrlSource::SourceSrc);\n\n        // CSS url() with image extensions\n        let css_imgs: Vec<&str> = RE_CSS_URL_IMAGE\n            .captures_iter(html)\n            .filter_map(|caps| caps.get(1).map(|m| m.as_str()))\n            .collect();\n        found_urls.add_urls_from_text_array(&css_imgs, &source_url_str, 
UrlSource::CssUrl);\n\n        // srcset from <source>, <img>, and imagesrcset\n        let mut srcset_urls: Vec<String> = Vec::new();\n\n        let mut srcset_values: Vec<&str> = Vec::new();\n        for caps in RE_SOURCE_SRCSET.captures_iter(html) {\n            if let Some(m) = caps.get(1) {\n                srcset_values.push(m.as_str());\n            }\n        }\n        for caps in RE_IMG_SRCSET.captures_iter(html) {\n            if let Some(m) = caps.get(1) {\n                srcset_values.push(m.as_str());\n            }\n        }\n        for caps in RE_IMAGESRCSET.captures_iter(html) {\n            if let Some(m) = caps.get(1) {\n                srcset_values.push(m.as_str());\n            }\n        }\n\n        for srcset in &srcset_values {\n            // srcset sources are separated by \", \" (comma+space)\n            for source in srcset.split(\", \") {\n                let trimmed = source.trim();\n                if trimmed.is_empty() {\n                    continue;\n                }\n                // Split by whitespace to separate URL from size descriptor\n                let url_part = trimmed.split_whitespace().next().unwrap_or(\"\");\n                let url_trimmed = url_part.trim().to_string();\n                if !url_trimmed.is_empty() && !srcset_urls.contains(&url_trimmed) {\n                    srcset_urls.push(url_trimmed);\n                }\n            }\n        }\n\n        let srcset_refs: Vec<&str> = srcset_urls.iter().map(|s| s.as_str()).collect();\n        found_urls.add_urls_from_text_array(&srcset_refs, &source_url_str, UrlSource::ImgSrcset);\n    }\n\n    /// Find audio URLs\n    fn find_audio(&self, html: &str, source_url: &ParsedUrl, found_urls: &mut FoundUrls) {\n        let source_url_str = source_url.get_full_url(true, false);\n        let urls: Vec<&str> = RE_AUDIO_SRC\n            .captures_iter(html)\n            .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str()))\n            
.collect();\n        found_urls.add_urls_from_text_array(&urls, &source_url_str, UrlSource::AudioSrc);\n    }\n\n    /// Find video URLs\n    fn find_video(&self, html: &str, source_url: &ParsedUrl, found_urls: &mut FoundUrls) {\n        let source_url_str = source_url.get_full_url(true, false);\n        let urls: Vec<&str> = RE_VIDEO_SRC\n            .captures_iter(html)\n            .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str()))\n            .collect();\n        found_urls.add_urls_from_text_array(&urls, &source_url_str, UrlSource::VideoSrc);\n    }\n\n    /// Find script URLs from <script src>, <link href=\".js\">, .src= assignments, and NextJS chunks\n    fn find_scripts(&self, html: &str, source_url: &ParsedUrl, found_urls: &mut FoundUrls) {\n        let source_url_str = source_url.get_full_url(true, false);\n\n        // <script src=\"...\"\n        let script_srcs: Vec<&str> = RE_SCRIPT_SRC\n            .captures_iter(html)\n            .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str()))\n            .collect();\n        found_urls.add_urls_from_text_array(&script_srcs, &source_url_str, UrlSource::ScriptSrc);\n\n        // <link href=\"...(json|js)\"\n        let link_js: Vec<&str> = RE_LINK_JS\n            .captures_iter(html)\n            .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)).map(|m| m.as_str()))\n            .collect();\n        found_urls.add_urls_from_text_array(&link_js, &source_url_str, UrlSource::LinkHref);\n\n        // .src = \"...\" (lazy loading in JS)\n        let dot_srcs: Vec<&str> = RE_DOT_SRC\n            .captures_iter(html)\n            .filter_map(|caps| caps.get(1).map(|m| m.as_str()))\n            .collect();\n        found_urls.add_urls_from_text_array(&dot_srcs, &source_url_str, UrlSource::InlineScriptSrc);\n\n        // NextJS chunks\n        let mut next_js_chunks: Vec<String> = Vec::new();\n        for caps in RE_NEXTJS_CHUNKS.captures_iter(html) {\n            
if let Some(m) = caps.get(1) {\n                let matched = m.as_str();\n                let chunk_url = if matched.starts_with(\"//\") {\n                    format!(\"{}:{}\", source_url.scheme.as_deref().unwrap_or(\"https\"), matched)\n                } else if matched.starts_with(\"http://\") || matched.starts_with(\"https://\") {\n                    matched.to_string()\n                } else if matched.contains(\"/_next/\") {\n                    let mut url = matched.to_string();\n                    if source_url.host.is_some() && source_url.host != self.config.initial_url.host {\n                        url = format!(\"{}{}\", source_url.get_full_homepage_url(), url);\n                    }\n                    url\n                } else {\n                    format!(\"{}/_next/{}\", source_url.get_full_homepage_url(), matched)\n                };\n                next_js_chunks.push(chunk_url);\n            }\n        }\n        let chunk_refs: Vec<&str> = next_js_chunks.iter().map(|s| s.as_str()).collect();\n        found_urls.add_urls_from_text_array(&chunk_refs, &source_url_str, UrlSource::InlineScriptSrc);\n    }\n\n    /// Find stylesheet URLs from <link> tags with rel=\"stylesheet\"\n    fn find_stylesheets(&self, html: &str, source_url: &ParsedUrl, found_urls: &mut FoundUrls) {\n        let source_url_str = source_url.get_full_url(true, false);\n\n        let mut stylesheet_urls: Vec<String> = Vec::new();\n        for caps in RE_LINK_STYLESHEET.captures_iter(html) {\n            let full_match = caps.get(0).map(|m| m.as_str()).unwrap_or(\"\");\n            if let Some(href) = caps.get(1) {\n                // Only include if no rel= attribute or rel=\"stylesheet\"\n                let full_lower = full_match.to_lowercase();\n                if !full_lower.contains(\"rel=\") || full_lower.contains(\"stylesheet\") {\n                    stylesheet_urls.push(href.as_str().to_string());\n                }\n            }\n        }\n\n        let 
url_refs: Vec<&str> = stylesheet_urls.iter().map(|s| s.as_str()).collect();\n        found_urls.add_urls_from_text_array(&url_refs, &source_url_str, UrlSource::LinkHref);\n    }\n\n    /// Remove all unwanted code from HTML with respect to --disable-* options\n    fn remove_unwanted_code_from_html(&self, html: &str) -> String {\n        let mut result = html.to_string();\n\n        if !self.config.scripts_enabled {\n            result = utils::strip_javascript(&result);\n        }\n        if !self.config.styles_enabled {\n            result = utils::strip_styles(&result);\n        }\n        if !self.config.fonts_enabled {\n            result = utils::strip_fonts(&result);\n        }\n        if !self.config.images_enabled && result.to_lowercase().contains(\"<img\") {\n            result = utils::strip_images(&result, None);\n            result = self.set_custom_css_for_tile_images(&result);\n            result = utils::add_class_to_html_images(&result, \"siteone-crawler-bg\");\n        }\n\n        result\n    }\n\n    /// Add custom CSS for placeholder tile images\n    fn set_custom_css_for_tile_images(&self, html: &str) -> String {\n        let background_base64 = 
\"iVBORw0KGgoAAAANSUhEUgAAAEAAAAAkCAMAAAAO0sygAAAAAXNSR0IB2cksfwAAAAlwSFlzAAALEwAACxMBAJqcGAAAAMlQTFRFFxcXwMDA////1NTU5+fnpaWl0tLSIyMj5ubmlJSUxcXF29vbz8/P9PT01tbWxMTE8fHxaWlp39/f9fX1yMjI3NzciYmJeXl5Gxsb2dnZNTU18/PzXFxc5eXlJycnysrKZGRk6enp3d3dW1tbsrKyWFhYIiIi19fXvLy8w8PDuLi47e3tzMzM0dHRx8fH09PTHR0dzs7OLy8vwcHB0NDQSEhIqamp4uLiHh4eOzs74ODg3t7ewsLCISEhJCQkaGhoy8vLzc3N2NjYEPdgjAAAAaRJREFUeJzdlWlTgzAQhiGlWlsCCHog9vAWpalavK3X//9RJptACoEwTp1x9MPOht19k+VJCIZhIvNXzVh1DmPVHowfe5cOsrpr5dh6D1kb/VJs0LWQ3cAAO7gyp0tjXinGavBmPQOf5gaVvgIaC5eet5jeceoZbNPcjhjvcu9GGHl7sjYGfbXP3HyaG/Dx/pD7UYDwOFT0ThuDSYwOahgcCn0rgyNWZykMQNtpYXBM9+wkhtreKZ8zZ8D52RoGEeT6E9EntTPmzxP5/mEIXscAX8SF/hL6TSEHc6VTRGbNDMyrYaElRPRxncr1bVpzEzczyHugNjeIGHO9ydbNMjomGgbUUq5PlDMzp3p5FpoYmLei77sERUJ//yBy0wy8lkFf8s/XV/rVMXjk9VahnYF/KvWrY/AMuZdFrvdQCHtPSnVt52BOfa/YP6LcBzoGfj73K1s/q70PtAzYt/DGx8P3Dx6r3Ad6Br6ce2GLmLwPknGAgigAPfNBFDffB6WzKRiMvGJfK/urMlgyycBV9FjDoLAlBl5V/9nwPXzb/tO/8e8y+AJh0S3ETlwQiAAAAABJRU5ErkJggg==\";\n\n        RE_CLOSE_HEAD\n            .replace(\n                html,\n                &format!(\n                    \"<style>\\n\\\n                .siteone-crawler-bg {{\\n\\\n                    background-image: url(\\\"data:image/png;base64,{}\\\");\\n\\\n                    background-repeat: repeat;\\n\\\n                    opacity: 0.15;\\n\\\n                }}\\n\\\n            </style></head>\",\n                    background_base64\n                ),\n            )\n            .to_string()\n    }\n\n    /// Set JS variable _SiteOneUrlDepth with number of levels before </head>\n    fn set_js_variable_with_url_depth(&self, html: &str, base_url: &str) -> String {\n        let base_path = if let Ok(parsed) = url::Url::parse(base_url) {\n            parsed.path().to_string()\n        } else {\n            \"/\".to_string()\n        };\n\n        let trimmed = base_path.trim_start_matches('/');\n        let mut depth = trimmed.matches('/').count();\n\n        let needs_index_html = base_path != 
\"/\" && base_path.ends_with('/');\n        if needs_index_html {\n            depth += 1;\n        }\n\n        RE_CLOSE_HEAD\n            .replace(\n                html,\n                &format!(\n                    \"<script>var {} = {};</script></head>\",\n                    JS_VARIABLE_NAME_URL_DEPTH, depth\n                ),\n            )\n            .to_string()\n    }\n\n    /// Set JS function to remove all anchor listeners before </body>\n    fn set_js_function_to_remove_all_anchor_listeners(&self, html: &str) -> String {\n        RE_CLOSE_BODY\n            .replace(\n                html,\n                concat!(\n                    \"<script>\\n\",\n                    \"function _SiteOneRemoveAllAnchorListeners(){\\n\",\n                    \"    var anchors=document.getElementsByTagName('a');\\n\",\n                    \"    for(var i=0;i<anchors.length;i++){\\n\",\n                    \"        var anchor=anchors[i];\\n\",\n                    \"        var newAnchor=anchor.cloneNode(true);\\n\",\n                    \"        anchor.parentNode.replaceChild(newAnchor,anchor);\\n\",\n                    \"    }\\n\",\n                    \"}\\n\",\n                    \"setTimeout(_SiteOneRemoveAllAnchorListeners, 200);\\n\",\n                    \"setTimeout(_SiteOneRemoveAllAnchorListeners, 1000);\\n\",\n                    \"setTimeout(_SiteOneRemoveAllAnchorListeners, 5000);\\n\",\n                    \"</script></body>\",\n                ),\n            )\n            .to_string()\n    }\n\n    /// Remove scheme and host from full origin URLs to simplify relative paths conversion\n    fn remove_schema_and_host_from_full_origin_urls(&self, url: &ParsedUrl, content: &str) -> String {\n        static RE_BASE_URL_ROOT: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)((https?:)?//[^/]+/?).*\").unwrap());\n\n        let full_url = url.get_full_url(true, false);\n        // Extract base URL root (scheme://host/)\n        let base_url_root = 
RE_BASE_URL_ROOT.replace(&full_url, \"$1\").to_string();\n\n        let mut result = content.to_string();\n\n        // Normalize port numbers\n        result = RE_PORT_NORMALIZE.replace_all(&result, \"$1\").to_string();\n\n        // Build patterns for href=, src=, url=, url( attributes\n        let escaped_root = regex::escape(&base_url_root);\n        let attr_patterns = [\n            format!(r#\"(?i)(href=([\"'])){esc}([^\"']*)([\"'])\"#, esc = escaped_root),\n            format!(r#\"(?i)(src=([\"'])){esc}([^\"']*)([\"'])\"#, esc = escaped_root),\n            format!(r#\"(?i)(url=([\"'])){esc}([^\"']*)([\"'])\"#, esc = escaped_root),\n            format!(r#\"(?i)(url\\(([\"']?)){esc}([^\"')]*)([\"']\\)|\\))\"#, esc = escaped_root),\n        ];\n\n        for pattern in &attr_patterns {\n            if let Ok(re) = Regex::new(pattern) {\n                let compiled_ignore = &self.config.compiled_ignore_regex;\n                result = re\n                    .replace_all(&result, |caps: &regex::Captures| {\n                        let full_match = caps.get(0).map_or(\"\", |m| m.as_str());\n\n                        // Check against pre-compiled ignore patterns\n                        for ire in compiled_ignore {\n                            if ire.is_match(full_match) {\n                                return full_match.to_string();\n                            }\n                        }\n\n                        let attr_start = caps.get(1).map_or(\"\", |m| m.as_str());\n                        let path = caps.get(3).map_or(\"\", |m| m.as_str());\n                        let attr_end = caps.get(4).map_or(\"\", |m| m.as_str());\n\n                        format!(\"{}/{}{}\", attr_start, path, attr_end)\n                    })\n                    .to_string();\n            }\n        }\n\n        result\n    }\n\n    /// Update all HTML paths to relative for offline version\n    fn update_html_paths_to_relative(&self, html: &str, parsed_base_url: 
&ParsedUrl) -> String {\n        let initial_url = &self.config.initial_url;\n        let compiled_ignore = &self.config.compiled_ignore_regex;\n\n        let replace_callback = |caps: &regex::Captures| -> String {\n            let full_match = caps.get(0).map_or(\"\", |m| m.as_str());\n            let start = caps.get(1).map_or(\"\", |m| m.as_str());\n            let attribute_raw = caps.get(2).map_or(\"\", |m| m.as_str());\n            let attribute = attribute_raw.trim_matches(|c: char| c == ' ' || c == '\\\\' || c == '\"' || c == '\\'');\n            let assignment_char = caps.get(3).map_or(\"\", |m| m.as_str());\n            let quote = caps.get(4).map_or(\"\", |m| m.as_str());\n            // Decode HTML entities in URL values (fixes Astro image query params like &#38; → &)\n            let value_raw = caps.get(5).map_or(\"\", |m| m.as_str());\n            let value_decoded = html_entity_decode(value_raw);\n            let value = value_decoded.as_str();\n            let end = caps.get(6).map_or(\"\", |m| m.as_str());\n\n            // When modifying x.src (JS) and there is no quote, do not convert\n            if start == \".\" && quote.is_empty() {\n                return full_match.to_string();\n            }\n\n            // Ignore data URI, anchor, or non-http scheme\n            if value.starts_with('#') || RE_NON_HTTP_SCHEME.is_match(value) {\n                return full_match.to_string();\n            }\n\n            // Check against pre-compiled ignore regex patterns\n            for ire in compiled_ignore {\n                if ire.is_match(value) {\n                    return full_match.to_string();\n                }\n            }\n\n            let attr_lower = attribute.to_lowercase();\n            let new_value = if attr_lower == \"srcset\" || attr_lower == \"imagesrcset\" {\n                // Handle srcset: multiple sources separated by \", \"\n                let sources: Vec<&str> = value.split(\", \").collect();\n                let 
converted: Vec<String> = sources\n                    .iter()\n                    .map(|source| {\n                        let trimmed = source.trim();\n                        if !trimmed.contains(' ') {\n                            // URL without size descriptor\n                            convert_url_to_relative(\n                                parsed_base_url,\n                                trimmed,\n                                initial_url,\n                                Some(&attr_lower),\n                                self.config.offline_export_preserve_urls,\n                            )\n                        } else {\n                            // URL with size descriptor (e.g., \"url 2x\")\n                            let mut parts = trimmed.splitn(2, char::is_whitespace);\n                            let url_part = parts.next().unwrap_or(\"\");\n                            let size_part = parts.next().unwrap_or(\"\");\n                            let relative_url = convert_url_to_relative(\n                                parsed_base_url,\n                                url_part,\n                                initial_url,\n                                Some(&attr_lower),\n                                self.config.offline_export_preserve_urls,\n                            );\n                            format!(\"{} {}\", relative_url, size_part)\n                        }\n                    })\n                    .collect();\n                converted.join(\", \")\n            } else {\n                let mut converted = convert_url_to_relative(\n                    parsed_base_url,\n                    value,\n                    initial_url,\n                    Some(attribute),\n                    self.config.offline_export_preserve_urls,\n                );\n\n                // Handle component-url and renderer-url (Astro)\n                if attribute == \"component-url\" || attribute == \"renderer-url\" {\n            
        converted = format!(\"./{}\", converted);\n                }\n\n                converted\n            };\n\n            format!(\n                \"{}{}{}{}{}{}{}\",\n                start, attribute_raw, assignment_char, quote, new_value, quote, end\n            )\n        };\n\n        let mut result = html.to_string();\n        result = RE_HREF_SRC.replace_all(&result, replace_callback).to_string();\n        result = RE_SRCSET_ATTR.replace_all(&result, replace_callback).to_string();\n        result = RE_META_URL.replace_all(&result, replace_callback).to_string();\n        result = RE_ESCAPED_HREF_SRC.replace_all(&result, replace_callback).to_string();\n        result\n    }\n\n    /// Apply specific HTML changes for offline version\n    #[allow(clippy::too_many_arguments)]\n    fn apply_specific_html_changes(\n        &self,\n        html: &mut String,\n        parsed_base_url: &ParsedUrl,\n        remove_external_js: bool,\n        remove_cross_origins: bool,\n        remove_analytics: bool,\n        remove_socnets: bool,\n        remove_cookies_related: bool,\n    ) {\n        if html.trim().is_empty() {\n            return;\n        }\n\n        let base_host = parsed_base_url.host.as_deref().unwrap_or(\"\");\n\n        // Remove external JS\n        if remove_external_js {\n            let base_host_owned = base_host.to_string();\n            *html = RE_EXTERNAL_SCRIPT\n                .replace_all(html, |caps: &regex::Captures| {\n                    let full_match = caps.get(0).map_or(\"\", |m| m.as_str());\n                    let src = caps.get(1).map_or(\"\", |m| m.as_str());\n\n                    if RE_EXTERNAL_URL.is_match(src) {\n                        // Parse host from the src URL\n                        let parsed_src = if src.starts_with(\"//\") {\n                            format!(\"https:{}\", src)\n                        } else {\n                            src.to_string()\n                        };\n                        if 
let Ok(parsed) = url::Url::parse(&parsed_src)\n                            && parsed.host_str().unwrap_or(\"\") != base_host_owned\n                        {\n                            return String::new();\n                        }\n                    }\n                    full_match.to_string()\n                })\n                .to_string();\n        }\n\n        // Remove crossorigin attributes\n        if remove_cross_origins {\n            *html = RE_CROSSORIGIN_LINK.replace_all(html, \"$1$3\").to_string();\n            *html = RE_CROSSORIGIN_SCRIPT.replace_all(html, \"$1$3\").to_string();\n        }\n\n        // Remove analytics and social network scripts\n        if remove_analytics || remove_socnets || remove_cookies_related {\n            let mut patterns: Vec<&str> = Vec::new();\n\n            if remove_analytics {\n                patterns.extend_from_slice(&[\n                    \"googletagmanager.com\",\n                    \"google-analytics.com\",\n                    \"ga.js\",\n                    \"gtag.js\",\n                    \"gtag(\",\n                    \"analytics.\",\n                    \"connect.facebook.net\",\n                    \"fbq(\",\n                ]);\n            }\n\n            if remove_socnets {\n                patterns.extend_from_slice(&[\n                    \"connect.facebook.net\",\n                    \"connect.facebook.com\",\n                    \"twitter.com\",\n                    \".x.com\",\n                    \"linkedin.com\",\n                    \"instagram.com\",\n                    \"pinterest.com\",\n                    \"tumblr.com\",\n                    \"plus.google.com\",\n                    \"curator.io\",\n                ]);\n            }\n\n            if remove_cookies_related {\n                patterns.extend_from_slice(&[\"cookies\", \"cookiebot\"]);\n            }\n\n            // Deduplicate\n            patterns.sort();\n            patterns.dedup();\n\n            *html 
= RE_SCRIPT_BLOCK\n                .replace_all(html, |caps: &regex::Captures| {\n                    let full_match = caps.get(0).map_or(\"\", |m| m.as_str());\n                    let full_lower = full_match.to_lowercase();\n\n                    for keyword in &patterns {\n                        if full_lower.contains(&keyword.to_lowercase()) {\n                            return String::new();\n                        }\n                    }\n\n                    full_match.to_string()\n                })\n                .to_string();\n\n            // Remove social network iframes\n            if remove_socnets {\n                *html = RE_SOCNET_IFRAME.replace_all(html, \"\").to_string();\n            }\n        }\n    }\n\n    /// Check if anchor listener removal is forced (e.g., for NextJS sites)\n    fn is_forced_to_remove_anchor_listeners(&self, html: &str) -> bool {\n        html.contains(\"_next/\")\n    }\n}\n\nimpl ContentProcessor for HtmlProcessor {\n    fn find_urls(&self, content: &str, source_url: &ParsedUrl) -> Option<FoundUrls> {\n        let mut found_urls = FoundUrls::new();\n\n        if !self.config.single_page {\n            self.find_href_urls(content, source_url, &mut found_urls);\n        }\n\n        if self.config.fonts_enabled {\n            self.find_fonts(content, source_url, &mut found_urls);\n        }\n\n        if self.config.images_enabled {\n            self.find_images(content, source_url, &mut found_urls);\n        }\n\n        if self.config.files_enabled {\n            self.find_audio(content, source_url, &mut found_urls);\n            self.find_video(content, source_url, &mut found_urls);\n        }\n\n        if self.config.scripts_enabled {\n            self.find_scripts(content, source_url, &mut found_urls);\n        }\n\n        if self.config.styles_enabled {\n            self.find_stylesheets(content, source_url, &mut found_urls);\n        }\n\n        if found_urls.get_count() > 0 {\n            
Some(found_urls)\n        } else {\n            None\n        }\n    }\n\n    fn apply_content_changes_before_url_parsing(\n        &self,\n        _content: &mut String,\n        _content_type: ContentTypeId,\n        _url: &ParsedUrl,\n    ) {\n        // No changes needed before URL parsing in HtmlProcessor\n    }\n\n    fn apply_content_changes_for_offline_version(\n        &self,\n        content: &mut String,\n        _content_type: ContentTypeId,\n        url: &ParsedUrl,\n        remove_unwanted_code: bool,\n    ) {\n        let base_url = url.get_full_url(true, false);\n\n        // Remove schema and host from full origin URLs\n        *content = self.remove_schema_and_host_from_full_origin_urls(url, content);\n\n        // Remove unwanted code from HTML\n        *content = self.remove_unwanted_code_from_html(content);\n\n        // Update all paths to relative\n        *content = self.update_html_paths_to_relative(content, url);\n\n        // Meta redirects (e.g., in Astro projects)\n        if let Some(caps) = RE_META_REFRESH.captures(content) {\n            let full_match = caps.get(0).map_or(\"\", |m| m.as_str());\n            let prefix = caps.get(1).map_or(\"\", |m| m.as_str());\n            let meta_url = caps.get(2).map_or(\"\", |m| m.as_str());\n            let suffix = caps.get(3).map_or(\"\", |m| m.as_str());\n\n            let relative = convert_url_to_relative(\n                url,\n                meta_url,\n                &self.config.initial_url,\n                None,\n                self.config.offline_export_preserve_urls,\n            );\n            *content = content.replace(full_match, &format!(\"{}{}{}\", prefix, relative, suffix));\n        }\n\n        // Apply specific HTML changes\n        self.apply_specific_html_changes(\n            content,\n            url,\n            self.config.disable_javascript,\n            remove_unwanted_code,\n            remove_unwanted_code,\n            remove_unwanted_code,\n            
remove_unwanted_code,\n        );\n\n        // Set JS variable and remove anchor listeners\n        if self.config.scripts_enabled {\n            if !self.config.offline_export_preserve_urls {\n                *content = self.set_js_variable_with_url_depth(content, &base_url);\n            }\n            if self.config.remove_all_anchor_listeners || self.is_forced_to_remove_anchor_listeners(content) {\n                *content = self.set_js_function_to_remove_all_anchor_listeners(content);\n            }\n        }\n    }\n\n    fn is_content_type_relevant(&self, content_type: ContentTypeId) -> bool {\n        is_relevant(content_type, &self.relevant_content_types)\n    }\n\n    fn get_name(&self) -> &str {\n        \"HtmlProcessor\"\n    }\n\n    fn set_debug_mode(&mut self, debug_mode: bool) {\n        self.debug_mode = debug_mode;\n    }\n}\n\n/// Decode HTML entities in URL attribute values.\n/// Single-pass implementation to avoid double-decoding (e.g. `&#38;amp;` → `&amp;`, not `&`).\nfn html_entity_decode(input: &str) -> String {\n    let mut result = String::with_capacity(input.len());\n    let bytes = input.as_bytes();\n    let len = bytes.len();\n    let mut i = 0;\n\n    while i < len {\n        if bytes[i] == b'&' {\n            // Try to match a named or numeric entity\n            if let Some((decoded, advance)) = try_decode_entity(&input[i..]) {\n                result.push_str(decoded);\n                i += advance;\n                continue;\n            }\n        }\n        result.push(input[i..].chars().next().unwrap());\n        i += input[i..].chars().next().unwrap().len_utf8();\n    }\n\n    result\n}\n\n/// Try to decode a single HTML entity at the start of `s`. 
Returns (decoded, bytes_consumed).\nfn try_decode_entity(s: &str) -> Option<(&'static str, usize)> {\n    // Named entities\n    for (entity, decoded) in [\n        (\"&amp;\", \"&\"),\n        (\"&lt;\", \"<\"),\n        (\"&gt;\", \">\"),\n        (\"&quot;\", \"\\\"\"),\n        (\"&apos;\", \"'\"),\n    ] {\n        if s.starts_with(entity) {\n            return Some((decoded, entity.len()));\n        }\n    }\n\n    // Numeric entities (decimal and hex)\n    for (entity, decoded) in [\n        (\"&#38;\", \"&\"),\n        (\"&#x26;\", \"&\"),\n        (\"&#60;\", \"<\"),\n        (\"&#x3C;\", \"<\"),\n        (\"&#x3c;\", \"<\"),\n        (\"&#62;\", \">\"),\n        (\"&#x3E;\", \">\"),\n        (\"&#x3e;\", \">\"),\n        (\"&#34;\", \"\\\"\"),\n        (\"&#x22;\", \"\\\"\"),\n        (\"&#39;\", \"'\"),\n        (\"&#x27;\", \"'\"),\n        (\"&#039;\", \"'\"),\n    ] {\n        if s.starts_with(entity) {\n            return Some((decoded, entity.len()));\n        }\n    }\n\n    None\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    fn make_config() -> ProcessorConfig {\n        ProcessorConfig::new(ParsedUrl::parse(\"https://example.com/\", None))\n    }\n\n    #[test]\n    fn test_find_href_urls() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r#\"<html><body><a href=\"/about\">About</a><a href=\"/contact\">Contact</a></body></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let result = processor.find_urls(html, &source);\n        assert!(result.is_some());\n        let found = result.unwrap();\n        assert!(found.get_count() >= 2);\n    }\n\n    #[test]\n    fn test_find_images() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r#\"<html><body><img src=\"/img/logo.png\"><img data-src=\"/img/lazy.jpg\"></body></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let result = 
processor.find_urls(html, &source);\n        assert!(result.is_some());\n    }\n\n    #[test]\n    fn test_find_scripts() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r#\"<html><head><script src=\"/js/app.js\"></script></head></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let result = processor.find_urls(html, &source);\n        assert!(result.is_some());\n    }\n\n    #[test]\n    fn test_single_page_no_hrefs() {\n        let mut config = make_config();\n        config.single_page = true;\n        let processor = HtmlProcessor::new(config);\n        let html = r#\"<html><body><a href=\"/about\">About</a><script src=\"/app.js\"></script></body></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let result = processor.find_urls(html, &source);\n        assert!(result.is_some());\n        // Should only find script, not href\n        let found = result.unwrap();\n        for (_key, url) in found.get_urls() {\n            assert_ne!(url.url, \"/about\");\n        }\n    }\n\n    #[test]\n    fn test_find_srcset() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r#\"<img srcset=\"/img/small.jpg 1x, /img/large.jpg 2x\">\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let result = processor.find_urls(html, &source);\n        assert!(result.is_some());\n    }\n\n    #[test]\n    fn test_spaces_in_quoted_img_src() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r#\"<html><body><img src=\"/images/dir with spaces/file with spaces.png?ver=1.0\"></body></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let found = processor.find_urls(html, &source).unwrap();\n        let urls: Vec<&str> = found.get_urls().iter().map(|(_, u)| u.url.as_str()).collect();\n        assert!(\n            urls.iter()\n                
.any(|u| u.contains(\"dir%20with%20spaces/file%20with%20spaces.png\")),\n            \"img src with spaces in quoted attribute should be captured. Found: {:?}\",\n            urls\n        );\n    }\n\n    #[test]\n    fn test_spaces_in_quoted_a_href() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r#\"<html><body><a href=\"/pages/page two.html\">Link</a></body></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let found = processor.find_urls(html, &source).unwrap();\n        let urls: Vec<&str> = found.get_urls().iter().map(|(_, u)| u.url.as_str()).collect();\n        assert!(\n            urls.iter().any(|u| u.contains(\"page%20two.html\")),\n            \"a href with spaces in quoted attribute should be captured. Found: {:?}\",\n            urls\n        );\n    }\n\n    #[test]\n    fn test_spaces_in_quoted_script_src() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r#\"<html><head><script src=\"/js/my script.js\"></script></head></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let found = processor.find_urls(html, &source).unwrap();\n        let urls: Vec<&str> = found.get_urls().iter().map(|(_, u)| u.url.as_str()).collect();\n        assert!(\n            urls.iter().any(|u| u.contains(\"my%20script.js\")),\n            \"script src with spaces in quoted attribute should be captured. 
Found: {:?}\",\n            urls\n        );\n    }\n\n    #[test]\n    fn test_unquoted_src_still_works() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r#\"<html><body><img src=logo.png></body></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let found = processor.find_urls(html, &source).unwrap();\n        let urls: Vec<&str> = found.get_urls().iter().map(|(_, u)| u.url.as_str()).collect();\n        assert!(\n            urls.iter().any(|u| u.contains(\"logo.png\")),\n            \"unquoted img src should still be captured. Found: {:?}\",\n            urls\n        );\n    }\n\n    #[test]\n    fn test_single_quoted_src_with_spaces() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r#\"<html><body><img src='/images/dir with spaces/another file.jpg'></body></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let found = processor.find_urls(html, &source).unwrap();\n        let urls: Vec<&str> = found.get_urls().iter().map(|(_, u)| u.url.as_str()).collect();\n        assert!(\n            urls.iter().any(|u| u.contains(\"another%20file.jpg\")),\n            \"single-quoted img src with spaces should be captured. Found: {:?}\",\n            urls\n        );\n    }\n\n    #[test]\n    fn test_unquoted_href_no_spaces() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r#\"<html><body><a href=/about>About</a></body></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let found = processor.find_urls(html, &source).unwrap();\n        let urls: Vec<&str> = found.get_urls().iter().map(|(_, u)| u.url.as_str()).collect();\n        assert!(\n            urls.iter().any(|u| u.contains(\"/about\")),\n            \"unquoted a href should still be captured. 
Found: {:?}\",\n            urls\n        );\n    }\n\n    #[test]\n    fn test_unquoted_script_src() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r#\"<html><head><script src=app.js></script></head></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let found = processor.find_urls(html, &source).unwrap();\n        let urls: Vec<&str> = found.get_urls().iter().map(|(_, u)| u.url.as_str()).collect();\n        assert!(\n            urls.iter().any(|u| u.contains(\"app.js\")),\n            \"unquoted script src should still be captured. Found: {:?}\",\n            urls\n        );\n    }\n\n    #[test]\n    fn test_spaces_in_audio_video_src() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r#\"<html><body>\n            <audio src=\"/media/my song.mp3\"></audio>\n            <video src=\"/media/my video.mp4\"></video>\n        </body></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let found = processor.find_urls(html, &source).unwrap();\n        let urls: Vec<&str> = found.get_urls().iter().map(|(_, u)| u.url.as_str()).collect();\n        assert!(\n            urls.iter().any(|u| u.contains(\"my%20song.mp3\")),\n            \"audio src with spaces should be captured. Found: {:?}\",\n            urls\n        );\n        assert!(\n            urls.iter().any(|u| u.contains(\"my%20video.mp4\")),\n            \"video src with spaces should be captured. 
Found: {:?}\",\n            urls\n        );\n    }\n\n    #[test]\n    fn test_mixed_quoted_and_unquoted() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r#\"<html><body>\n            <img src=\"/images/path with spaces/photo.jpg\">\n            <img src=simple.png>\n            <img src='/another path/img.webp'>\n        </body></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let found = processor.find_urls(html, &source).unwrap();\n        let urls: Vec<&str> = found.get_urls().iter().map(|(_, u)| u.url.as_str()).collect();\n        assert!(\n            urls.iter().any(|u| u.contains(\"path%20with%20spaces/photo.jpg\")),\n            \"double-quoted with spaces should work. Found: {:?}\",\n            urls\n        );\n        assert!(\n            urls.iter().any(|u| u.contains(\"simple.png\")),\n            \"unquoted without spaces should work. Found: {:?}\",\n            urls\n        );\n        assert!(\n            urls.iter().any(|u| u.contains(\"another%20path/img.webp\")),\n            \"single-quoted with spaces should work. Found: {:?}\",\n            urls\n        );\n    }\n\n    #[test]\n    fn test_fragment_links_still_skipped() {\n        let processor = HtmlProcessor::new(make_config());\n        let html = r##\"<html><body><a href=\"#section\">Section</a><a href=\"/real-page\">Real</a></body></html>\"##;\n        let source = ParsedUrl::parse(\"https://example.com/\", None);\n        let found = processor.find_urls(html, &source).unwrap();\n        let urls: Vec<&str> = found.get_urls().iter().map(|(_, u)| u.url.as_str()).collect();\n        assert!(\n            !urls.iter().any(|u| u == &\"#section\"),\n            \"fragment-only links should still be skipped. Found: {:?}\",\n            urls\n        );\n        assert!(\n            urls.iter().any(|u| u.contains(\"/real-page\")),\n            \"real page links should be captured. 
Found: {:?}\",\n            urls\n        );\n    }\n}\n"
  },
  {
    "path": "src/content_processor/javascript_processor.rs",
    "content": "// SiteOne Crawler - JavaScriptProcessor\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Extracts URLs from JS import/from statements and applies offline conversion.\n\nuse once_cell::sync::Lazy;\nuse regex::Regex;\n\nuse crate::content_processor::base_processor::{ProcessorConfig, is_relevant};\nuse crate::content_processor::content_processor::ContentProcessor;\nuse crate::content_processor::html_processor::JS_VARIABLE_NAME_URL_DEPTH;\nuse crate::engine::found_url::UrlSource;\nuse crate::engine::found_urls::FoundUrls;\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::types::ContentTypeId;\n\nstatic RE_IMPORT_FROM: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?i)from\\s*[\"']([^\"']+\\.js[^\"']*)[\"']\"#).unwrap());\n\nstatic RE_QUOTED_JS_PATH: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?i)[\"'](/[^\"']+\\.js)[\"']\"#).unwrap());\n\nstatic RE_QUOTED_HTTPS_JS: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?i)[\"'](https://[^\"']+\\.js)[\"']\"#).unwrap());\n\nstatic RE_WEBPACK_CHUNKS: Lazy<Regex> = Lazy::new(|| {\n    Regex::new(r#\"(?i)\"assets/js/\".*\\+.*\\(\\{([^}]*)\\}.*\\[e\\].*\\|\\|.*e\\)\\s*\\+\\s*\"\\.\".*\\+\\s*\\{([^}]+)\\}\"#).unwrap()\n});\n\nstatic RE_WEBPACK_NAME_ITEM: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"([0-9]+):\\s*\"([^\"']+)\"\"#).unwrap());\n\nstatic RE_WEBPACK_HASH_ITEM: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"([0-9]+):\\s*\"([a-f0-9]+)\"\"#).unwrap());\n\n// Offline conversion regexes\nstatic RE_WEBPACK_AP: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"a\\.p=\"/\"\"#).unwrap());\n\nstatic RE_HREF_SLASH: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"href:\"/\"#).unwrap());\n\nstatic RE_PATH_SLASH: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"path:\"/\"#).unwrap());\n\nstatic RE_PATH_UPPER_SLASH: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"Path:\"/\"#).unwrap());\n\nstatic RE_CROSSORIGIN: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)crossorigin\").unwrap());\n\npub struct JavaScriptProcessor {\n    #[allow(dead_code)]\n    config: 
ProcessorConfig,\n    debug_mode: bool,\n    relevant_content_types: Vec<ContentTypeId>,\n}\n\nimpl JavaScriptProcessor {\n    pub fn new(config: ProcessorConfig) -> Self {\n        Self {\n            config,\n            debug_mode: false,\n            relevant_content_types: vec![ContentTypeId::Html, ContentTypeId::Script],\n        }\n    }\n\n    /// Find URLs in JavaScript import from statements and quoted JS paths\n    fn find_urls_import_from(&self, content: &str, source_url: &ParsedUrl) -> Option<FoundUrls> {\n        // Don't process HTML files\n        if content.to_lowercase().contains(\"<html\") {\n            return None;\n        }\n        if !content.contains(\"from\") {\n            return None;\n        }\n\n        let mut found_urls_txt: Vec<String> = Vec::new();\n\n        // import ... from \"path.js\"\n        for caps in RE_IMPORT_FROM.captures_iter(content) {\n            if let Some(m) = caps.get(1) {\n                found_urls_txt.push(m.as_str().trim().to_string());\n            }\n        }\n\n        // \"/assets/js/12.c6446aa6.js\" style paths\n        for caps in RE_QUOTED_JS_PATH.captures_iter(content) {\n            if let Some(m) = caps.get(1) {\n                found_urls_txt.push(m.as_str().trim().to_string());\n            }\n        }\n\n        // \"https://...\" style JS URLs\n        for caps in RE_QUOTED_HTTPS_JS.captures_iter(content) {\n            if let Some(m) = caps.get(1) {\n                found_urls_txt.push(m.as_str().trim().to_string());\n            }\n        }\n\n        // Webpack chunks pattern\n        if let Some(caps) = RE_WEBPACK_CHUNKS.captures(content) {\n            let mut tmp_webpack: std::collections::HashMap<String, String> = std::collections::HashMap::new();\n\n            // Parse name mappings: {5:\"vendors~docsearch\"}\n            if let Some(names_str) = caps.get(1) {\n                for item in names_str.as_str().split(',') {\n                    if let Some(item_caps) = 
RE_WEBPACK_NAME_ITEM.captures(item) {\n                        let id = item_caps.get(1).map_or(\"\", |m| m.as_str()).to_string();\n                        let name = item_caps.get(2).map_or(\"\", |m| m.as_str()).to_string();\n                        tmp_webpack.insert(id, name);\n                    }\n                }\n            }\n\n            // Parse hash mappings and build URLs\n            if let Some(hashes_str) = caps.get(2) {\n                for item in hashes_str.as_str().split(',') {\n                    if let Some(item_caps) = RE_WEBPACK_HASH_ITEM.captures(item) {\n                        let id = item_caps.get(1).map_or(\"\", |m| m.as_str());\n                        let hash = item_caps.get(2).map_or(\"\", |m| m.as_str());\n\n                        found_urls_txt.push(format!(\"/assets/js/{}.{}.js\", id, hash));\n\n                        // Special case: named webpack chunks\n                        if let Some(name) = tmp_webpack.get(id) {\n                            found_urls_txt.push(format!(\"/assets/js/{}.{}.js\", name, hash));\n                        }\n                    }\n                }\n            }\n        }\n\n        if found_urls_txt.is_empty() {\n            return None;\n        }\n\n        let mut found_urls = FoundUrls::new();\n        let url_refs: Vec<&str> = found_urls_txt.iter().map(|s| s.as_str()).collect();\n        found_urls.add_urls_from_text_array(&url_refs, &source_url.path, UrlSource::JsUrl);\n\n        if found_urls.get_count() > 0 {\n            Some(found_urls)\n        } else {\n            None\n        }\n    }\n}\n\nimpl ContentProcessor for JavaScriptProcessor {\n    fn find_urls(&self, content: &str, source_url: &ParsedUrl) -> Option<FoundUrls> {\n        self.find_urls_import_from(content, source_url)\n    }\n\n    fn apply_content_changes_before_url_parsing(\n        &self,\n        _content: &mut String,\n        _content_type: ContentTypeId,\n        _url: &ParsedUrl,\n    ) {\n        // 
No changes needed before URL parsing in JavaScriptProcessor\n    }\n\n    fn apply_content_changes_for_offline_version(\n        &self,\n        content: &mut String,\n        _content_type: ContentTypeId,\n        _url: &ParsedUrl,\n        _remove_unwanted_code: bool,\n    ) {\n        // Replace crossorigin keyword (case-insensitive)\n        if RE_CROSSORIGIN.is_match(content) {\n            *content = RE_CROSSORIGIN.replace_all(content, \"_SiteOne_CO_\").to_string();\n        }\n\n        // When preserving URLs, skip webpack path transformations since paths remain root-relative\n        if self.config.offline_export_preserve_urls {\n            return;\n        }\n\n        let webpack_path_prefix = format!(\n            \"({} > 0 ? \\\"../\\\".repeat({}) : \\\"./\\\")\",\n            JS_VARIABLE_NAME_URL_DEPTH, JS_VARIABLE_NAME_URL_DEPTH\n        );\n\n        // webpack case: a.p=\"/\"\n        if content.to_lowercase().contains(\"a.p=\") {\n            *content = RE_WEBPACK_AP\n                .replace_all(content, &format!(\"a.p={}\", webpack_path_prefix))\n                .to_string();\n        }\n\n        // webpack href/path/Path cases — each replacement must keep the matched\n        // key name; emitting \"href:\" for the path/Path patterns would rename JS keys.\n        if content.to_lowercase().contains(\"href:\\\"/\") {\n            *content = RE_HREF_SLASH\n                .replace_all(content, &format!(\"href:{}+\\\"\", webpack_path_prefix))\n                .to_string();\n        }\n        if content.to_lowercase().contains(\"path:\\\"/\") {\n            *content = RE_PATH_SLASH\n                .replace_all(content, &format!(\"path:{}+\\\"\", webpack_path_prefix))\n                .to_string();\n        }\n        if content.contains(\"Path:\\\"/\") {\n            *content = RE_PATH_UPPER_SLASH\n                .replace_all(content, &format!(\"Path:{}+\\\"\", webpack_path_prefix))\n                .to_string();\n        }\n    }\n\n    fn is_content_type_relevant(&self, content_type: ContentTypeId) -> bool {\n        is_relevant(content_type, 
&self.relevant_content_types)\n    }\n\n    fn get_name(&self) -> &str {\n        \"JavaScriptProcessor\"\n    }\n\n    fn set_debug_mode(&mut self, debug_mode: bool) {\n        self.debug_mode = debug_mode;\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    fn make_config() -> ProcessorConfig {\n        ProcessorConfig::new(ParsedUrl::parse(\"https://example.com/\", None))\n    }\n\n    #[test]\n    fn test_find_import_from() {\n        let processor = JavaScriptProcessor::new(make_config());\n        let js = r#\"import{R as W}from\"./Repl.209fef3e.js\";import{s}from\"./stores.js\";\"#;\n        let source = ParsedUrl::parse(\"https://example.com/app.js\", None);\n        let result = processor.find_urls(js, &source);\n        assert!(result.is_some());\n        assert!(result.unwrap().get_count() >= 2);\n    }\n\n    #[test]\n    fn test_skip_html_content() {\n        let processor = JavaScriptProcessor::new(make_config());\n        let html = r#\"<html><head></head><body>from something</body></html>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/page.html\", None);\n        let result = processor.find_urls(html, &source);\n        assert!(result.is_none());\n    }\n\n    #[test]\n    fn test_find_quoted_js_paths() {\n        let processor = JavaScriptProcessor::new(make_config());\n        let js = r#\"from x; var chunks = [\"/assets/js/12.c6446aa6.js\",\"/assets/js/120.03870a87.js\"]\"#;\n        let source = ParsedUrl::parse(\"https://example.com/bundle.js\", None);\n        let result = processor.find_urls(js, &source);\n        assert!(result.is_some());\n    }\n}\n"
  },
  {
    "path": "src/content_processor/manager.rs",
    "content": "// SiteOne Crawler - ContentProcessorManager\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Holds all registered processors and delegates operations to them.\n\nuse std::time::Instant;\n\nuse crate::engine::found_urls::FoundUrls;\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::result::manager_stats::ManagerStats;\nuse crate::types::ContentTypeId;\n\nuse super::content_processor::ContentProcessor;\n\npub const SUPER_TABLE_CONTENT_PROCESSORS_STATS: &str = \"content-processors-stats\";\n\npub struct ContentProcessorManager {\n    processors: Vec<Box<dyn ContentProcessor>>,\n    stats: ManagerStats,\n}\n\nimpl ContentProcessorManager {\n    pub fn new() -> Self {\n        Self {\n            processors: Vec::new(),\n            stats: ManagerStats::new(),\n        }\n    }\n\n    /// Register a content processor. Returns error if a processor with the\n    /// same name is already registered.\n    pub fn register_processor(&mut self, processor: Box<dyn ContentProcessor>) -> Result<(), String> {\n        let name = processor.get_name().to_string();\n        if self.processors.iter().any(|p| p.get_name() == name) {\n            return Err(format!(\"Content processor '{}' is already registered\", name));\n        }\n        self.processors.push(processor);\n        Ok(())\n    }\n\n    /// Get references to all registered processors\n    pub fn get_processors(&self) -> &[Box<dyn ContentProcessor>] {\n        &self.processors\n    }\n\n    /// Find URLs in content using all relevant processors.\n    /// Returns a Vec of FoundUrls from each processor that found something.\n    pub fn find_urls(&mut self, content: &str, content_type: ContentTypeId, url: &ParsedUrl) -> Vec<FoundUrls> {\n        let mut result = Vec::new();\n\n        for processor in &self.processors {\n            if processor.is_content_type_relevant(content_type) {\n                let start = Instant::now();\n                let found_urls = processor.find_urls(content, url);\n     
           self.stats.measure_exec_time(processor.get_name(), \"findUrls\", start);\n\n                if let Some(urls) = found_urls\n                    && urls.get_count() > 0\n                {\n                    result.push(urls);\n                }\n            }\n        }\n\n        result\n    }\n\n    /// Apply content changes for offline version using all relevant processors.\n    pub fn apply_content_changes_for_offline_version(\n        &mut self,\n        content: &mut String,\n        content_type: ContentTypeId,\n        url: &ParsedUrl,\n        remove_unwanted_code: bool,\n    ) {\n        for processor in &self.processors {\n            if processor.is_content_type_relevant(content_type) {\n                let start = Instant::now();\n                processor.apply_content_changes_for_offline_version(content, content_type, url, remove_unwanted_code);\n                self.stats\n                    .measure_exec_time(processor.get_name(), \"applyContentChangesForOfflineVersion\", start);\n            }\n        }\n    }\n\n    /// Apply content changes for offline version with a content loader callback.\n    /// Used when storage access is available (e.g., from the offline exporter).\n    pub fn apply_content_changes_for_offline_version_with_loader(\n        &mut self,\n        content: &mut String,\n        content_type: ContentTypeId,\n        url: &ParsedUrl,\n        remove_unwanted_code: bool,\n        content_loader: &dyn Fn(&str) -> Option<String>,\n    ) {\n        for processor in &self.processors {\n            if processor.is_content_type_relevant(content_type) {\n                let start = Instant::now();\n                processor.apply_content_changes_for_offline_version_with_loader(\n                    content,\n                    content_type,\n                    url,\n                    remove_unwanted_code,\n                    content_loader,\n                );\n                self.stats\n                    
.measure_exec_time(processor.get_name(), \"applyContentChangesForOfflineVersion\", start);\n            }\n        }\n    }\n\n    /// Apply content changes before URL parsing using all relevant processors.\n    pub fn apply_content_changes_before_url_parsing(\n        &mut self,\n        content: &mut String,\n        content_type: ContentTypeId,\n        url: &ParsedUrl,\n    ) {\n        for processor in &self.processors {\n            if processor.is_content_type_relevant(content_type) {\n                let start = Instant::now();\n                processor.apply_content_changes_before_url_parsing(content, content_type, url);\n                self.stats\n                    .measure_exec_time(processor.get_name(), \"applyContentChangesBeforeUrlParsing\", start);\n            }\n        }\n    }\n\n    /// Get reference to the stats tracker\n    pub fn get_stats(&self) -> &ManagerStats {\n        &self.stats\n    }\n}\n\nimpl Default for ContentProcessorManager {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n"
  },
  {
    "path": "src/content_processor/mod.rs",
    "content": "pub mod astro_processor;\r\npub mod base_processor;\r\n#[allow(clippy::module_inception)]\r\npub mod content_processor;\r\npub mod css_processor;\r\npub mod html_processor;\r\npub mod javascript_processor;\r\npub mod manager;\r\npub mod nextjs_processor;\r\npub mod svelte_processor;\r\npub mod xml_processor;\r\n"
  },
  {
    "path": "src/content_processor/nextjs_processor.rs",
    "content": "// SiteOne Crawler - NextJsProcessor\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Handles Next.js specific URL extraction and offline conversion.\n\nuse once_cell::sync::Lazy;\nuse regex::Regex;\n\nuse crate::content_processor::base_processor::{ProcessorConfig, is_relevant};\nuse crate::content_processor::content_processor::ContentProcessor;\nuse crate::content_processor::html_processor::JS_VARIABLE_NAME_URL_DEPTH;\nuse crate::engine::found_url::UrlSource;\nuse crate::engine::found_urls::FoundUrls;\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::types::ContentTypeId;\n\nstatic RE_MANIFEST_JS: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?is)[\"']([a-z0-9/._\\-\\[\\]]+\\.js)[\"']\"#).unwrap());\n\n// Offline conversion regexes\nstatic RE_DISABLE_PREFETCH: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)(prefetch:\\([a-z]+,[a-z]+\\)=>\\{)if\").unwrap());\n\nstatic RE_ESCAPED_NEXT: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?i)\\\\([\"'])/_next/\"#).unwrap());\n\nstatic RE_ASSIGN_NEXT: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?i)([a-z0-9]+\\.[a-z0-9]+=|:)([\"'])/_next/\"#).unwrap());\n\nstatic RE_CONCAT_NEXT: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?i)(concat\\([a-z]+,)([\"']/_next/)([\"'])\"#).unwrap());\n\nstatic RE_NEXT_DATA: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r\"(?is)<script[^>]+__NEXT_DATA__[^>]*>.*?</script>\").unwrap());\n\nstatic RE_PREFETCH_FUNC: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)(prefetch\\()([a-z]+)(\\)\\s*\\{)\\s*let\").unwrap());\n\nstatic RE_HREF_CONCAT: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?i)(\\{href:)([\"'])(/)(['\"]\\.)\"#).unwrap());\n\nstatic RE_PUSH_SLASH: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?i)(push\\(\\[)([\"']/)\"#).unwrap());\n\nstatic RE_RETURN_QUERY: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r#\"(?i)(return\\s*[\"'])\\s*\\?[^\"']+=[^\"']*([\"'])\"#).unwrap());\n\nstatic RE_NEXT_QUERY_PARAMS: Lazy<Regex> =\n    Lazy::new(|| 
Regex::new(r\"(?i)((_next|chunks)/[a-z0-9/()\\[\\]._@%^{}-]+\\.[a-z0-9]{1,5})\\?[a-z0-9_&=.-]+\").unwrap());\n\nstatic RE_DPL_QUERY: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?i)\\?dpl=[^\"' ]+\"#).unwrap());\n\npub struct NextJsProcessor {\n    #[allow(dead_code)]\n    config: ProcessorConfig,\n    debug_mode: bool,\n    relevant_content_types: Vec<ContentTypeId>,\n}\n\nimpl NextJsProcessor {\n    pub fn new(config: ProcessorConfig) -> Self {\n        Self {\n            config,\n            debug_mode: false,\n            relevant_content_types: vec![ContentTypeId::Html, ContentTypeId::Script, ContentTypeId::Stylesheet],\n        }\n    }\n}\n\nimpl ContentProcessor for NextJsProcessor {\n    fn find_urls(&self, content: &str, source_url: &ParsedUrl) -> Option<FoundUrls> {\n        // Only process Next.js manifest files\n        let is_nextjs_manifest =\n            source_url.path.contains(\"_next/\") && source_url.path.to_lowercase().contains(\"manifest\");\n        if !is_nextjs_manifest {\n            return None;\n        }\n\n        let nextjs_base_dir = if let Some(pos) = source_url.path.find(\"/_next/\") {\n            source_url.path[..pos + 7].to_string() // include \"/_next/\"\n        } else {\n            return None;\n        };\n\n        let mut found_urls_txt: Vec<String> = Vec::new();\n        for caps in RE_MANIFEST_JS.captures_iter(content) {\n            if let Some(m) = caps.get(1) {\n                found_urls_txt.push(format!(\"{}{}\", nextjs_base_dir, m.as_str()));\n            }\n        }\n\n        if found_urls_txt.is_empty() {\n            return None;\n        }\n\n        let mut found_urls = FoundUrls::new();\n        let url_refs: Vec<&str> = found_urls_txt.iter().map(|s| s.as_str()).collect();\n        found_urls.add_urls_from_text_array(&url_refs, &source_url.path, UrlSource::JsUrl);\n\n        if found_urls.get_count() > 0 {\n            Some(found_urls)\n        } else {\n            None\n        }\n    }\n\n    fn 
apply_content_changes_before_url_parsing(\n        &self,\n        content: &mut String,\n        _content_type: ContentTypeId,\n        _url: &ParsedUrl,\n    ) {\n        // Only process content containing _next\n        if !content.to_lowercase().contains(\"_next\") {\n            return;\n        }\n\n        // Remove query params from static assets in NextJS\n        *content = RE_NEXT_QUERY_PARAMS.replace_all(content, \"$1\").to_string();\n        *content = RE_DPL_QUERY.replace_all(content, \"\").to_string();\n    }\n\n    fn apply_content_changes_for_offline_version(\n        &self,\n        content: &mut String,\n        _content_type: ContentTypeId,\n        url: &ParsedUrl,\n        _remove_unwanted_code: bool,\n    ) {\n        // Only process content containing _next\n        if !content.to_lowercase().contains(\"_next\") {\n            return;\n        }\n\n        // Disable prefetching in NextJS\n        *content = RE_DISABLE_PREFETCH.replace_all(content, \"$1 return; if\").to_string();\n\n        // Calculate depth for relative prefix\n        let base_path = &url.path;\n        let trimmed = base_path.trim_start_matches('/');\n        let mut depth = trimmed.matches('/').count();\n        let needs_index = base_path != \"/\" && !base_path.is_empty() && base_path.ends_with('/');\n        if needs_index {\n            depth += 1;\n        }\n\n        let nextjs_prefix1 = if depth > 0 {\n            \"../\".repeat(depth)\n        } else {\n            \"./\".to_string()\n        };\n\n        // Replace escaped /_next/ paths\n        *content = RE_ESCAPED_NEXT\n            .replace_all(content, |caps: &regex::Captures| {\n                let quote = caps.get(1).map_or(\"\", |m| m.as_str());\n                format!(\"\\\\{}{}_next/\", quote, nextjs_prefix1)\n            })\n            .to_string();\n\n        let nextjs_prefix2 = format!(\n            \"({} > 0 ? 
\\\"../\\\".repeat({}) : \\\"./\\\")\",\n            JS_VARIABLE_NAME_URL_DEPTH, JS_VARIABLE_NAME_URL_DEPTH\n        );\n\n        // Replace assignment /_next/ patterns\n        *content = RE_ASSIGN_NEXT\n            .replace_all(content, |caps: &regex::Captures| {\n                let prefix = caps.get(1).map_or(\"\", |m| m.as_str());\n                let quote = caps.get(2).map_or(\"\", |m| m.as_str());\n                format!(\"{}{} + {}_next/\", prefix, nextjs_prefix2, quote)\n            })\n            .to_string();\n\n        // concat(e,\"/_next/\" -> concat(e,PREFIX+\"/_next/\")\n        *content = RE_CONCAT_NEXT\n            .replace_all(content, |caps: &regex::Captures| {\n                let concat_start = caps.get(1).map_or(\"\", |m| m.as_str());\n                let next_path = caps.get(2).map_or(\"\", |m| m.as_str());\n                let end_quote = caps.get(3).map_or(\"\", |m| m.as_str());\n                format!(\"{}{}+{}{}\", concat_start, nextjs_prefix2, next_path, end_quote)\n            })\n            .to_string();\n\n        // Remove __NEXT_DATA__ script and replace with empty\n        let empty_next_data =\n            r#\"<script id=\"__NEXT_DATA__\" type=\"application/json\">{\"props\":{\"pageProps\":{}}}</script>\"#;\n        *content = RE_NEXT_DATA.replace_all(content, empty_next_data).to_string();\n\n        // Add prefix to prefetch function\n        *content = RE_PREFETCH_FUNC\n            .replace_all(content, |caps: &regex::Captures| {\n                let start = caps.get(1).map_or(\"\", |m| m.as_str());\n                let arg = caps.get(2).map_or(\"\", |m| m.as_str());\n                let mid = caps.get(3).map_or(\"\", |m| m.as_str());\n                format!(\"{}{}{} {}={}+{}; let\", start, arg, mid, arg, nextjs_prefix2, arg)\n            })\n            .to_string();\n\n        // {href:\"/\".concat\n        *content = RE_HREF_CONCAT\n            .replace_all(content, |caps: &regex::Captures| {\n                let 
start = caps.get(1).map_or(\"\", |m| m.as_str());\n                let q1 = caps.get(2).map_or(\"\", |m| m.as_str());\n                let slash = caps.get(3).map_or(\"\", |m| m.as_str());\n                let end = caps.get(4).map_or(\"\", |m| m.as_str());\n                format!(\"{}{}+{}{}{}\", start, nextjs_prefix2, q1, slash, end)\n            })\n            .to_string();\n\n        // push([\"/\n        *content = RE_PUSH_SLASH\n            .replace_all(content, |caps: &regex::Captures| {\n                let start = caps.get(1).map_or(\"\", |m| m.as_str());\n                let path = caps.get(2).map_or(\"\", |m| m.as_str());\n                format!(\"{}{}+{}\", start, nextjs_prefix2, path)\n            })\n            .to_string();\n\n        // return\"?dpl=...\" -> return\"\"\n        *content = RE_RETURN_QUERY.replace_all(content, \"$1$2\").to_string();\n\n        // Remove query params from _next/static/ paths\n        *content = RE_NEXT_QUERY_PARAMS.replace_all(content, \"$1\").to_string();\n        *content = RE_DPL_QUERY.replace_all(content, \"\").to_string();\n    }\n\n    fn is_content_type_relevant(&self, content_type: ContentTypeId) -> bool {\n        is_relevant(content_type, &self.relevant_content_types)\n    }\n\n    fn get_name(&self) -> &str {\n        \"NextJsProcessor\"\n    }\n\n    fn set_debug_mode(&mut self, debug_mode: bool) {\n        self.debug_mode = debug_mode;\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    fn make_config() -> ProcessorConfig {\n        ProcessorConfig::new(ParsedUrl::parse(\"https://example.com/\", None))\n    }\n\n    #[test]\n    fn test_non_manifest_returns_none() {\n        let processor = NextJsProcessor::new(make_config());\n        let content = r#\"some javascript content\"#;\n        let source = ParsedUrl::parse(\"https://example.com/app.js\", None);\n        let result = processor.find_urls(content, &source);\n        assert!(result.is_none());\n    }\n\n    #[test]\n    fn 
test_before_url_parsing_removes_dpl() {\n        let processor = NextJsProcessor::new(make_config());\n        let mut content = r#\"/_next/static/css/file.css?dpl=dpl_abc123\"#.to_string();\n        let source = ParsedUrl::parse(\"https://example.com/page\", None);\n        processor.apply_content_changes_before_url_parsing(&mut content, ContentTypeId::Html, &source);\n        assert!(!content.contains(\"?dpl=\"));\n    }\n}\n"
  },
  {
    "path": "src/content_processor/svelte_processor.rs",
    "content": "// SiteOne Crawler - SvelteProcessor\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Handles SvelteKit specific patterns.\n\nuse once_cell::sync::Lazy;\nuse regex::Regex;\n\nuse crate::content_processor::base_processor::ProcessorConfig;\nuse crate::content_processor::content_processor::ContentProcessor;\nuse crate::engine::found_urls::FoundUrls;\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::types::ContentTypeId;\n\nstatic RE_SVELTE_TAG: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)<svelte:[^>]+>\\s*\").unwrap());\n\npub struct SvelteProcessor {\n    #[allow(dead_code)]\n    config: ProcessorConfig,\n    debug_mode: bool,\n}\n\nimpl SvelteProcessor {\n    pub fn new(config: ProcessorConfig) -> Self {\n        Self {\n            config,\n            debug_mode: false,\n        }\n    }\n}\n\nimpl ContentProcessor for SvelteProcessor {\n    fn find_urls(&self, _content: &str, _source_url: &ParsedUrl) -> Option<FoundUrls> {\n        // SvelteProcessor doesn't extract URLs\n        None\n    }\n\n    fn apply_content_changes_before_url_parsing(\n        &self,\n        _content: &mut String,\n        _content_type: ContentTypeId,\n        _url: &ParsedUrl,\n    ) {\n        // No changes needed before URL parsing in SvelteProcessor\n    }\n\n    fn apply_content_changes_for_offline_version(\n        &self,\n        content: &mut String,\n        _content_type: ContentTypeId,\n        _url: &ParsedUrl,\n        _remove_unwanted_code: bool,\n    ) {\n        // Remove <svelte:*> tags for offline version\n        if content.contains(\"<svelte:\") {\n            *content = RE_SVELTE_TAG.replace_all(content, \"\").to_string();\n        }\n    }\n\n    fn is_content_type_relevant(&self, content_type: ContentTypeId) -> bool {\n        // SvelteProcessor is only relevant for HTML (overrides the base relevantContentTypes)\n        content_type == ContentTypeId::Html\n    }\n\n    fn get_name(&self) -> &str {\n        \"SvelteProcessor\"\n    }\n\n    
fn set_debug_mode(&mut self, debug_mode: bool) {\n        self.debug_mode = debug_mode;\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    fn make_config() -> ProcessorConfig {\n        ProcessorConfig::new(ParsedUrl::parse(\"https://example.com/\", None))\n    }\n\n    #[test]\n    fn test_remove_svelte_tags() {\n        let processor = SvelteProcessor::new(make_config());\n        let mut content = r#\"<html><head><svelte:head></svelte:head></head><body>test</body></html>\"#.to_string();\n        let url = ParsedUrl::parse(\"https://example.com/\", None);\n        processor.apply_content_changes_for_offline_version(&mut content, ContentTypeId::Html, &url, false);\n        assert!(!content.contains(\"<svelte:\"));\n    }\n\n    #[test]\n    fn test_is_relevant_only_for_html() {\n        let processor = SvelteProcessor::new(make_config());\n        assert!(processor.is_content_type_relevant(ContentTypeId::Html));\n        assert!(!processor.is_content_type_relevant(ContentTypeId::Script));\n        assert!(!processor.is_content_type_relevant(ContentTypeId::Stylesheet));\n    }\n}\n"
  },
  {
    "path": "src/content_processor/xml_processor.rs",
    "content": "// SiteOne Crawler - XmlProcessor\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Extracts URLs from sitemap.xml and sitemap index files.\n\nuse quick_xml::Reader;\nuse quick_xml::events::Event;\n\nuse crate::content_processor::base_processor::{ProcessorConfig, is_relevant};\nuse crate::content_processor::content_processor::ContentProcessor;\nuse crate::engine::found_url::{FoundUrl, UrlSource};\nuse crate::engine::found_urls::FoundUrls;\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::types::ContentTypeId;\n\npub struct XmlProcessor {\n    #[allow(dead_code)]\n    config: ProcessorConfig,\n    debug_mode: bool,\n    relevant_content_types: Vec<ContentTypeId>,\n}\n\nimpl XmlProcessor {\n    pub fn new(config: ProcessorConfig) -> Self {\n        Self {\n            config,\n            debug_mode: false,\n            relevant_content_types: vec![ContentTypeId::Xml],\n        }\n    }\n\n    fn is_sitemap_xml_index(content: &str) -> bool {\n        content.to_lowercase().contains(\"<sitemapindex\")\n    }\n\n    fn is_sitemap_xml(content: &str) -> bool {\n        content.to_lowercase().contains(\"<urlset\")\n    }\n\n    /// Parse URLs from a sitemap.xml <urlset> document\n    fn get_urls_from_sitemap_xml(content: &str) -> Vec<String> {\n        let mut urls = Vec::new();\n        let mut reader = Reader::from_str(content);\n        reader.config_mut().trim_text(true);\n\n        let mut in_loc = false;\n        let mut buf = Vec::new();\n\n        loop {\n            match reader.read_event_into(&mut buf) {\n                Ok(Event::Start(ref e)) => {\n                    let local_name = e.local_name();\n                    if local_name.as_ref() == b\"loc\" {\n                        in_loc = true;\n                    }\n                }\n                Ok(Event::Text(ref e)) => {\n                    if in_loc && let Ok(text) = e.decode() {\n                        let url = text.trim().to_string();\n                        if 
!url.is_empty() {\n                            urls.push(url);\n                        }\n                    }\n                }\n                Ok(Event::End(ref e)) => {\n                    let local_name = e.local_name();\n                    if local_name.as_ref() == b\"loc\" {\n                        in_loc = false;\n                    }\n                }\n                Ok(Event::Eof) => break,\n                Err(_) => break,\n                _ => {}\n            }\n            buf.clear();\n        }\n\n        urls\n    }\n\n    /// Parse URLs from a sitemap index document\n    fn get_urls_from_sitemap_xml_index(content: &str) -> Vec<String> {\n        let mut urls = Vec::new();\n        let mut reader = Reader::from_str(content);\n        reader.config_mut().trim_text(true);\n\n        let mut in_sitemap = false;\n        let mut in_loc = false;\n        let mut buf = Vec::new();\n\n        loop {\n            match reader.read_event_into(&mut buf) {\n                Ok(Event::Start(ref e)) => {\n                    let local_name = e.local_name();\n                    if local_name.as_ref() == b\"sitemap\" {\n                        in_sitemap = true;\n                    } else if local_name.as_ref() == b\"loc\" && in_sitemap {\n                        in_loc = true;\n                    }\n                }\n                Ok(Event::Text(ref e)) => {\n                    if in_loc && let Ok(text) = e.decode() {\n                        let url = text.trim().to_string();\n                        let url_lower = url.to_lowercase();\n                        // Include .xml and .xml.gz sitemap URLs\n                        if url_lower.ends_with(\".xml\") || url_lower.ends_with(\".xml.gz\") {\n                            urls.push(url);\n                        }\n                    }\n                }\n                Ok(Event::End(ref e)) => {\n                    let local_name = e.local_name();\n                    if local_name.as_ref() 
== b\"loc\" {\n                        in_loc = false;\n                    } else if local_name.as_ref() == b\"sitemap\" {\n                        in_sitemap = false;\n                    }\n                }\n                Ok(Event::Eof) => break,\n                Err(_) => break,\n                _ => {}\n            }\n            buf.clear();\n        }\n\n        urls\n    }\n}\n\nimpl ContentProcessor for XmlProcessor {\n    fn find_urls(&self, content: &str, source_url: &ParsedUrl) -> Option<FoundUrls> {\n        let source_url_str = source_url.get_full_url(true, false);\n\n        if Self::is_sitemap_xml_index(content) {\n            let urls = Self::get_urls_from_sitemap_xml_index(content);\n            if urls.is_empty() {\n                return None;\n            }\n\n            let mut found_urls = FoundUrls::new();\n            for url in urls {\n                found_urls.add_url(FoundUrl::new(&url, &source_url_str, UrlSource::Sitemap));\n            }\n            return Some(found_urls);\n        }\n\n        if Self::is_sitemap_xml(content) {\n            let urls = Self::get_urls_from_sitemap_xml(content);\n            if urls.is_empty() {\n                return None;\n            }\n\n            let mut found_urls = FoundUrls::new();\n            for url in urls {\n                found_urls.add_url(FoundUrl::new(&url, &source_url_str, UrlSource::Sitemap));\n            }\n            return Some(found_urls);\n        }\n\n        None\n    }\n\n    fn apply_content_changes_before_url_parsing(\n        &self,\n        _content: &mut String,\n        _content_type: ContentTypeId,\n        _url: &ParsedUrl,\n    ) {\n        // No changes needed before URL parsing in XmlProcessor\n    }\n\n    fn apply_content_changes_for_offline_version(\n        &self,\n        _content: &mut String,\n        _content_type: ContentTypeId,\n        _url: &ParsedUrl,\n        _remove_unwanted_code: bool,\n    ) {\n        // XML files don't need offline 
conversion\n    }\n\n    fn is_content_type_relevant(&self, content_type: ContentTypeId) -> bool {\n        is_relevant(content_type, &self.relevant_content_types)\n    }\n\n    fn get_name(&self) -> &str {\n        \"XmlProcessor\"\n    }\n\n    fn set_debug_mode(&mut self, debug_mode: bool) {\n        self.debug_mode = debug_mode;\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    fn make_config() -> ProcessorConfig {\n        ProcessorConfig::new(ParsedUrl::parse(\"https://example.com/\", None))\n    }\n\n    #[test]\n    fn test_sitemap_xml() {\n        let processor = XmlProcessor::new(make_config());\n        let xml = r#\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n            <urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n                <url><loc>https://example.com/page1</loc></url>\n                <url><loc>https://example.com/page2</loc></url>\n            </urlset>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/sitemap.xml\", None);\n        let result = processor.find_urls(xml, &source);\n        assert!(result.is_some());\n        assert_eq!(result.unwrap().get_count(), 2);\n    }\n\n    #[test]\n    fn test_sitemap_index() {\n        let processor = XmlProcessor::new(make_config());\n        let xml = r#\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n            <sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n                <sitemap><loc>https://example.com/sitemap1.xml</loc></sitemap>\n                <sitemap><loc>https://example.com/sitemap2.xml</loc></sitemap>\n                <sitemap><loc>https://example.com/sitemap3.xml.gz</loc></sitemap>\n                <sitemap><loc>https://example.com/sitemap.tar.gz</loc></sitemap>\n            </sitemapindex>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/sitemap.xml\", None);\n        let result = processor.find_urls(xml, &source);\n        assert!(result.is_some());\n        // .xml and .xml.gz are included, but 
not .tar.gz\n        assert_eq!(result.unwrap().get_count(), 3);\n    }\n\n    #[test]\n    fn test_non_sitemap_xml() {\n        let processor = XmlProcessor::new(make_config());\n        let xml = r#\"<?xml version=\"1.0\"?><root><item>test</item></root>\"#;\n        let source = ParsedUrl::parse(\"https://example.com/data.xml\", None);\n        let result = processor.find_urls(xml, &source);\n        assert!(result.is_none());\n    }\n\n    /// Test the full gzip decompression + XML parsing pipeline.\n    /// Simulates what the crawler does when it fetches a .xml.gz sitemap:\n    /// gzip-compressed bytes → decompress → parse XML → extract URLs.\n    #[test]\n    fn test_gzip_compressed_sitemap() {\n        use flate2::Compression;\n        use flate2::read::GzDecoder;\n        use flate2::write::GzEncoder;\n        use std::io::Write;\n\n        let xml = r#\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n            <urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n                <url><loc>https://example.com/page1</loc></url>\n                <url><loc>https://example.com/page2</loc></url>\n                <url><loc>https://example.com/page3</loc></url>\n            </urlset>\"#;\n\n        // Compress the XML (simulates what a .xml.gz file contains)\n        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());\n        encoder.write_all(xml.as_bytes()).unwrap();\n        let compressed = encoder.finish().unwrap();\n\n        // Verify it's actually compressed (smaller or at least different)\n        assert_ne!(compressed, xml.as_bytes());\n\n        // Decompress (same logic as in Crawler::process_url for .xml.gz)\n        let mut decoder = GzDecoder::new(&compressed[..]);\n        let mut decompressed = Vec::new();\n        std::io::Read::read_to_end(&mut decoder, &mut decompressed).unwrap();\n        let decompressed_str = String::from_utf8(decompressed).unwrap();\n\n        // Parse the decompressed XML with XmlProcessor\n      
  let processor = XmlProcessor::new(make_config());\n        let source = ParsedUrl::parse(\"https://example.com/sitemap.xml.gz\", None);\n        let result = processor.find_urls(&decompressed_str, &source);\n        assert!(result.is_some());\n        assert_eq!(result.unwrap().get_count(), 3);\n    }\n\n    /// Same test for gzip-compressed sitemap index.\n    #[test]\n    fn test_gzip_compressed_sitemap_index() {\n        use flate2::Compression;\n        use flate2::read::GzDecoder;\n        use flate2::write::GzEncoder;\n        use std::io::Write;\n\n        let xml = r#\"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n            <sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n                <sitemap><loc>https://example.com/sitemap-posts.xml</loc></sitemap>\n                <sitemap><loc>https://example.com/sitemap-pages.xml.gz</loc></sitemap>\n            </sitemapindex>\"#;\n\n        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());\n        encoder.write_all(xml.as_bytes()).unwrap();\n        let compressed = encoder.finish().unwrap();\n\n        let mut decoder = GzDecoder::new(&compressed[..]);\n        let mut decompressed = Vec::new();\n        std::io::Read::read_to_end(&mut decoder, &mut decompressed).unwrap();\n        let decompressed_str = String::from_utf8(decompressed).unwrap();\n\n        let processor = XmlProcessor::new(make_config());\n        let source = ParsedUrl::parse(\"https://example.com/sitemap-index.xml.gz\", None);\n        let result = processor.find_urls(&decompressed_str, &source);\n        assert!(result.is_some());\n        // Both .xml and .xml.gz URLs from the index\n        assert_eq!(result.unwrap().get_count(), 2);\n    }\n}\n"
  },
  {
    "path": "src/debugger.rs",
    "content": "// SiteOne Crawler - Debugger\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::fs::OpenOptions;\nuse std::io::Write;\nuse std::sync::RwLock;\n\nuse crate::utils;\n\npub const DEBUG: &str = \"debug\";\npub const INFO: &str = \"info\";\npub const NOTICE: &str = \"notice\";\npub const WARNING: &str = \"warning\";\npub const CRITICAL: &str = \"critical\";\n\nstatic DEBUG_ENABLED: RwLock<bool> = RwLock::new(false);\nstatic DEBUG_PRINT_TO_OUTPUT: RwLock<bool> = RwLock::new(false);\nstatic DEBUG_LOG_FILE: RwLock<Option<String>> = RwLock::new(None);\n\npub fn debug(category: &str, message: &str, severity: &str, time: Option<f64>, size: Option<i64>) {\n    let enabled = DEBUG_ENABLED.read().map(|v| *v).unwrap_or(false);\n    if !enabled {\n        return;\n    }\n\n    let now = chrono::Local::now().format(\"%Y-%m-%d %H:%M:%S\").to_string();\n    let mut final_message = format!(\"{} | {:8} | {:14} | \", now, severity, category,);\n\n    if let Some(t) = time {\n        final_message.push_str(&format!(\"{:7} | \", utils::get_formatted_duration(t)));\n    }\n    if let Some(s) = size {\n        final_message.push_str(&format!(\"{:7} | \", utils::get_formatted_size(s, 0)));\n    }\n\n    final_message.push_str(message);\n\n    print_debug(&final_message);\n    log_debug(&final_message);\n}\n\npub fn console_array_debug(row_data: &[String], col_widths: &[usize]) {\n    let enabled = DEBUG_ENABLED.read().map(|v| *v).unwrap_or(false);\n    if !enabled {\n        return;\n    }\n\n    let console_width = utils::get_console_width();\n    let widths: Vec<usize> = if col_widths.is_empty() {\n        let col_width = console_width / row_data.len();\n        vec![col_width.max(10); row_data.len()]\n    } else {\n        col_widths.iter().map(|w| (*w).max(10)).collect()\n    };\n\n    let mut row = Vec::new();\n    for (i, value) in row_data.iter().enumerate() {\n        let w = widths.get(i).copied().unwrap_or(10);\n        let val = if value.len() > w {\n            
utils::truncate_in_two_thirds(value, w, \"..\", None)\n        } else {\n            format!(\"{:<width$}\", value, width = w)\n        };\n        row.push(val);\n    }\n\n    let message = row.join(\" | \");\n    print_debug(&message);\n    log_debug(&message);\n}\n\npub fn force_enabled_debug(log_file: Option<&str>) {\n    if let Ok(mut d) = DEBUG_ENABLED.write() {\n        *d = true;\n    }\n    if let Ok(mut p) = DEBUG_PRINT_TO_OUTPUT.write() {\n        *p = true;\n    }\n    if let Some(f) = log_file\n        && let Ok(mut lf) = DEBUG_LOG_FILE.write()\n    {\n        *lf = Some(f.to_string());\n    }\n}\n\npub fn set_config(debug_enabled: bool, debug_log_file: Option<&str>) {\n    if debug_enabled {\n        if let Ok(mut d) = DEBUG_ENABLED.write() {\n            *d = true;\n        }\n        if let Ok(mut p) = DEBUG_PRINT_TO_OUTPUT.write() {\n            *p = true;\n        }\n        if let Some(f) = debug_log_file\n            && let Ok(mut lf) = DEBUG_LOG_FILE.write()\n        {\n            *lf = Some(f.to_string());\n        }\n    } else if debug_log_file.is_some() {\n        // when debug is disabled but debugLogFile is set, logging to file is enabled but printing to output is not\n        if let Ok(mut d) = DEBUG_ENABLED.write() {\n            *d = true;\n        }\n        if let Ok(mut p) = DEBUG_PRINT_TO_OUTPUT.write() {\n            *p = false;\n        }\n        if let Some(f) = debug_log_file\n            && let Ok(mut lf) = DEBUG_LOG_FILE.write()\n        {\n            *lf = Some(f.to_string());\n        }\n    }\n}\n\nfn print_debug(message: &str) {\n    let should_print = DEBUG_PRINT_TO_OUTPUT.read().map(|v| *v).unwrap_or(false);\n    if should_print {\n        println!(\"{}\", message);\n    }\n}\n\nfn log_debug(message: &str) {\n    let log_file = DEBUG_LOG_FILE.read().ok().and_then(|v| v.clone());\n    if let Some(path) = log_file {\n        let abs_path = utils::get_absolute_path(&path);\n        if let Ok(mut file) = 
OpenOptions::new().create(true).append(true).open(&abs_path) {\n            let _ = writeln!(file, \"{}\", message);\n        }\n    }\n}\n"
  },
  {
    "path": "src/engine/crawler.rs",
    "content": "// SiteOne Crawler - Core Crawler Engine\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Main crawling engine with concurrent URL processing.\n\nuse std::collections::{HashMap, VecDeque};\nuse std::sync::atomic::{AtomicBool, AtomicI64, AtomicUsize, Ordering};\nuse std::sync::{Arc, Mutex};\n\nuse dashmap::DashMap;\nuse md5::{Digest, Md5};\nuse once_cell::sync::Lazy;\nuse regex::Regex;\nuse tokio::sync::Semaphore;\n\n/// Regex to extract <base href=\"...\"> from HTML\nstatic RE_BASE_HREF: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?is)<base[^>]+href\\s*=\\s*[\"']?([^\"'\\s>]+)\"#).unwrap());\n\n/// Static regexes for title/description/keywords extraction (Fix #12)\nstatic RE_TITLE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?is)<title[^>]*>([^<]*)</title>\").unwrap());\nstatic RE_DESCRIPTION: Lazy<Regex> = Lazy::new(|| {\n    Regex::new(r#\"(?is)<meta\\s+[^>]*name=[\"']description[\"']\\s+[^>]*content=[\"']([^\"']+)[\"'][^>]*>\"#).unwrap()\n});\nstatic RE_KEYWORDS: Lazy<Regex> = Lazy::new(|| {\n    Regex::new(r#\"(?is)<meta\\s+[^>]*name=[\"']keywords[\"']\\s+[^>]*content=[\"']([^\"']+)[\"'][^>]*>\"#).unwrap()\n});\nstatic RE_DOM_COUNT: Lazy<Regex> = Lazy::new(|| Regex::new(r\"<\\w+\").unwrap());\n\nuse crate::analysis::manager::AnalysisManager;\nuse crate::content_processor::html_processor::HTML_PAGES_EXTENSIONS;\nuse crate::content_processor::manager::ContentProcessorManager;\nuse crate::engine::found_url::UrlSource;\nuse crate::engine::found_urls::FoundUrls;\nuse crate::engine::http_client::HttpClient;\nuse crate::engine::http_response::HttpResponse;\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::engine::robots_txt::RobotsTxt;\nuse crate::error::CrawlerResult;\nuse crate::options::core_options::CoreOptions;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::result::visited_url::VisitedUrl;\nuse crate::types::{ContentTypeId, DeviceType, SkippedReason};\nuse crate::utils;\nuse crate::version;\n\n/// Entry in 
the URL queue\n#[derive(Debug, Clone)]\n#[allow(dead_code)]\npub struct QueueEntry {\n    pub url: String,\n    pub uq_id: String,\n    pub source_uq_id: String,\n    pub source_attr: i32,\n}\n\n/// Entry for a visited URL in the visited table\n#[derive(Debug, Clone)]\n#[allow(dead_code)]\npub struct VisitedEntry {\n    pub url: String,\n    pub uq_id: String,\n    pub source_uq_id: String,\n    pub source_attr: i32,\n}\n\n/// Entry for a skipped URL\n#[derive(Debug, Clone)]\n#[allow(dead_code)]\npub struct SkippedEntry {\n    pub url: String,\n    pub reason: SkippedReason,\n    pub source_uq_id: String,\n    pub source_attr: i32,\n}\n\n/// Accept header for HTTP requests\nconst ACCEPT_HEADER: &str = \"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7\";\n\n/// Main crawler engine\npub struct Crawler {\n    options: Arc<CoreOptions>,\n    http_client: Arc<HttpClient>,\n    content_processor_manager: Arc<Mutex<ContentProcessorManager>>,\n    analysis_manager: Arc<Mutex<AnalysisManager>>,\n    output: Arc<Mutex<Box<dyn Output>>>,\n    status: Arc<Mutex<Status>>,\n\n    /// URL queue (key = md5 of full URL, value = QueueEntry)\n    queue: Arc<DashMap<String, QueueEntry>>,\n    /// Insertion-ordered queue keys for breadth-first processing\n    queue_order: Arc<Mutex<VecDeque<String>>>,\n    /// Visited URLs (key = md5 of full URL, value = VisitedEntry)\n    visited: Arc<DashMap<String, VisitedEntry>>,\n    /// Skipped URLs (key = md5 of full URL, value = SkippedEntry)\n    skipped: Arc<DashMap<String, SkippedEntry>>,\n\n    /// Initial parsed URL\n    initial_parsed_url: ParsedUrl,\n    /// Final user agent string\n    final_user_agent: String,\n    /// Accept header (may be modified for offline export)\n    accept_header: String,\n    /// Whether the initial URL has been found as existing HTML\n    initial_existing_url_found: Arc<AtomicBool>,\n    /// Whether the crawler has been 
terminated\n    terminated: Arc<AtomicBool>,\n\n    /// Rate limiting: optimal delay between requests in seconds\n    optimal_delay_between_requests: f64,\n    /// Last request timestamp (epoch seconds)\n    last_request_time: Arc<Mutex<f64>>,\n\n    /// Counter for done URLs\n    done_urls_count: Arc<AtomicUsize>,\n\n    /// Non-200 basenames to their occurrence counts\n    non200_basenames_to_occurrences: Arc<DashMap<String, i64>>,\n\n    /// Cached robots.txt data per domain:port\n    robots_txt_cache: Arc<DashMap<String, Option<RobotsTxt>>>,\n    /// Counter for loaded robots.txt files\n    loaded_robots_txt_count: Arc<AtomicI64>,\n\n    /// Cached resolve mappings (domain:port -> IP)\n    resolve_cache: Arc<DashMap<String, String>>,\n\n    /// Pre-compiled include regex patterns\n    compiled_include_regex: Arc<Vec<Regex>>,\n    /// Pre-compiled ignore regex patterns\n    compiled_ignore_regex: Arc<Vec<Regex>>,\n}\n\nimpl Crawler {\n    pub fn new(\n        options: Arc<CoreOptions>,\n        http_client: HttpClient,\n        content_processor_manager: ContentProcessorManager,\n        analysis_manager: AnalysisManager,\n        output: Box<dyn Output>,\n        status: Status,\n    ) -> Self {\n        let initial_parsed_url = ParsedUrl::parse(&options.url, None);\n        let final_user_agent = Self::build_final_user_agent(&options);\n\n        // Set the final user agent in status\n        let status = {\n            status.set_final_user_agent(&final_user_agent);\n            status\n        };\n\n        let optimal_delay = (1.0 / options.max_reqs_per_sec).max(0.001);\n\n        // Pre-compile include/ignore regex patterns\n        let compiled_include_regex: Vec<Regex> = options\n            .include_regex\n            .iter()\n            .filter_map(|p| {\n                let pattern = utils::extract_pcre_regex_pattern(p);\n                Regex::new(&pattern).ok()\n            })\n            .collect();\n        let compiled_ignore_regex: Vec<Regex> 
= options\n            .ignore_regex\n            .iter()\n            .filter_map(|p| {\n                let pattern = utils::extract_pcre_regex_pattern(p);\n                Regex::new(&pattern).ok()\n            })\n            .collect();\n\n        // Build resolve cache\n        let resolve_cache = DashMap::new();\n        let resolve_re = Regex::new(r\"^([^:]+):([0-9]+):(.+)$\");\n        for resolve in &options.resolve {\n            if let Ok(ref re) = resolve_re\n                && let Some(caps) = re.captures(resolve)\n            {\n                let domain = caps.get(1).map_or(\"\", |m| m.as_str());\n                let port = caps.get(2).map_or(\"\", |m| m.as_str());\n                let ip = caps.get(3).map_or(\"\", |m| m.as_str());\n                resolve_cache.insert(format!(\"{}:{}\", domain, port), ip.to_string());\n            }\n        }\n\n        Crawler {\n            options,\n            http_client: Arc::new(http_client),\n            content_processor_manager: Arc::new(Mutex::new(content_processor_manager)),\n            analysis_manager: Arc::new(Mutex::new(analysis_manager)),\n            output: Arc::new(Mutex::new(output)),\n            status: Arc::new(Mutex::new(status)),\n            queue: Arc::new(DashMap::new()),\n            queue_order: Arc::new(Mutex::new(VecDeque::new())),\n            visited: Arc::new(DashMap::new()),\n            skipped: Arc::new(DashMap::new()),\n            initial_parsed_url,\n            final_user_agent,\n            accept_header: ACCEPT_HEADER.to_string(),\n            initial_existing_url_found: Arc::new(AtomicBool::new(false)),\n            terminated: Arc::new(AtomicBool::new(false)),\n            optimal_delay_between_requests: optimal_delay,\n            last_request_time: Arc::new(Mutex::new(0.0)),\n            done_urls_count: Arc::new(AtomicUsize::new(0)),\n            non200_basenames_to_occurrences: Arc::new(DashMap::new()),\n            robots_txt_cache: Arc::new(DashMap::new()),\n  
          loaded_robots_txt_count: Arc::new(AtomicI64::new(0)),\n            resolve_cache: Arc::new(resolve_cache),\n            compiled_include_regex: Arc::new(compiled_include_regex),\n            compiled_ignore_regex: Arc::new(compiled_ignore_regex),\n        }\n    }\n\n    /// Main crawl loop. Processes URLs concurrently with rate limiting.\n    pub async fn run(&mut self) -> CrawlerResult<()> {\n        // Add initial URL to queue\n        self.add_url_to_queue(&self.initial_parsed_url.clone(), None, UrlSource::InitUrl as i32);\n\n        // Print table header\n        if let Ok(mut output) = self.output.lock() {\n            output.add_table_header();\n        }\n\n        // Set up Ctrl+C handler\n        let terminated = self.terminated.clone();\n        let ctrl_c_handler = tokio::spawn(async move {\n            if let Ok(()) = tokio::signal::ctrl_c().await {\n                terminated.store(true, Ordering::SeqCst);\n            }\n        });\n\n        // Semaphore for controlling concurrent workers\n        let semaphore = Arc::new(Semaphore::new(self.options.workers as usize));\n        let mut join_handles = Vec::new();\n\n        loop {\n            if self.terminated.load(Ordering::SeqCst) {\n                if let Ok(mut output) = self.output.lock() {\n                    output.add_notice(\n                        \"Crawler interrupted by user (Ctrl+C). 
Processing will stop after in-flight requests complete.\",\n                    );\n                }\n                break;\n            }\n\n            // Take the next URL from the queue\n            let entry = self.take_next_from_queue();\n            let entry = match entry {\n                Some(e) => e,\n                None => {\n                    // Queue is empty - check if there are still active workers\n                    let avail = semaphore.available_permits();\n                    let total = self.options.workers as usize;\n                    if avail == total {\n                        // No active workers and empty queue = done\n                        break;\n                    }\n                    // Wait a bit for workers to finish and potentially add new URLs\n                    tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;\n                    continue;\n                }\n            };\n\n            // Acquire semaphore permit\n            let permit = semaphore.clone().acquire_owned().await;\n            let permit = match permit {\n                Ok(p) => p,\n                Err(_) => break,\n            };\n\n            // Clone all needed Arcs for the spawned task\n            let options = self.options.clone();\n            let http_client = self.http_client.clone();\n            let content_processor_manager = self.content_processor_manager.clone();\n            let analysis_manager = self.analysis_manager.clone();\n            let output = self.output.clone();\n            let status = self.status.clone();\n            let queue = self.queue.clone();\n            let queue_order = self.queue_order.clone();\n            let visited = self.visited.clone();\n            let skipped = self.skipped.clone();\n            let initial_parsed_url = self.initial_parsed_url.clone();\n            let final_user_agent = self.final_user_agent.clone();\n            let accept_header = self.accept_header.clone();\n 
           let initial_existing_url_found = self.initial_existing_url_found.clone();\n            let terminated = self.terminated.clone();\n            let done_urls_count = self.done_urls_count.clone();\n            let non200_basenames = self.non200_basenames_to_occurrences.clone();\n            let robots_txt_cache = self.robots_txt_cache.clone();\n            let loaded_robots_txt_count = self.loaded_robots_txt_count.clone();\n            let resolve_cache = self.resolve_cache.clone();\n            let last_request_time = self.last_request_time.clone();\n            let optimal_delay = self.optimal_delay_between_requests;\n            let compiled_include_regex = self.compiled_include_regex.clone();\n            let compiled_ignore_regex = self.compiled_ignore_regex.clone();\n\n            let handle = tokio::spawn(async move {\n                let _permit = permit; // Hold permit until task completes\n\n                if terminated.load(Ordering::SeqCst) {\n                    return;\n                }\n\n                Self::process_url(\n                    entry,\n                    &options,\n                    &http_client,\n                    &content_processor_manager,\n                    &analysis_manager,\n                    &output,\n                    &status,\n                    &queue,\n                    &queue_order,\n                    &visited,\n                    &skipped,\n                    &initial_parsed_url,\n                    &final_user_agent,\n                    &accept_header,\n                    &initial_existing_url_found,\n                    &terminated,\n                    &done_urls_count,\n                    &non200_basenames,\n                    &robots_txt_cache,\n                    &loaded_robots_txt_count,\n                    &resolve_cache,\n                    &last_request_time,\n                    optimal_delay,\n                    &compiled_include_regex,\n                    
&compiled_ignore_regex,\n                )\n                .await;\n            });\n\n            join_handles.push(handle);\n\n            // Clean up finished handles periodically\n            if join_handles.len() > 100 {\n                let mut remaining = Vec::new();\n                for h in join_handles {\n                    if !h.is_finished() {\n                        remaining.push(h);\n                    }\n                }\n                join_handles = remaining;\n            }\n        }\n\n        // Wait for all in-flight workers to complete\n        for handle in join_handles {\n            let _ = handle.await;\n        }\n\n        ctrl_c_handler.abort();\n\n        Ok(())\n    }\n\n    /// Take the next URL from the queue (breadth-first order)\n    fn take_next_from_queue(&self) -> Option<QueueEntry> {\n        let mut order = self.queue_order.lock().unwrap_or_else(|e| e.into_inner());\n        while !order.is_empty() {\n            let Some(key) = order.pop_front() else { break };\n            if let Some((_, entry)) = self.queue.remove(&key) {\n                // Add to visited table\n                self.visited.insert(\n                    key.clone(),\n                    VisitedEntry {\n                        url: entry.url.clone(),\n                        uq_id: entry.uq_id.clone(),\n                        source_uq_id: entry.source_uq_id.clone(),\n                        source_attr: entry.source_attr,\n                    },\n                );\n                return Some(entry);\n            }\n        }\n        None\n    }\n\n    /// Process a single URL: fetch, parse content, extract URLs, update status\n    #[allow(clippy::too_many_arguments)]\n    async fn process_url(\n        entry: QueueEntry,\n        options: &Arc<CoreOptions>,\n        http_client: &Arc<HttpClient>,\n        content_processor_manager: &Arc<Mutex<ContentProcessorManager>>,\n        analysis_manager: &Arc<Mutex<AnalysisManager>>,\n        output: 
&Arc<Mutex<Box<dyn Output>>>,\n        status: &Arc<Mutex<Status>>,\n        queue: &Arc<DashMap<String, QueueEntry>>,\n        queue_order: &Arc<Mutex<VecDeque<String>>>,\n        visited: &Arc<DashMap<String, VisitedEntry>>,\n        skipped: &Arc<DashMap<String, SkippedEntry>>,\n        initial_parsed_url: &ParsedUrl,\n        final_user_agent: &str,\n        accept_header: &str,\n        initial_existing_url_found: &Arc<AtomicBool>,\n        terminated: &Arc<AtomicBool>,\n        done_urls_count: &Arc<AtomicUsize>,\n        non200_basenames: &Arc<DashMap<String, i64>>,\n        robots_txt_cache: &Arc<DashMap<String, Option<RobotsTxt>>>,\n        loaded_robots_txt_count: &Arc<AtomicI64>,\n        resolve_cache: &Arc<DashMap<String, String>>,\n        last_request_time: &Arc<Mutex<f64>>,\n        optimal_delay: f64,\n        compiled_include_regex: &Arc<Vec<Regex>>,\n        compiled_ignore_regex: &Arc<Vec<Regex>>,\n    ) {\n        let parsed_url = ParsedUrl::parse(&entry.url, None);\n        let parsed_url_uq_id = Self::compute_url_uq_id(&parsed_url);\n\n        let is_asset_url = parsed_url\n            .extension\n            .as_ref()\n            .map(|ext| !HTML_PAGES_EXTENSIONS.contains(&ext.to_lowercase().as_str()))\n            .unwrap_or(false);\n\n        let scheme = parsed_url\n            .scheme\n            .as_deref()\n            .unwrap_or(initial_parsed_url.scheme.as_deref().unwrap_or(\"https\"));\n\n        let host_and_port =\n            if parsed_url.host.is_none() || parsed_url.host.as_deref() == initial_parsed_url.host.as_deref() {\n                let host = initial_parsed_url.host.as_deref().unwrap_or(\"\");\n                let port = initial_parsed_url.port.unwrap_or(443);\n                if port != 80 && port != 443 {\n                    format!(\"{}:{}\", host, port)\n                } else {\n                    host.to_string()\n                }\n            } else {\n                let host = 
parsed_url.host.as_deref().unwrap_or(\"\");\n                let port = parsed_url.port.unwrap_or(443);\n                if port != 80 && port != 443 {\n                    format!(\"{}:{}\", host, port)\n                } else {\n                    host.to_string()\n                }\n            };\n\n        let host = match &parsed_url.host {\n            Some(h) => h.clone(),\n            None => {\n                if let Ok(mut out) = output.lock() {\n                    out.add_error(&format!(\"Invalid/unsupported URL found: {}\", entry.url));\n                }\n                return;\n            }\n        };\n\n        let absolute_url = format!(\n            \"{}://{}{}{}\",\n            scheme,\n            host_and_port,\n            parsed_url.path,\n            parsed_url.query.as_ref().map(|q| format!(\"?{}\", q)).unwrap_or_default()\n        );\n\n        let final_url_for_client = if options.add_random_query_params {\n            Self::add_random_query_params(&parsed_url.path)\n        } else {\n            format!(\n                \"{}{}\",\n                parsed_url.path,\n                parsed_url.query.as_ref().map(|q| format!(\"?{}\", q)).unwrap_or_default()\n            )\n        };\n\n        // Get origin header from source URL\n        let origin = if !entry.source_uq_id.is_empty() {\n            match status.lock() {\n                Ok(st) => st.get_origin_header_value_by_source_uq_id(&entry.source_uq_id),\n                _ => None,\n            }\n        } else {\n            None\n        };\n\n        let is_image = parsed_url.is_image();\n        let set_origin = origin.is_some() && !is_image;\n\n        // For security: only send HTTP auth to same 2nd-level domain\n        let use_http_auth = initial_parsed_url\n            .domain_2nd_level\n            .as_ref()\n            .map(|d2| parsed_url.domain_2nd_level.as_deref() == Some(d2.as_str()))\n            .unwrap_or(parsed_url.host == initial_parsed_url.host);\n\n      
  let url_basename = parsed_url.get_base_name();\n\n        // Check non-200 basename protection\n        let http_response = if let Some(ref basename) = url_basename {\n            match non200_basenames.get(basename) {\n                Some(count) => {\n                    if *count > options.max_non200_responses_per_basename {\n                        Some(HttpResponse::create_skipped(\n                            final_url_for_client.clone(),\n                            format!(\n                                \"URL with basename '{}' has more than {} non-200 responses ({}).\",\n                                basename, options.max_non200_responses_per_basename, *count\n                            ),\n                        ))\n                    } else {\n                        None\n                    }\n                }\n                _ => None,\n            }\n        } else {\n            None\n        };\n\n        let http_response = match http_response {\n            Some(skipped) => skipped,\n            None => {\n                let port = parsed_url.port.unwrap_or(if scheme == \"https\" { 443 } else { 80 });\n\n                // Apply URL transformations\n                let (http_request_host, http_request_path) =\n                    Self::apply_http_request_transformations(&host, &final_url_for_client, &options.transform_url);\n\n                let forced_ip = resolve_cache\n                    .get(&format!(\"{}:{}\", http_request_host, port))\n                    .map(|v| v.value().clone());\n\n                // Rate limiting: skip delay for cached responses (no actual HTTP request needed)\n                let origin_for_request = if set_origin { origin.as_deref() } else { None };\n                if !http_client.is_url_cached(\n                    &http_request_host,\n                    port,\n                    scheme,\n                    &http_request_path,\n                    \"GET\",\n                    final_user_agent,\n 
                   accept_header,\n                    &options.accept_encoding,\n                    origin_for_request,\n                ) {\n                    let sleep_duration = {\n                        let now = Self::current_timestamp();\n                        let mut last_time = last_request_time.lock().unwrap_or_else(|e| e.into_inner());\n                        let elapsed = now - *last_time;\n                        if elapsed < optimal_delay {\n                            let sleep = optimal_delay - elapsed;\n                            *last_time = now + sleep; // Reserve slot immediately to avoid TOCTOU race\n                            sleep\n                        } else {\n                            *last_time = now;\n                            0.0\n                        }\n                    };\n                    if sleep_duration > 0.0 {\n                        tokio::time::sleep(tokio::time::Duration::from_secs_f64(sleep_duration.max(0.001))).await;\n                    }\n                }\n\n                match http_client\n                    .request(\n                        &http_request_host,\n                        port,\n                        scheme,\n                        &http_request_path,\n                        \"GET\",\n                        options.timeout as u64,\n                        final_user_agent,\n                        accept_header,\n                        &options.accept_encoding,\n                        origin_for_request,\n                        use_http_auth,\n                        forced_ip.as_deref(),\n                    )\n                    .await\n                {\n                    Ok(resp) => resp,\n                    Err(e) => {\n                        if let Ok(mut out) = output.lock() {\n                            out.add_error(&format!(\"HTTP request error for {}: {}\", absolute_url, e));\n                        }\n                        return;\n                 
   }\n                }\n            }\n        };\n\n        // When the crawler has been terminated, do not process response\n        if terminated.load(Ordering::SeqCst) {\n            return;\n        }\n\n        let response_status = http_response.status_code;\n        let elapsed_time = http_response.exec_time;\n\n        // Handle gzip-compressed sitemaps (.xml.gz): decompress body before processing\n        let is_gzip_sitemap = parsed_url.path.to_lowercase().ends_with(\".xml.gz\");\n        let (body, body_text) = if is_gzip_sitemap\n            && let Some(ref raw_body) = http_response.body\n            && !raw_body.is_empty()\n        {\n            use flate2::read::GzDecoder;\n            let mut decoder = GzDecoder::new(&raw_body[..]);\n            let mut decompressed = Vec::new();\n            if std::io::Read::read_to_end(&mut decoder, &mut decompressed).is_ok() {\n                let text = String::from_utf8_lossy(&decompressed).to_string();\n                (Some(decompressed), Some(text))\n            } else {\n                (http_response.body.clone(), http_response.body_text())\n            }\n        } else {\n            (http_response.body.clone(), http_response.body_text())\n        };\n\n        let body_size = if is_asset_url {\n            http_response\n                .get_header(\"content-length\")\n                .and_then(|v| v.parse::<i64>().ok())\n                .unwrap_or_else(|| body.as_ref().map(|b| b.len() as i64).unwrap_or(0))\n        } else {\n            body.as_ref().map(|b| b.len() as i64).unwrap_or(0)\n        };\n\n        if response_status != 200 {\n            Self::process_non200_url(&parsed_url, non200_basenames);\n        }\n\n        // Detect content type\n        let content_type_header = http_response.get_header(\"content-type\").cloned().unwrap_or_default();\n        let is_html_body = content_type_header.to_lowercase().contains(\"text/html\");\n        let is_css_body = 
content_type_header.to_lowercase().contains(\"text/css\");\n        let is_js_body = content_type_header.to_lowercase().contains(\"application/javascript\")\n            || content_type_header.to_lowercase().contains(\"text/javascript\");\n        let is_xml_body = is_gzip_sitemap\n            || content_type_header.to_lowercase().contains(\"application/xml\")\n            || content_type_header.to_lowercase().contains(\"text/xml\");\n\n        let is_allowed_for_crawling =\n            Self::is_url_allowed_by_regexes(&parsed_url, options, compiled_include_regex, compiled_ignore_regex)\n                && Self::is_external_domain_allowed_for_crawling(\n                    parsed_url.host.as_deref().unwrap_or(\"\"),\n                    initial_parsed_url,\n                    &options.allowed_domains_for_crawling,\n                );\n\n        let mut extra_parsed_content: HashMap<String, String> = HashMap::new();\n\n        // Mark initial URL as found\n        if !initial_existing_url_found.load(Ordering::SeqCst) && is_html_body && response_status == 200 && body_size > 0\n        {\n            initial_existing_url_found.store(true, Ordering::SeqCst);\n        }\n\n        // Get content type ID\n        let has_location = http_response.get_header(\"location\").is_some();\n        let content_type = if has_location && response_status > 300 && response_status < 320 {\n            ContentTypeId::Redirect\n        } else if is_gzip_sitemap {\n            ContentTypeId::Xml\n        } else {\n            Self::get_content_type_id_by_header(&content_type_header)\n        };\n\n        // Apply content changes before URL parsing (text-based, for HTML/CSS/JS)\n        let mut body_for_parsing = body_text.clone().unwrap_or_default();\n        if let Ok(mut cpm) = content_processor_manager.lock() {\n            cpm.apply_content_changes_before_url_parsing(&mut body_for_parsing, content_type, &parsed_url);\n        }\n\n        // Parse body and fill queue with new URLs\n 
       if !body_for_parsing.is_empty() && is_html_body && is_allowed_for_crawling {\n            let html_extras = Self::parse_html_body_and_fill_queue(\n                &body_for_parsing,\n                content_type,\n                &parsed_url,\n                options,\n                content_processor_manager,\n                queue,\n                queue_order,\n                visited,\n                skipped,\n                initial_parsed_url,\n                non200_basenames,\n                robots_txt_cache,\n                loaded_robots_txt_count,\n                resolve_cache,\n                http_client,\n                output,\n                status,\n                terminated,\n                compiled_include_regex,\n                compiled_ignore_regex,\n            );\n            for (k, v) in html_extras {\n                extra_parsed_content.insert(k, v);\n            }\n        } else if !body_for_parsing.is_empty() && (is_js_body || is_css_body || is_xml_body) {\n            Self::parse_content_and_fill_url_queue(\n                &body_for_parsing,\n                content_type,\n                &parsed_url,\n                options,\n                content_processor_manager,\n                queue,\n                queue_order,\n                visited,\n                skipped,\n                initial_parsed_url,\n                non200_basenames,\n                robots_txt_cache,\n                loaded_robots_txt_count,\n                resolve_cache,\n                http_client,\n                output,\n                status,\n                terminated,\n                compiled_include_regex,\n                compiled_ignore_regex,\n            );\n        }\n\n        // Handle redirect\n        if (301..=308).contains(&response_status)\n            && let Some(redirect_location) = http_response.get_header(\"location\")\n        {\n            let redirect_location = redirect_location.clone();\n            
extra_parsed_content.insert(\"Location\".to_string(), redirect_location.clone());\n            Self::add_redirect_location_to_queue_if_suitable(\n                &redirect_location,\n                &parsed_url_uq_id,\n                scheme,\n                &host_and_port,\n                &parsed_url,\n                options,\n                queue,\n                queue_order,\n                visited,\n                skipped,\n                initial_parsed_url,\n                terminated,\n                compiled_include_regex,\n                compiled_ignore_regex,\n            );\n        }\n\n        // Set extras from headers\n        for extra_column in &options.extra_columns {\n            let col_name_lower = extra_column.name.to_lowercase();\n            if let Some(header_val) = http_response.get_header(&col_name_lower) {\n                extra_parsed_content.insert(extra_column.name.clone(), header_val.clone());\n            }\n        }\n\n        // Caching\n        let (cache_type_flags, cache_lifetime) = if http_response.status_code > 0 {\n            (\n                Self::get_cache_type_flags(&http_response.headers),\n                Self::get_cache_lifetime(&http_response.headers),\n            )\n        } else {\n            (crate::result::visited_url::CACHE_TYPE_NOT_AVAILABLE, None)\n        };\n\n        // Create VisitedUrl and update status\n        let is_external = parsed_url\n            .host\n            .as_deref()\n            .map(|h| !Self::hosts_are_www_equivalent(h, initial_parsed_url.host.as_deref().unwrap_or(\"\")))\n            .unwrap_or(false);\n\n        let visited_url = VisitedUrl::new(\n            parsed_url_uq_id.clone(),\n            entry.source_uq_id.clone(),\n            entry.source_attr,\n            absolute_url.clone(),\n            response_status,\n            elapsed_time,\n            Some(body_size),\n            content_type,\n            Some(content_type_header.clone()),\n            
http_response.get_header(\"content-encoding\").cloned(),\n            if extra_parsed_content.is_empty() {\n                None\n            } else {\n                Some(extra_parsed_content.clone())\n            },\n            is_external,\n            is_allowed_for_crawling,\n            cache_type_flags,\n            cache_lifetime.map(|l| l as i64),\n        );\n\n        if let Ok(mut st) = status.lock() {\n            st.add_visited_url(visited_url.clone(), body.as_deref(), Some(&http_response.headers));\n        }\n\n        // Run per-URL analysis (headers, security, accessibility, best practices, etc.)\n        if let Ok(mut am) = analysis_manager.lock()\n            && let Ok(st) = status.lock()\n        {\n            let analysis_results =\n                am.analyze_visited_url(&visited_url, body_text.as_deref(), Some(&http_response.headers), &st);\n\n            // Store analysis results as extra columns for progress table display\n            let extra_column_values = am.get_analysis_column_values(&analysis_results);\n            for (col_name, col_value) in extra_column_values {\n                extra_parsed_content.insert(col_name, col_value);\n            }\n        }\n\n        // Increment done count\n        let done_count = done_urls_count.fetch_add(1, Ordering::SeqCst) + 1;\n        let total_count = queue.len() + visited.len();\n        let progress_status = format!(\"{}/{}\", done_count, total_count);\n\n        // Print table row to output\n        if let Ok(mut out) = output.lock() {\n            out.add_table_row(\n                &http_response.headers,\n                &absolute_url,\n                response_status,\n                elapsed_time,\n                body_size,\n                content_type as i32,\n                &extra_parsed_content,\n                &progress_status,\n                cache_type_flags as i32,\n                cache_lifetime,\n            );\n        }\n    }\n\n    /// Parse HTML body, extract 
URLs, and fill the queue\n    #[allow(clippy::too_many_arguments)]\n    fn parse_html_body_and_fill_queue(\n        body: &str,\n        content_type: ContentTypeId,\n        url: &ParsedUrl,\n        options: &Arc<CoreOptions>,\n        content_processor_manager: &Arc<Mutex<ContentProcessorManager>>,\n        queue: &Arc<DashMap<String, QueueEntry>>,\n        queue_order: &Arc<Mutex<VecDeque<String>>>,\n        visited: &Arc<DashMap<String, VisitedEntry>>,\n        skipped: &Arc<DashMap<String, SkippedEntry>>,\n        initial_parsed_url: &ParsedUrl,\n        non200_basenames: &Arc<DashMap<String, i64>>,\n        robots_txt_cache: &Arc<DashMap<String, Option<RobotsTxt>>>,\n        loaded_robots_txt_count: &Arc<AtomicI64>,\n        resolve_cache: &Arc<DashMap<String, String>>,\n        http_client: &Arc<HttpClient>,\n        output: &Arc<Mutex<Box<dyn Output>>>,\n        status: &Arc<Mutex<Status>>,\n        terminated: &Arc<AtomicBool>,\n        compiled_include_regex: &[Regex],\n        compiled_ignore_regex: &[Regex],\n    ) -> HashMap<String, String> {\n        let mut result = HashMap::new();\n\n        // Skip link following from HTML pages when initial URL is a sitemap.xml\n        // (sitemap-only mode: only crawl URLs listed in the sitemap)\n        let is_sitemap_only = Self::is_sitemap_url(initial_parsed_url);\n        if !is_sitemap_only || content_type == ContentTypeId::Xml {\n            Self::parse_content_and_fill_url_queue(\n                body,\n                content_type,\n                url,\n                options,\n                content_processor_manager,\n                queue,\n                queue_order,\n                visited,\n                skipped,\n                initial_parsed_url,\n                non200_basenames,\n                robots_txt_cache,\n                loaded_robots_txt_count,\n                resolve_cache,\n                http_client,\n                output,\n                status,\n                
terminated,\n                compiled_include_regex,\n                compiled_ignore_regex,\n            );\n        }\n\n        // Extract Title\n        if let Some(caps) = RE_TITLE.captures(body) {\n            let title = caps.get(1).map_or(\"\", |m| m.as_str()).trim();\n            result.insert(\"Title\".to_string(), Self::decode_html_entities(title));\n        }\n\n        // Extract Description\n        if let Some(caps) = RE_DESCRIPTION.captures(body) {\n            let desc = caps.get(1).map_or(\"\", |m| m.as_str()).trim();\n            result.insert(\"Description\".to_string(), Self::decode_html_entities(desc));\n        }\n\n        // Extract Keywords if needed\n        if options.has_header_to_table(\"Keywords\")\n            && let Some(caps) = RE_KEYWORDS.captures(body)\n        {\n            let keywords = caps.get(1).map_or(\"\", |m| m.as_str()).trim();\n            result.insert(\"Keywords\".to_string(), Self::decode_html_entities(keywords));\n        }\n\n        // Extract DOM count if needed\n        if options.has_header_to_table(\"DOM\") {\n            let dom_count = RE_DOM_COUNT.find_iter(body).count();\n            result.insert(\"DOM\".to_string(), dom_count.to_string());\n        }\n\n        // Custom extraction for extra columns\n        for extra_column in &options.extra_columns {\n            if extra_column.custom_method.is_some()\n                && let Some(value) = extra_column.extract_value(body)\n            {\n                result.insert(extra_column.name.clone(), value);\n            }\n        }\n\n        result\n    }\n\n    /// Parse content (HTML/CSS/JS/XML) and fill URL queue\n    #[allow(clippy::too_many_arguments)]\n    fn parse_content_and_fill_url_queue(\n        content: &str,\n        content_type: ContentTypeId,\n        url: &ParsedUrl,\n        options: &Arc<CoreOptions>,\n        content_processor_manager: &Arc<Mutex<ContentProcessorManager>>,\n        queue: &Arc<DashMap<String, QueueEntry>>,\n        
queue_order: &Arc<Mutex<VecDeque<String>>>,\n        visited: &Arc<DashMap<String, VisitedEntry>>,\n        skipped: &Arc<DashMap<String, SkippedEntry>>,\n        initial_parsed_url: &ParsedUrl,\n        non200_basenames: &Arc<DashMap<String, i64>>,\n        robots_txt_cache: &Arc<DashMap<String, Option<RobotsTxt>>>,\n        loaded_robots_txt_count: &Arc<AtomicI64>,\n        resolve_cache: &Arc<DashMap<String, String>>,\n        http_client: &Arc<HttpClient>,\n        output: &Arc<Mutex<Box<dyn Output>>>,\n        status: &Arc<Mutex<Status>>,\n        terminated: &Arc<AtomicBool>,\n        compiled_include_regex: &[Regex],\n        compiled_ignore_regex: &[Regex],\n    ) {\n        // Detect <base href=\"...\"> in HTML content to use as base URL for resolving relative URLs\n        let effective_base_url = if content_type == ContentTypeId::Html {\n            if let Some(caps) = RE_BASE_HREF.captures(content) {\n                if let Some(base_href) = caps.get(1) {\n                    let base_href_str = base_href.as_str();\n                    // Only use base href if it looks like a valid URL or path\n                    if base_href_str.starts_with(\"http://\")\n                        || base_href_str.starts_with(\"https://\")\n                        || base_href_str.starts_with(\"//\")\n                        || base_href_str.starts_with('/')\n                    {\n                        Some(ParsedUrl::parse(base_href_str, Some(url)))\n                    } else {\n                        None\n                    }\n                } else {\n                    None\n                }\n            } else {\n                None\n            }\n        } else {\n            None\n        };\n        let source_url = effective_base_url.as_ref().unwrap_or(url);\n\n        let found_urls_list = match content_processor_manager.lock() {\n            Ok(mut cpm) => cpm.find_urls(content, content_type, source_url),\n            _ => Vec::new(),\n        
};\n\n        for found_urls in found_urls_list {\n            Self::add_suitable_urls_to_queue(\n                &found_urls,\n                source_url,\n                options,\n                queue,\n                queue_order,\n                visited,\n                skipped,\n                initial_parsed_url,\n                non200_basenames,\n                robots_txt_cache,\n                loaded_robots_txt_count,\n                resolve_cache,\n                http_client,\n                output,\n                status,\n                terminated,\n                compiled_include_regex,\n                compiled_ignore_regex,\n            );\n        }\n    }\n\n    /// Add suitable found URLs to the queue after filtering\n    #[allow(clippy::too_many_arguments)]\n    fn add_suitable_urls_to_queue(\n        found_urls: &FoundUrls,\n        source_url: &ParsedUrl,\n        options: &Arc<CoreOptions>,\n        queue: &Arc<DashMap<String, QueueEntry>>,\n        queue_order: &Arc<Mutex<VecDeque<String>>>,\n        visited: &Arc<DashMap<String, VisitedEntry>>,\n        skipped: &Arc<DashMap<String, SkippedEntry>>,\n        initial_parsed_url: &ParsedUrl,\n        non200_basenames: &Arc<DashMap<String, i64>>,\n        robots_txt_cache: &Arc<DashMap<String, Option<RobotsTxt>>>,\n        _loaded_robots_txt_count: &Arc<AtomicI64>,\n        _resolve_cache: &Arc<DashMap<String, String>>,\n        _http_client: &Arc<HttpClient>,\n        _output: &Arc<Mutex<Box<dyn Output>>>,\n        _status: &Arc<Mutex<Status>>,\n        terminated: &Arc<AtomicBool>,\n        compiled_include_regex: &[Regex],\n        compiled_ignore_regex: &[Regex],\n    ) {\n        let source_url_uq_id = Self::compute_url_uq_id(source_url);\n\n        for found_url in found_urls.get_urls().values() {\n            if terminated.load(Ordering::SeqCst) {\n                return;\n            }\n\n            let url_for_queue = found_url.url.trim().to_string();\n            let 
parsed_url_for_queue = ParsedUrl::parse(&url_for_queue, Some(source_url));\n\n            // Skip URLs that are not requestable resources\n            if !utils::is_href_for_requestable_resource(&url_for_queue) {\n                continue;\n            }\n\n            // Check if URL is on same host or allowed host\n            let is_url_on_same_host = parsed_url_for_queue.host.is_none()\n                || parsed_url_for_queue.host == initial_parsed_url.host\n                || Self::hosts_are_www_equivalent(\n                    parsed_url_for_queue.host.as_deref().unwrap_or(\"\"),\n                    initial_parsed_url.host.as_deref().unwrap_or(\"\"),\n                );\n            let mut is_url_on_allowed_host = false;\n            if let Some(ref parsed_host) = parsed_url_for_queue.host\n                && Some(parsed_host.as_str()) != initial_parsed_url.host.as_deref()\n            {\n                let is_allowed_static = !options.allowed_domains_for_external_files.is_empty()\n                    && Self::is_domain_allowed_for_static_files(\n                        parsed_host,\n                        &options.allowed_domains_for_external_files,\n                    );\n                let is_allowed_crawlable = !options.allowed_domains_for_crawling.is_empty()\n                    && Self::is_external_domain_allowed_for_crawling(\n                        parsed_host,\n                        initial_parsed_url,\n                        &options.allowed_domains_for_crawling,\n                    );\n                if (is_allowed_static && found_url.is_included_asset()) || is_allowed_crawlable {\n                    is_url_on_allowed_host = true;\n                }\n            }\n\n            // Skip basename with too many non-200s\n            if let Some(ref basename) = parsed_url_for_queue.get_base_name()\n                && let Some(count) = non200_basenames.get(basename)\n                && *count >= options.max_non200_responses_per_basename\n  
          {\n                continue;\n            }\n\n            if !is_url_on_same_host && !is_url_on_allowed_host {\n                // Add to skipped\n                let url_key = Self::compute_url_key(&parsed_url_for_queue);\n                if !skipped.contains_key(&url_key) {\n                    skipped.insert(\n                        url_key,\n                        SkippedEntry {\n                            url: parsed_url_for_queue.get_full_url(true, false),\n                            reason: SkippedReason::NotAllowedHost,\n                            source_uq_id: source_url_uq_id.clone(),\n                            source_attr: found_url.source as i32,\n                        },\n                    );\n                }\n                continue;\n            }\n\n            // Check robots.txt (skip for static files)\n            if !parsed_url_for_queue.is_static_file() && !options.ignore_robots_txt {\n                let check_host = parsed_url_for_queue\n                    .host\n                    .as_deref()\n                    .unwrap_or(initial_parsed_url.host.as_deref().unwrap_or(\"\"));\n                if !Self::is_url_allowed_by_robots_txt_cached(check_host, &url_for_queue, robots_txt_cache) {\n                    let url_key = Self::compute_url_key(&parsed_url_for_queue);\n                    if !skipped.contains_key(&url_key) {\n                        skipped.insert(\n                            url_key,\n                            SkippedEntry {\n                                url: parsed_url_for_queue.get_full_url(true, false),\n                                reason: SkippedReason::RobotsTxt,\n                                source_uq_id: source_url_uq_id.clone(),\n                                source_attr: found_url.source as i32,\n                            },\n                        );\n                    }\n                    continue;\n                }\n            }\n\n            // Build absolute 
URL\n            let source_full_url = source_url.get_full_url(true, false);\n            let absolute_url = utils::get_absolute_url_by_base_url(&source_full_url, &url_for_queue);\n\n            if absolute_url.is_empty() {\n                continue;\n            }\n\n            // Remove fragment\n            let absolute_url = if let Some(hash_pos) = absolute_url.find('#') {\n                absolute_url[..hash_pos].to_string()\n            } else {\n                absolute_url\n            };\n\n            // Filter query params if configured\n            let absolute_url = if options.remove_query_params {\n                if let Some(q_pos) = absolute_url.find('?') {\n                    absolute_url[..q_pos].to_string()\n                } else {\n                    absolute_url\n                }\n            } else if !options.keep_query_params.is_empty() {\n                filter_query_params(&absolute_url, &options.keep_query_params)\n            } else {\n                absolute_url\n            };\n\n            // Re-parse and check suitability\n            let mut parsed_url_for_queue = ParsedUrl::parse(&absolute_url, Some(source_url));\n\n            // Force relative URLs: normalize host/scheme variants to match initial URL\n            if options.force_relative_urls {\n                Self::normalize_url_to_initial(&mut parsed_url_for_queue, initial_parsed_url);\n            }\n\n            let suitable = Self::is_url_suitable_for_queue_static(\n                &parsed_url_for_queue,\n                queue,\n                visited,\n                options,\n                compiled_include_regex,\n                compiled_ignore_regex,\n            );\n            if suitable {\n                Self::add_url_to_queue_static(\n                    &parsed_url_for_queue,\n                    Some(&source_url_uq_id),\n                    found_url.source as i32,\n                    queue,\n                    queue_order,\n                    
visited,\n                    options,\n                    terminated,\n                );\n            }\n        }\n    }\n\n    /// Add URL to the queue\n    fn add_url_to_queue(&self, url: &ParsedUrl, source_uq_id: Option<&str>, source_attr: i32) {\n        Self::add_url_to_queue_static(\n            url,\n            source_uq_id,\n            source_attr,\n            &self.queue,\n            &self.queue_order,\n            &self.visited,\n            &self.options,\n            &self.terminated,\n        );\n    }\n\n    /// Static version of add_url_to_queue for use in async contexts\n    #[allow(clippy::too_many_arguments)]\n    fn add_url_to_queue_static(\n        url: &ParsedUrl,\n        source_uq_id: Option<&str>,\n        source_attr: i32,\n        queue: &DashMap<String, QueueEntry>,\n        queue_order: &Mutex<VecDeque<String>>,\n        visited: &DashMap<String, VisitedEntry>,\n        options: &CoreOptions,\n        terminated: &AtomicBool,\n    ) {\n        if terminated.load(Ordering::SeqCst) {\n            return;\n        }\n\n        // Check max_visited_urls limit\n        if (queue.len() + visited.len()) as i64 >= options.max_visited_urls {\n            return;\n        }\n\n        let url_str = url.get_full_url(true, false);\n        let url_key = Self::compute_url_key(url);\n        let uq_id = Self::compute_url_uq_id(url);\n\n        if (queue.len() as i64) >= options.max_queue_length {\n            return;\n        }\n\n        let entry = QueueEntry {\n            url: url_str,\n            uq_id,\n            source_uq_id: source_uq_id.unwrap_or(\"\").to_string(),\n            source_attr,\n        };\n\n        queue.insert(url_key.clone(), entry);\n        if let Ok(mut order) = queue_order.lock() {\n            order.push_back(url_key);\n        }\n    }\n\n    /// Normalize URL host/scheme to match the initial URL when force_relative_urls is enabled.\n    /// Handles www/non-www and http/https variants of the same domain.\n    
fn normalize_url_to_initial(url: &mut ParsedUrl, initial_url: &ParsedUrl) {\n        if let (Some(url_host), Some(initial_host)) = (url.host.as_ref(), initial_url.host.as_ref()) {\n            let url_host_no_www = url_host.strip_prefix(\"www.\").unwrap_or(url_host);\n            let initial_host_no_www = initial_host.strip_prefix(\"www.\").unwrap_or(initial_host);\n\n            if url_host_no_www.eq_ignore_ascii_case(initial_host_no_www) {\n                // Normalize host to match initial URL\n                if url.host.as_deref() != initial_url.host.as_deref() {\n                    url.host = initial_url.host.clone();\n                }\n                // Normalize scheme to match initial URL\n                if url.scheme != initial_url.scheme {\n                    url.scheme = initial_url.scheme.clone();\n                }\n                // Rebuild the url string\n                url.url = url.get_full_url(true, true);\n            }\n        }\n    }\n\n    /// Check if a URL is suitable for the queue\n    fn is_url_suitable_for_queue_static(\n        url: &ParsedUrl,\n        queue: &DashMap<String, QueueEntry>,\n        visited: &DashMap<String, VisitedEntry>,\n        options: &CoreOptions,\n        compiled_include: &[Regex],\n        compiled_ignore: &[Regex],\n    ) -> bool {\n        if !Self::is_url_allowed_by_regexes(url, options, compiled_include, compiled_ignore) {\n            return false;\n        }\n\n        if (visited.len() + queue.len()) as i64 >= options.max_visited_urls {\n            return false;\n        }\n\n        let full_url = url.get_full_url(true, false);\n        let url_key = Self::compute_url_key(url);\n\n        let is_in_queue = queue.contains_key(&url_key);\n        let is_already_visited = visited.contains_key(&url_key);\n        let is_url_with_html = url.extension.is_none()\n            || HTML_PAGES_EXTENSIONS.contains(&url.extension.as_deref().unwrap_or(\"\").to_lowercase().as_str());\n        let path_lower = 
url.path.to_lowercase();\n        let is_url_with_sitemap =\n            path_lower.contains(\"sitemap\") && (path_lower.ends_with(\".xml\") || path_lower.ends_with(\".xml.gz\"));\n        let is_url_too_long = full_url.len() as i64 > options.max_url_length;\n        let allowed_only_html = options.crawl_only_html_files();\n\n        if !is_in_queue\n            && !is_already_visited\n            && !is_url_too_long\n            && (is_url_with_html || !allowed_only_html || is_url_with_sitemap)\n        {\n            return true;\n        }\n\n        false\n    }\n\n    /// Check if URL is allowed by include/ignore regex rules\n    fn is_url_allowed_by_regexes(\n        url: &ParsedUrl,\n        options: &CoreOptions,\n        compiled_include: &[Regex],\n        compiled_ignore: &[Regex],\n    ) -> bool {\n        // Bypass regex filtering for static files if configured\n        if options.regex_filtering_only_for_pages && url.is_static_file() {\n            return true;\n        }\n\n        let full_url = url.get_full_url(true, false);\n\n        let mut is_allowed = compiled_include.is_empty();\n        for re in compiled_include {\n            if re.is_match(&full_url) {\n                is_allowed = true;\n                break;\n            }\n        }\n\n        for re in compiled_ignore {\n            if re.is_match(&full_url) {\n                is_allowed = false;\n                break;\n            }\n        }\n\n        is_allowed\n    }\n\n    /// Check if a domain is allowed for static file downloads\n    fn is_domain_allowed_for_static_files(domain: &str, allowed_domains: &[String]) -> bool {\n        use std::sync::OnceLock;\n        static COMPILED: OnceLock<Vec<Regex>> = OnceLock::new();\n        let patterns = COMPILED.get_or_init(|| compile_domain_patterns(allowed_domains));\n        patterns.iter().any(|re| re.is_match(domain))\n    }\n\n    /// Check if two hosts are www/non-www equivalents\n    fn hosts_are_www_equivalent(host_a: &str, 
host_b: &str) -> bool {\n        if host_a == host_b {\n            return true;\n        }\n        let a = host_a.strip_prefix(\"www.\").unwrap_or(host_a);\n        let b = host_b.strip_prefix(\"www.\").unwrap_or(host_b);\n        a == b\n    }\n\n    /// Check if an external domain is allowed for whole-domain crawling\n    fn is_external_domain_allowed_for_crawling(\n        domain: &str,\n        initial_parsed_url: &ParsedUrl,\n        allowed_domains: &[String],\n    ) -> bool {\n        let initial_host = initial_parsed_url.host.as_deref().unwrap_or(\"\");\n        if domain == initial_host {\n            return true;\n        }\n\n        // www/non-www equivalence: handles redirects like\n        // www.rust-lang.org -> rust-lang.org (or vice versa)\n        if Self::hosts_are_www_equivalent(domain, initial_host) {\n            return true;\n        }\n\n        use std::sync::OnceLock;\n        static COMPILED: OnceLock<Vec<Regex>> = OnceLock::new();\n        let patterns = COMPILED.get_or_init(|| compile_domain_patterns(allowed_domains));\n        patterns.iter().any(|re| re.is_match(domain))\n    }\n\n    /// Add redirect location to queue if suitable\n    #[allow(clippy::too_many_arguments)]\n    fn add_redirect_location_to_queue_if_suitable(\n        redirect_location: &str,\n        source_uq_id: &str,\n        scheme: &str,\n        host_and_port: &str,\n        source_url: &ParsedUrl,\n        options: &Arc<CoreOptions>,\n        queue: &Arc<DashMap<String, QueueEntry>>,\n        queue_order: &Arc<Mutex<VecDeque<String>>>,\n        visited: &Arc<DashMap<String, VisitedEntry>>,\n        _skipped: &Arc<DashMap<String, SkippedEntry>>,\n        _initial_parsed_url: &ParsedUrl,\n        terminated: &Arc<AtomicBool>,\n        compiled_include_regex: &[Regex],\n        compiled_ignore_regex: &[Regex],\n    ) {\n        let redirect_url = if redirect_location.starts_with(\"//\") {\n            format!(\"{}:{}\", scheme, redirect_location)\n        } else 
if redirect_location.starts_with('/') {\n            format!(\"{}://{}{}\", scheme, host_and_port, redirect_location)\n        } else if redirect_location.starts_with(\"http://\") || redirect_location.starts_with(\"https://\") {\n            redirect_location.to_string()\n        } else {\n            format!(\n                \"{}://{}{}/{}\",\n                scheme, host_and_port, source_url.path, redirect_location\n            )\n        };\n\n        let parsed_redirect_url = ParsedUrl::parse(&redirect_url, Some(source_url));\n\n        if Self::is_url_suitable_for_queue_static(\n            &parsed_redirect_url,\n            queue,\n            visited,\n            options,\n            compiled_include_regex,\n            compiled_ignore_regex,\n        ) {\n            Self::add_url_to_queue_static(\n                &parsed_redirect_url,\n                Some(source_uq_id),\n                UrlSource::Redirect as i32,\n                queue,\n                queue_order,\n                visited,\n                options,\n                terminated,\n            );\n\n            // If initial URL redirects to same 2nd-level domain, the domain checks\n            // in is_external_domain_allowed_for_crawling and is_url_on_same_host\n            // handle this via 2nd-level domain comparison.\n        }\n    }\n\n    /// Process a URL that returned non-200 status\n    fn process_non200_url(url: &ParsedUrl, non200_basenames: &DashMap<String, i64>) {\n        if let Some(basename) = url.get_base_name()\n            && basename != \"index.html\"\n            && basename != \"index.htm\"\n            && basename != \"index\"\n        {\n            non200_basenames\n                .entry(basename)\n                .and_modify(|count| *count += 1)\n                .or_insert(1);\n        }\n    }\n\n    /// Check if URL is allowed by robots.txt (using cache)\n    fn is_url_allowed_by_robots_txt_cached(\n        domain: &str,\n        url: &str,\n        
robots_txt_cache: &DashMap<String, Option<RobotsTxt>>,\n    ) -> bool {\n        // Only check the matching domain's robots.txt\n        for entry in robots_txt_cache.iter() {\n            if !entry.key().starts_with(domain) {\n                continue;\n            }\n            if let Some(ref robots_txt) = *entry.value()\n                && !robots_txt.is_allowed(url)\n            {\n                return false;\n            }\n        }\n        true\n    }\n\n    /// Fetch and parse robots.txt for a domain\n    pub async fn fetch_robots_txt(&self, domain: &str, port: u16, scheme: &str) {\n        if self.options.ignore_robots_txt {\n            return;\n        }\n\n        let cache_key = format!(\"{}:{}\", domain, port);\n        if self.robots_txt_cache.contains_key(&cache_key) {\n            return;\n        }\n\n        // Prevent parallel fetches for same domain\n        self.robots_txt_cache.insert(cache_key.clone(), None);\n\n        let use_http_auth = self\n            .initial_parsed_url\n            .domain_2nd_level\n            .as_ref()\n            .map(|d2| domain.ends_with(d2.as_str()))\n            .unwrap_or(domain == self.initial_parsed_url.host.as_deref().unwrap_or(\"\"));\n\n        let (http_request_host, http_request_path) =\n            Self::apply_http_request_transformations(domain, \"/robots.txt\", &self.options.transform_url);\n\n        let forced_ip = self\n            .resolve_cache\n            .get(&format!(\"{}:{}\", http_request_host, port))\n            .map(|v| v.value().clone());\n\n        let response = self\n            .http_client\n            .request(\n                &http_request_host,\n                port,\n                scheme,\n                &http_request_path,\n                \"GET\",\n                3,\n                &Self::get_crawler_user_agent_signature(),\n                ACCEPT_HEADER,\n                \"gzip, deflate, br\",\n                None,\n                use_http_auth,\n            
    forced_ip.as_deref(),\n            )\n            .await;\n\n        let count = self.loaded_robots_txt_count.fetch_add(1, Ordering::SeqCst) + 1;\n\n        if let Ok(resp) = response {\n            if count <= 10\n                && let Ok(st) = self.status.lock()\n            {\n                st.add_notice_to_summary(\n                    &format!(\"robots-txt-{}\", domain),\n                    &format!(\n                        \"Loaded robots.txt for domain '{}': status code {}, size {} and took {}.\",\n                        domain,\n                        resp.status_code,\n                        resp.get_formatted_body_length(),\n                        resp.get_formatted_exec_time(),\n                    ),\n                );\n            }\n\n            if resp.status_code == 200\n                && let Some(ref body_bytes) = resp.body\n            {\n                let body_str = String::from_utf8_lossy(body_bytes);\n                let robots_txt = RobotsTxt::parse(&body_str);\n\n                if let Ok(st) = self.status.lock() {\n                    st.set_robots_txt_content(scheme, domain, port, &body_str);\n                }\n\n                self.robots_txt_cache.insert(cache_key, Some(robots_txt));\n                return;\n            }\n        }\n\n        // No valid robots.txt found\n        self.robots_txt_cache.insert(cache_key, None);\n    }\n\n    /// Get content type ID from Content-Type header\n    fn get_content_type_id_by_header(content_type_header: &str) -> ContentTypeId {\n        let header_lower = content_type_header.to_lowercase();\n\n        if header_lower.contains(\"text/html\") {\n            ContentTypeId::Html\n        } else if header_lower.contains(\"text/javascript\")\n            || header_lower.contains(\"application/javascript\")\n            || header_lower.contains(\"application/x-javascript\")\n        {\n            ContentTypeId::Script\n        } else if header_lower.contains(\"text/css\") {\n      
      ContentTypeId::Stylesheet\n        } else if header_lower.contains(\"image/\") {\n            ContentTypeId::Image\n        } else if header_lower.contains(\"audio/\") {\n            ContentTypeId::Audio\n        } else if header_lower.contains(\"video/\") {\n            ContentTypeId::Video\n        } else if header_lower.contains(\"font/\") {\n            ContentTypeId::Font\n        } else if header_lower.contains(\"application/json\") {\n            ContentTypeId::Json\n        } else if header_lower.contains(\"application/xml\")\n            || header_lower.contains(\"text/xml\")\n            || header_lower.contains(\"+xml\")\n        {\n            ContentTypeId::Xml\n        } else if header_lower.contains(\"application/pdf\")\n            || header_lower.contains(\"application/msword\")\n            || header_lower.contains(\"application/vnd.ms-excel\")\n            || header_lower.contains(\"application/vnd.ms-powerpoint\")\n            || header_lower.contains(\"text/plain\")\n            || header_lower.contains(\"document\")\n        {\n            ContentTypeId::Document\n        } else {\n            ContentTypeId::Other\n        }\n    }\n\n    /// Build final user agent string\n    fn build_final_user_agent(options: &CoreOptions) -> String {\n        let base = if let Some(ref ua) = options.user_agent {\n            ua.clone()\n        } else {\n            match options.device {\n                DeviceType::Desktop => format!(\n                    \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{}.0.0.0 Safari/537.36\",\n                    chrono::Utc::now().format(\"%y\")\n                ),\n                DeviceType::Mobile => \"Mozilla/5.0 (iPhone; CPU iPhone OS 15_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15A5370a Safari/604.1\".to_string(),\n                DeviceType::Tablet => \"Mozilla/5.0 (Linux; Android 11; SAMSUNG SM-T875) AppleWebKit/537.36 (KHTML, 
like Gecko) SamsungBrowser/14.0 Chrome/87.0.4280.141 Safari/537.36\".to_string(),\n            }\n        };\n\n        // Add signature unless user agent ends with '!'\n        if base.ends_with('!') {\n            base.trim_end_matches('!').trim_end().to_string()\n        } else {\n            format!(\"{} {}\", base, Self::get_crawler_user_agent_signature())\n        }\n    }\n\n    /// Get crawler user agent signature\n    pub fn get_crawler_user_agent_signature() -> String {\n        format!(\"siteone-crawler/{}\", version::CODE)\n    }\n\n    /// Compute MD5-based key for URL deduplication\n    fn compute_url_key(url: &ParsedUrl) -> String {\n        let relevant_parts = url.get_full_url(true, false);\n        let mut hasher = Md5::new();\n        hasher.update(relevant_parts.as_bytes());\n        format!(\"{:x}\", hasher.finalize())\n    }\n\n    /// Check if URL points to a sitemap.xml or sitemap.xml.gz file\n    fn is_sitemap_url(url: &ParsedUrl) -> bool {\n        let path_lower = url.path.to_lowercase();\n        path_lower.contains(\"sitemap\") && (path_lower.ends_with(\".xml\") || path_lower.ends_with(\".xml.gz\"))\n    }\n\n    /// Compute short unique ID for a URL (first 8 chars of MD5)\n    fn compute_url_uq_id(url: &ParsedUrl) -> String {\n        let full_url = url.get_full_url(true, false);\n        let mut hasher = Md5::new();\n        hasher.update(full_url.as_bytes());\n        let hash = format!(\"{:x}\", hasher.finalize());\n        hash[..8].to_string()\n    }\n\n    /// Decode HTML entities\n    fn decode_html_entities(text: &str) -> String {\n        text.replace(\"&amp;\", \"&\")\n            .replace(\"&lt;\", \"<\")\n            .replace(\"&gt;\", \">\")\n            .replace(\"&quot;\", \"\\\"\")\n            .replace(\"&#39;\", \"'\")\n            .replace(\"&ndash;\", \"\\u{2013}\")\n            .replace(\"&mdash;\", \"\\u{2014}\")\n    }\n\n    /// Get current timestamp in seconds\n    fn current_timestamp() -> f64 {\n        
std::time::SystemTime::now()\n            .duration_since(std::time::UNIX_EPOCH)\n            .unwrap_or_default()\n            .as_secs_f64()\n    }\n\n    /// Add random query parameters to a URL path\n    fn add_random_query_params(path: &str) -> String {\n        let random_val = rand_simple();\n        if path.contains('?') {\n            format!(\"{}&_soc={}\", path, random_val)\n        } else {\n            format!(\"{}?_soc={}\", path, random_val)\n        }\n    }\n\n    /// Apply URL transformations for HTTP request (--transform-url)\n    fn apply_http_request_transformations(host: &str, path: &str, transform_url: &[String]) -> (String, String) {\n        if transform_url.is_empty() {\n            return (host.to_string(), path.to_string());\n        }\n\n        let mut full_url = format!(\"{}{}\", host, path);\n        let original_url = full_url.clone();\n\n        for transform in transform_url {\n            let parts: Vec<&str> = transform.splitn(2, \"->\").collect();\n            if parts.len() != 2 {\n                continue;\n            }\n\n            let from = parts[0].trim();\n            let to = parts[1].trim();\n\n            // Check if it's a regex pattern\n            let is_regex = utils::is_regex_pattern(from);\n\n            if is_regex {\n                if let Ok(re) = Regex::new(from) {\n                    full_url = re.replace_all(&full_url, to).to_string();\n                }\n            } else {\n                full_url = full_url.replace(from, to);\n            }\n        }\n\n        if full_url != original_url {\n            // Parse transformed URL back to host and path\n            if let Ok(parsed) = url::Url::parse(&format!(\"http://{}\", full_url)) {\n                let new_host = parsed.host_str().unwrap_or(host).to_string();\n                let new_path = if let Some(query) = parsed.query() {\n                    format!(\"{}?{}\", parsed.path(), query)\n                } else {\n                    
parsed.path().to_string()\n                };\n                return (new_host, new_path);\n            }\n        }\n\n        (host.to_string(), path.to_string())\n    }\n\n    /// Remove AVIF and WebP support from Accept header (for offline export)\n    pub fn remove_avif_and_webp_support_from_accept_header(&mut self) {\n        self.accept_header = self.accept_header.replace(\"image/avif,\", \"\").replace(\"image/webp,\", \"\");\n    }\n\n    /// Terminate the crawler\n    pub fn terminate(&self) {\n        self.terminated.store(true, Ordering::SeqCst);\n    }\n\n    /// Get forced IP for domain and port from --resolve options\n    pub fn get_forced_ip_for_domain_and_port(&self, domain: &str, port: u16) -> Option<String> {\n        self.resolve_cache\n            .get(&format!(\"{}:{}\", domain, port))\n            .map(|v| v.value().clone())\n    }\n\n    /// Get cache type flags from response headers\n    fn get_cache_type_flags(headers: &HashMap<String, String>) -> u32 {\n        use crate::result::visited_url::*;\n\n        let mut flags: u32 = 0;\n\n        if let Some(cache_control) = headers.get(\"cache-control\") {\n            flags |= CACHE_TYPE_HAS_CACHE_CONTROL;\n            let cc_lower = cache_control.to_lowercase();\n            if cc_lower.contains(\"max-age\") {\n                flags |= CACHE_TYPE_HAS_MAX_AGE;\n            }\n            if cc_lower.contains(\"s-maxage\") || cc_lower.contains(\"s-max-age\") {\n                flags |= CACHE_TYPE_HAS_S_MAX_AGE;\n            }\n            if cc_lower.contains(\"stale-while-revalidate\") {\n                flags |= CACHE_TYPE_HAS_STALE_WHILE_REVALIDATE;\n            }\n            if cc_lower.contains(\"stale-if-error\") {\n                flags |= CACHE_TYPE_HAS_STALE_IF_ERROR;\n            }\n            if cc_lower.contains(\"public\") {\n                flags |= CACHE_TYPE_HAS_PUBLIC;\n            }\n            if cc_lower.contains(\"private\") {\n                flags |= 
CACHE_TYPE_HAS_PRIVATE;\n            }\n            if cc_lower.contains(\"no-cache\") {\n                flags |= CACHE_TYPE_HAS_NO_CACHE;\n            }\n            if cc_lower.contains(\"no-store\") {\n                flags |= CACHE_TYPE_HAS_NO_STORE;\n            }\n            if cc_lower.contains(\"must-revalidate\") {\n                flags |= CACHE_TYPE_HAS_MUST_REVALIDATE;\n            }\n            if cc_lower.contains(\"proxy-revalidate\") {\n                flags |= CACHE_TYPE_HAS_PROXY_REVALIDATE;\n            }\n            if cc_lower.contains(\"immutable\") {\n                flags |= CACHE_TYPE_HAS_IMMUTABLE;\n            }\n        }\n\n        if headers.contains_key(\"expires\") {\n            flags |= CACHE_TYPE_HAS_EXPIRES;\n        }\n        if headers.contains_key(\"etag\") {\n            flags |= CACHE_TYPE_HAS_ETAG;\n        }\n        if headers.contains_key(\"last-modified\") {\n            flags |= CACHE_TYPE_HAS_LAST_MODIFIED;\n        }\n\n        if flags == 0 {\n            flags = CACHE_TYPE_NO_CACHE_HEADERS;\n        }\n\n        flags\n    }\n\n    /// Get cache lifetime from response headers (in seconds)\n    fn get_cache_lifetime(headers: &HashMap<String, String>) -> Option<i32> {\n        if let Some(cache_control) = headers.get(\"cache-control\") {\n            let cc_lower = cache_control.to_lowercase();\n            // Try max-age first\n            if let Some(pos) = cc_lower.find(\"max-age=\") {\n                let after = &cc_lower[pos + 8..];\n                let num_str: String = after.chars().take_while(|c| c.is_ascii_digit()).collect();\n                if let Ok(seconds) = num_str.parse::<i32>() {\n                    return Some(seconds);\n                }\n            }\n        }\n        None\n    }\n\n    // --- Public accessors ---\n\n    pub fn get_content_processor_manager(&self) -> &Arc<Mutex<ContentProcessorManager>> {\n        &self.content_processor_manager\n    }\n\n    pub fn 
get_initial_parsed_url(&self) -> &ParsedUrl {\n        &self.initial_parsed_url\n    }\n\n    pub fn get_options(&self) -> &Arc<CoreOptions> {\n        &self.options\n    }\n\n    pub fn get_output(&self) -> &Arc<Mutex<Box<dyn Output>>> {\n        &self.output\n    }\n\n    pub fn get_status(&self) -> &Arc<Mutex<Status>> {\n        &self.status\n    }\n\n    pub fn get_visited(&self) -> &Arc<DashMap<String, VisitedEntry>> {\n        &self.visited\n    }\n\n    pub fn get_queue(&self) -> &Arc<DashMap<String, QueueEntry>> {\n        &self.queue\n    }\n\n    pub fn get_skipped(&self) -> &Arc<DashMap<String, SkippedEntry>> {\n        &self.skipped\n    }\n\n    pub fn get_analysis_manager(&self) -> &Arc<Mutex<AnalysisManager>> {\n        &self.analysis_manager\n    }\n\n    pub fn get_done_urls_count(&self) -> usize {\n        self.done_urls_count.load(Ordering::SeqCst)\n    }\n}\n\n/// Simple pseudo-random number for query params\nfn rand_simple() -> u64 {\n    let now = std::time::SystemTime::now()\n        .duration_since(std::time::UNIX_EPOCH)\n        .unwrap_or_default();\n    now.as_nanos() as u64 % 1_000_000\n}\n\n/// Pre-compile domain wildcard patterns into regex (e.g. 
\"*.example.com\" → \"^.*\\.example\\.com$\")\nfn compile_domain_patterns(domains: &[String]) -> Vec<Regex> {\n    domains\n        .iter()\n        .filter_map(|d| {\n            let pattern = format!(\"^{}$\", regex::escape(d).replace(r\"\\*\", \".*\"));\n            Regex::new(&pattern).ok()\n        })\n        .collect()\n}\n\n/// Filter query parameters in a URL, keeping only those whose names are in the allowlist.\nfn filter_query_params(url: &str, keep_params: &[String]) -> String {\n    if let Some(q_pos) = url.find('?') {\n        let base = &url[..q_pos];\n        let query_str = &url[q_pos + 1..];\n        let filtered: Vec<&str> = query_str\n            .split('&')\n            .filter(|pair| {\n                let name = pair.split('=').next().unwrap_or(\"\");\n                !name.is_empty() && keep_params.iter().any(|k| k == name)\n            })\n            .collect();\n        if filtered.is_empty() {\n            base.to_string()\n        } else {\n            format!(\"{}?{}\", base, filtered.join(\"&\"))\n        }\n    } else {\n        url.to_string()\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    // =========================================================================\n    // <base href> regex tests (#68)\n    // =========================================================================\n\n    #[test]\n    fn base_href_double_quotes() {\n        let html = r#\"<html><head><base href=\"https://example.com/subdir/\"></head></html>\"#;\n        let caps = RE_BASE_HREF.captures(html).unwrap();\n        assert_eq!(caps.get(1).unwrap().as_str(), \"https://example.com/subdir/\");\n    }\n\n    #[test]\n    fn base_href_single_quotes() {\n        let html = r#\"<html><head><base href='https://example.com/'></head></html>\"#;\n        let caps = RE_BASE_HREF.captures(html).unwrap();\n        assert_eq!(caps.get(1).unwrap().as_str(), \"https://example.com/\");\n    }\n\n    #[test]\n    fn base_href_no_quotes() {\n        let 
html = r#\"<base href=https://example.com/dir/>\"#;\n        let caps = RE_BASE_HREF.captures(html).unwrap();\n        assert_eq!(caps.get(1).unwrap().as_str(), \"https://example.com/dir/\");\n    }\n\n    #[test]\n    fn base_href_relative_path() {\n        let html = r#\"<base href=\"/subdir/\">\"#;\n        let caps = RE_BASE_HREF.captures(html).unwrap();\n        assert_eq!(caps.get(1).unwrap().as_str(), \"/subdir/\");\n    }\n\n    #[test]\n    fn base_href_case_insensitive() {\n        let html = r#\"<BASE HREF=\"https://example.com/\">\"#;\n        let caps = RE_BASE_HREF.captures(html).unwrap();\n        assert_eq!(caps.get(1).unwrap().as_str(), \"https://example.com/\");\n    }\n\n    #[test]\n    fn base_href_absent() {\n        let html = r#\"<html><head><title>No base</title></head></html>\"#;\n        assert!(RE_BASE_HREF.captures(html).is_none());\n    }\n\n    #[test]\n    fn base_href_with_other_attrs() {\n        let html = r#\"<base target=\"_blank\" href=\"https://example.com/app/\">\"#;\n        let caps = RE_BASE_HREF.captures(html).unwrap();\n        assert_eq!(caps.get(1).unwrap().as_str(), \"https://example.com/app/\");\n    }\n\n    // =========================================================================\n    // is_sitemap_url tests (#69)\n    // =========================================================================\n\n    #[test]\n    fn sitemap_url_standard() {\n        let url = ParsedUrl::parse(\"https://example.com/sitemap.xml\", None);\n        assert!(Crawler::is_sitemap_url(&url));\n    }\n\n    #[test]\n    fn sitemap_url_with_index() {\n        let url = ParsedUrl::parse(\"https://example.com/sitemap-index.xml\", None);\n        assert!(Crawler::is_sitemap_url(&url));\n    }\n\n    #[test]\n    fn sitemap_url_nested() {\n        let url = ParsedUrl::parse(\"https://example.com/sitemaps/sitemap-pages.xml\", None);\n        assert!(Crawler::is_sitemap_url(&url));\n    }\n\n    #[test]\n    fn sitemap_url_case_insensitive() 
{\n        let url = ParsedUrl::parse(\"https://example.com/Sitemap.XML\", None);\n        assert!(Crawler::is_sitemap_url(&url));\n    }\n\n    #[test]\n    fn not_sitemap_regular_page() {\n        let url = ParsedUrl::parse(\"https://example.com/about\", None);\n        assert!(!Crawler::is_sitemap_url(&url));\n    }\n\n    #[test]\n    fn not_sitemap_xml_without_sitemap() {\n        let url = ParsedUrl::parse(\"https://example.com/feed.xml\", None);\n        assert!(!Crawler::is_sitemap_url(&url));\n    }\n\n    #[test]\n    fn not_sitemap_html_page() {\n        let url = ParsedUrl::parse(\"https://example.com/sitemap.html\", None);\n        assert!(!Crawler::is_sitemap_url(&url));\n    }\n\n    #[test]\n    fn sitemap_url_gzip() {\n        let url = ParsedUrl::parse(\"https://example.com/sitemap.xml.gz\", None);\n        assert!(Crawler::is_sitemap_url(&url));\n    }\n\n    #[test]\n    fn sitemap_url_gzip_nested() {\n        let url = ParsedUrl::parse(\"https://example.com/sitemaps/sitemap-posts.xml.gz\", None);\n        assert!(Crawler::is_sitemap_url(&url));\n    }\n\n    #[test]\n    fn not_sitemap_tar_gz() {\n        let url = ParsedUrl::parse(\"https://example.com/archive.tar.gz\", None);\n        assert!(!Crawler::is_sitemap_url(&url));\n    }\n\n    // =========================================================================\n    // normalize_url_to_initial tests (#35)\n    // =========================================================================\n\n    #[test]\n    fn normalize_www_to_no_www() {\n        let initial = ParsedUrl::parse(\"https://example.com/\", None);\n        let mut url = ParsedUrl::parse(\"https://www.example.com/page\", None);\n        Crawler::normalize_url_to_initial(&mut url, &initial);\n        assert_eq!(url.host.as_deref(), Some(\"example.com\"));\n        assert_eq!(url.scheme, Some(\"https\".to_string()));\n    }\n\n    #[test]\n    fn normalize_no_www_to_www() {\n        let initial = 
ParsedUrl::parse(\"https://www.example.com/\", None);\n        let mut url = ParsedUrl::parse(\"https://example.com/page\", None);\n        Crawler::normalize_url_to_initial(&mut url, &initial);\n        assert_eq!(url.host.as_deref(), Some(\"www.example.com\"));\n    }\n\n    #[test]\n    fn normalize_http_to_https() {\n        let initial = ParsedUrl::parse(\"https://example.com/\", None);\n        let mut url = ParsedUrl::parse(\"http://example.com/page\", None);\n        Crawler::normalize_url_to_initial(&mut url, &initial);\n        assert_eq!(url.scheme, Some(\"https\".to_string()));\n    }\n\n    #[test]\n    fn normalize_both_www_and_scheme() {\n        let initial = ParsedUrl::parse(\"https://example.com/\", None);\n        let mut url = ParsedUrl::parse(\"http://www.example.com/page\", None);\n        Crawler::normalize_url_to_initial(&mut url, &initial);\n        assert_eq!(url.host.as_deref(), Some(\"example.com\"));\n        assert_eq!(url.scheme, Some(\"https\".to_string()));\n    }\n\n    #[test]\n    fn normalize_leaves_different_domain_unchanged() {\n        let initial = ParsedUrl::parse(\"https://example.com/\", None);\n        let mut url = ParsedUrl::parse(\"https://other.com/page\", None);\n        Crawler::normalize_url_to_initial(&mut url, &initial);\n        assert_eq!(url.host.as_deref(), Some(\"other.com\"));\n    }\n\n    #[test]\n    fn normalize_same_url_no_change() {\n        let initial = ParsedUrl::parse(\"https://example.com/\", None);\n        let mut url = ParsedUrl::parse(\"https://example.com/page\", None);\n        Crawler::normalize_url_to_initial(&mut url, &initial);\n        assert_eq!(url.host.as_deref(), Some(\"example.com\"));\n        assert_eq!(url.scheme, Some(\"https\".to_string()));\n    }\n\n    #[test]\n    fn normalize_preserves_path() {\n        let initial = ParsedUrl::parse(\"https://example.com/\", None);\n        let mut url = ParsedUrl::parse(\"http://www.example.com/some/deep/path?q=1\", None);\n        
Crawler::normalize_url_to_initial(&mut url, &initial);\n        assert_eq!(url.path, \"/some/deep/path\");\n        assert_eq!(url.query.as_deref(), Some(\"q=1\"));\n    }\n\n    #[test]\n    fn filter_query_params_keeps_specified() {\n        let keep = vec![\"foo\".to_string(), \"baz\".to_string()];\n        let result = filter_query_params(\"https://example.com/page?foo=1&bar=2&baz=3\", &keep);\n        assert_eq!(result, \"https://example.com/page?foo=1&baz=3\");\n    }\n\n    #[test]\n    fn filter_query_params_removes_all_when_none_match() {\n        let keep = vec![\"xyz\".to_string()];\n        let result = filter_query_params(\"https://example.com/page?foo=1&bar=2\", &keep);\n        assert_eq!(result, \"https://example.com/page\");\n    }\n\n    #[test]\n    fn filter_query_params_no_query_string() {\n        let keep = vec![\"foo\".to_string()];\n        let result = filter_query_params(\"https://example.com/page\", &keep);\n        assert_eq!(result, \"https://example.com/page\");\n    }\n\n    #[test]\n    fn filter_query_params_keeps_param_without_value() {\n        let keep = vec![\"debug\".to_string()];\n        let result = filter_query_params(\"https://example.com/page?debug&foo=bar\", &keep);\n        assert_eq!(result, \"https://example.com/page?debug\");\n    }\n\n    #[test]\n    fn filter_query_params_preserves_order() {\n        let keep = vec![\"c\".to_string(), \"a\".to_string()];\n        let result = filter_query_params(\"https://example.com/?a=1&b=2&c=3\", &keep);\n        assert_eq!(result, \"https://example.com/?a=1&c=3\");\n    }\n\n    #[test]\n    fn filter_query_params_single_kept_param() {\n        let keep = vec![\"id\".to_string()];\n        let result = filter_query_params(\"https://example.com/page?id=42&session=abc&tracking=xyz\", &keep);\n        assert_eq!(result, \"https://example.com/page?id=42\");\n    }\n}\n"
  },
  {
    "path": "src/engine/found_url.rs",
    "content": "// SiteOne Crawler - FoundUrl\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse once_cell::sync::Lazy;\nuse regex::Regex;\n\nuse super::parsed_url::ParsedUrl;\n\n/// Source of discovered URL - where in HTML/CSS/JS was found\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]\n#[repr(u8)]\npub enum UrlSource {\n    InitUrl = 5,\n    AHref = 10,\n    ImgSrc = 20,\n    ImgSrcset = 21,\n    InputSrc = 22,\n    SourceSrc = 23,\n    VideoSrc = 24,\n    AudioSrc = 25,\n    ScriptSrc = 30,\n    InlineScriptSrc = 40,\n    LinkHref = 50,\n    CssUrl = 60,\n    JsUrl = 70,\n    Redirect = 80,\n    Sitemap = 90,\n}\n\nimpl UrlSource {\n    /// Get short human-readable name for this source type\n    pub fn short_name(&self) -> &'static str {\n        match self {\n            UrlSource::InitUrl => \"Initial URL\",\n            UrlSource::AHref => \"<a href>\",\n            UrlSource::ImgSrc => \"<img src>\",\n            UrlSource::ImgSrcset => \"<img srcset>\",\n            UrlSource::InputSrc => \"<input src>\",\n            UrlSource::SourceSrc => \"<source src>\",\n            UrlSource::VideoSrc => \"<video src>\",\n            UrlSource::AudioSrc => \"<audio src>\",\n            UrlSource::ScriptSrc => \"<script src>\",\n            UrlSource::InlineScriptSrc => \"inline <script src>\",\n            UrlSource::LinkHref => \"<link href>\",\n            UrlSource::CssUrl => \"css url()\",\n            UrlSource::JsUrl => \"js url\",\n            UrlSource::Redirect => \"redirect\",\n            UrlSource::Sitemap => \"sitemap\",\n        }\n    }\n\n    /// Convert from integer source code.\n    pub fn from_code(code: u8) -> Option<Self> {\n        match code {\n            5 => Some(UrlSource::InitUrl),\n            10 => Some(UrlSource::AHref),\n            20 => Some(UrlSource::ImgSrc),\n            21 => Some(UrlSource::ImgSrcset),\n            22 => Some(UrlSource::InputSrc),\n            23 => Some(UrlSource::SourceSrc),\n            24 => 
Some(UrlSource::VideoSrc),\n            25 => Some(UrlSource::AudioSrc),\n            30 => Some(UrlSource::ScriptSrc),\n            40 => Some(UrlSource::InlineScriptSrc),\n            50 => Some(UrlSource::LinkHref),\n            60 => Some(UrlSource::CssUrl),\n            70 => Some(UrlSource::JsUrl),\n            80 => Some(UrlSource::Redirect),\n            90 => Some(UrlSource::Sitemap),\n            _ => None,\n        }\n    }\n}\n\nimpl std::fmt::Display for UrlSource {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.short_name())\n    }\n}\n\n/// Regex to match absolute HTTP URLs\nstatic HTTP_URL_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)^https?://\").unwrap());\n\n/// A URL found during crawling, with information about where it was found\n#[derive(Debug, Clone)]\npub struct FoundUrl {\n    /// The normalized found URL\n    pub url: String,\n    /// URL of the page where this URL was found\n    pub source_url: String,\n    /// Source type (where in HTML/CSS the URL was found)\n    pub source: UrlSource,\n}\n\nimpl FoundUrl {\n    pub fn new(url: &str, source_url: &str, source: UrlSource) -> Self {\n        let normalized = normalize_url(url, source_url);\n        Self {\n            url: normalized,\n            source_url: source_url.to_string(),\n            source,\n        }\n    }\n\n    /// Is this URL an included asset (img src, script src, link href) and not linked by href?\n    pub fn is_included_asset(&self) -> bool {\n        self.source != UrlSource::AHref\n    }\n}\n\nimpl std::fmt::Display for FoundUrl {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.url)\n    }\n}\n\n/// Normalize URL and remove strange characters/behavior.\n/// Remove unwanted http(s)://SAME_DOMAIN:SAME_OPTIONAL_PORT prefix when it matches the source URL.\nfn normalize_url(url: &str, source_url: &str) -> String {\n    // Replace HTML entities and 
escape sequences\n    let mut normalized = url\n        .replace(\"&#38;\", \"&\")\n        .replace(\"&amp;\", \"&\")\n        .replace(\"\\\\ \", \"%20\")\n        .replace(' ', \"%20\");\n\n    // Trim leading quotes/tabs/spaces\n    normalized = normalized.trim_start_matches(['\"', '\\'', '\\t', ' ']).to_string();\n    // Trim trailing &, quotes, tabs, spaces\n    normalized = normalized.trim_end_matches(['&', '\"', '\\'', '\\t', ' ']).to_string();\n\n    // Remove unwanted http(s)://SAME_DOMAIN:SAME_OPTIONAL_PORT\n    if HTTP_URL_RE.is_match(&normalized) {\n        let parsed_url = ParsedUrl::parse(&normalized, Some(&ParsedUrl::parse(source_url, None)));\n        let parsed_source = ParsedUrl::parse(source_url, None);\n\n        if parsed_url.host == parsed_source.host\n            && parsed_source.port == parsed_url.port\n            && parsed_source.port.is_some()\n            && let (Some(scheme), Some(host)) = (&parsed_url.scheme, &parsed_url.host)\n        {\n            // Build regex pattern to strip scheme://host[:port]\n            let port_pattern = match parsed_url.port {\n                Some(p) => format!(\"(:{p})?\"),\n                None => String::new(),\n            };\n            let pattern = format!(\n                r\"(?i){}://{}{}\",\n                regex::escape(scheme),\n                regex::escape(host),\n                port_pattern\n            );\n            if let Ok(re) = Regex::new(&pattern) {\n                normalized = re.replace(&normalized, \"\").to_string();\n            }\n        }\n    }\n\n    normalized\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_normalize_url_entities() {\n        let found = FoundUrl::new(\"/page?a=1&amp;b=2\", \"https://example.com/\", UrlSource::AHref);\n        assert_eq!(found.url, \"/page?a=1&b=2\");\n    }\n\n    #[test]\n    fn test_normalize_url_spaces() {\n        let found = FoundUrl::new(\"/path with spaces\", \"https://example.com/\", 
UrlSource::AHref);\n        assert_eq!(found.url, \"/path%20with%20spaces\");\n    }\n\n    #[test]\n    fn test_is_included_asset() {\n        let link = FoundUrl::new(\"/page\", \"https://example.com/\", UrlSource::AHref);\n        assert!(!link.is_included_asset());\n\n        let img = FoundUrl::new(\"/img.png\", \"https://example.com/\", UrlSource::ImgSrc);\n        assert!(img.is_included_asset());\n    }\n\n    #[test]\n    fn test_source_short_name() {\n        assert_eq!(UrlSource::AHref.short_name(), \"<a href>\");\n        assert_eq!(UrlSource::Redirect.short_name(), \"redirect\");\n    }\n}\n"
  },
  {
    "path": "src/engine/found_urls.rs",
    "content": "// SiteOne Crawler - FoundUrls collection\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse md5::{Digest, Md5};\nuse once_cell::sync::Lazy;\nuse regex::Regex;\n\nuse super::found_url::{FoundUrl, UrlSource};\n\n/// Regex for detecting non-http scheme URLs (mailto:, javascript:, data:, tel:, etc.)\nstatic NON_HTTP_SCHEME_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)^[a-z]+:[a-z0-9]\").unwrap());\n\n/// Collection of found URLs, deduplicated by MD5 hash of URL\n#[derive(Debug, Clone)]\npub struct FoundUrls {\n    found_urls: HashMap<String, FoundUrl>,\n}\n\nimpl FoundUrls {\n    pub fn new() -> Self {\n        Self {\n            found_urls: HashMap::new(),\n        }\n    }\n\n    /// Add a found URL, deduplicated by MD5 hash\n    pub fn add_url(&mut self, found_url: FoundUrl) {\n        let key = md5_hex(&found_url.url);\n        self.found_urls.entry(key).or_insert(found_url);\n    }\n\n    /// Add URLs from a text array, filtering out invalid ones\n    pub fn add_urls_from_text_array(&mut self, urls: &[&str], source_url: &str, source: UrlSource) {\n        for url in urls {\n            if is_url_valid_for_crawling(url) {\n                self.add_url(FoundUrl::new(url, source_url, source));\n            }\n        }\n    }\n\n    /// Get all found URLs\n    pub fn get_urls(&self) -> &HashMap<String, FoundUrl> {\n        &self.found_urls\n    }\n\n    /// Get count of found URLs\n    pub fn get_count(&self) -> usize {\n        self.found_urls.len()\n    }\n}\n\nimpl Default for FoundUrls {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\n/// Compute MD5 hex hash of a string\nfn md5_hex(input: &str) -> String {\n    let mut hasher = Md5::new();\n    hasher.update(input.as_bytes());\n    format!(\"{:x}\", hasher.finalize())\n}\n\n/// Check if URL is valid for crawling. 
Ignored are:\n/// - anchor #fragment links\n/// - data:, mailto:, javascript: and other non-http(s) links\n/// - file:// links\nfn is_url_valid_for_crawling(url: &str) -> bool {\n    let url = url.trim();\n    if url.starts_with('#') {\n        return false;\n    }\n    if NON_HTTP_SCHEME_RE.is_match(url) {\n        return false;\n    }\n    if url.to_lowercase().starts_with(\"file://\") {\n        return false;\n    }\n    true\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_dedup_by_md5() {\n        let mut urls = FoundUrls::new();\n        urls.add_url(FoundUrl::new(\"/page\", \"https://example.com/\", UrlSource::AHref));\n        urls.add_url(FoundUrl::new(\"/page\", \"https://example.com/other\", UrlSource::AHref));\n        assert_eq!(urls.get_count(), 1);\n    }\n\n    #[test]\n    fn test_add_urls_from_text_array() {\n        let mut urls = FoundUrls::new();\n        urls.add_urls_from_text_array(\n            &[\"/page1\", \"/page2\", \"#fragment\", \"mailto:test@test.com\", \"/page1\"],\n            \"https://example.com/\",\n            UrlSource::AHref,\n        );\n        assert_eq!(urls.get_count(), 2);\n    }\n\n    #[test]\n    fn test_is_url_valid_for_crawling() {\n        assert!(is_url_valid_for_crawling(\"/page\"));\n        assert!(is_url_valid_for_crawling(\"https://example.com\"));\n        assert!(!is_url_valid_for_crawling(\"#fragment\"));\n        assert!(!is_url_valid_for_crawling(\"mailto:test@test.com\"));\n        assert!(!is_url_valid_for_crawling(\"javascript:void(0)\"));\n        assert!(!is_url_valid_for_crawling(\"data:text/html,test\"));\n        assert!(!is_url_valid_for_crawling(\"file:///etc/passwd\"));\n    }\n}\n"
  },
  {
    "path": "src/engine/http_client.rs",
    "content": "// SiteOne Crawler - HttpClient\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\nuse std::path::Path;\nuse std::time::Instant;\n\nuse flate2::Compression;\nuse flate2::read::GzDecoder;\nuse flate2::write::GzEncoder;\nuse md5::{Digest, Md5};\nuse reqwest::header::{HeaderMap, HeaderName, HeaderValue};\n\nuse base64::Engine as _;\n\nuse super::http_response::HttpResponse;\nuse crate::error::{CrawlerError, CrawlerResult};\nuse crate::version;\n\n/// Async HTTP client for crawling with caching, proxy, and auth support\npub struct HttpClient {\n    /// Reusable reqwest client (Arc-backed, clone is cheap)\n    client: reqwest::Client,\n    /// Basic HTTP auth in format \"username:password\"\n    http_auth: Option<String>,\n    /// Cache directory. If None, caching is disabled\n    cache_dir: Option<String>,\n    /// Whether to compress cached data with gzip\n    compression: bool,\n    /// Cache TTL in seconds. None = infinite (never expires)\n    cache_ttl: Option<u64>,\n}\n\nimpl HttpClient {\n    pub fn new(\n        proxy: Option<String>,\n        http_auth: Option<String>,\n        cache_dir: Option<String>,\n        compression: bool,\n        cache_ttl: Option<u64>,\n        accept_invalid_certs: bool,\n    ) -> Self {\n        let client = Self::build_shared_client(&proxy, accept_invalid_certs);\n        Self {\n            client,\n            http_auth,\n            cache_dir,\n            compression,\n            cache_ttl,\n        }\n    }\n\n    /// Build the shared reqwest::Client with proxy support.\n    /// Timeout is set per-request, not on the shared client.\n    fn build_shared_client(proxy: &Option<String>, accept_invalid_certs: bool) -> reqwest::Client {\n        let mut builder = reqwest::Client::builder()\n            .danger_accept_invalid_certs(accept_invalid_certs)\n            .redirect(reqwest::redirect::Policy::none());\n\n        if let Some(proxy_str) = proxy {\n            let parts: Vec<&str> = 
proxy_str.splitn(2, ':').collect();\n            if parts.len() == 2 {\n                let proxy_url = format!(\"http://{}:{}\", parts[0], parts[1]);\n                if let Ok(proxy) = reqwest::Proxy::all(&proxy_url) {\n                    builder = builder.proxy(proxy);\n                }\n            }\n        }\n\n        builder.build().unwrap_or_else(|_| reqwest::Client::new())\n    }\n\n    /// Perform an HTTP request (GET or HEAD)\n    #[allow(clippy::too_many_arguments)]\n    pub async fn request(\n        &self,\n        host: &str,\n        port: u16,\n        scheme: &str,\n        url: &str,\n        http_method: &str,\n        timeout_secs: u64,\n        user_agent: &str,\n        accept: &str,\n        accept_encoding: &str,\n        origin: Option<&str>,\n        use_http_auth_if_configured: bool,\n        forced_ip: Option<&str>,\n    ) -> CrawlerResult<HttpResponse> {\n        let path = url::Url::parse(url).ok().map(|u| u.path().to_string());\n        let extension = path.as_ref().and_then(|p| {\n            std::path::Path::new(p)\n                .extension()\n                .and_then(|e| e.to_str())\n                .map(|e| e.to_string())\n        });\n\n        let args_for_cache: Vec<String> = vec![\n            host.to_string(),\n            port.to_string(),\n            scheme.to_string(),\n            url.to_string(),\n            http_method.to_string(),\n            user_agent.to_string(),\n            accept.to_string(),\n            accept_encoding.to_string(),\n            origin.unwrap_or(\"\").to_string(),\n        ];\n        let cache_key = self.get_cache_key(host, port, &args_for_cache, extension.as_deref());\n\n        // Check cache first (skip URLs with spaces as they are likely problematic)\n        if !url.contains(' ')\n            && let Some(mut cached) = self.get_from_cache(&cache_key)\n        {\n            cached.set_loaded_from_cache(true);\n            return Ok(cached);\n        }\n\n        // Build request 
headers\n        let mut request_headers = HeaderMap::new();\n        if let Ok(v) = HeaderValue::from_str(&format!(\"siteone-crawler/{}\", version::CODE)) {\n            request_headers.insert(\"x-crawler-info\", v);\n        }\n        if let Ok(v) = HeaderValue::from_str(user_agent) {\n            request_headers.insert(reqwest::header::USER_AGENT, v);\n        }\n        if let Ok(v) = HeaderValue::from_str(accept) {\n            request_headers.insert(reqwest::header::ACCEPT, v);\n        }\n        if let Ok(v) = HeaderValue::from_str(accept_encoding) {\n            request_headers.insert(reqwest::header::ACCEPT_ENCODING, v);\n        }\n        if let Ok(v) = HeaderValue::from_str(\"close\") {\n            request_headers.insert(reqwest::header::CONNECTION, v);\n        }\n\n        if let Some(ip) = forced_ip {\n            let _ = ip; // forced_ip handling: set Host header\n            if let Ok(v) = HeaderValue::from_str(host) {\n                request_headers.insert(reqwest::header::HOST, v);\n            }\n        }\n\n        if let Some(origin_val) = origin\n            && let Ok(v) = HeaderValue::from_str(origin_val)\n            && let Ok(name) = HeaderName::from_bytes(b\"origin\")\n        {\n            request_headers.insert(name, v);\n        }\n\n        // Use shared client with per-request timeout\n        let client = self.client.clone();\n\n        // Fix spaces in URL\n        let request_url = url.replace(\"\\\\ \", \"%20\").replace(' ', \"%20\");\n\n        // Build the actual URL to request\n        let actual_host = forced_ip.unwrap_or(host);\n        let full_url = if request_url.starts_with(\"http://\") || request_url.starts_with(\"https://\") {\n            request_url.clone()\n        } else {\n            let port_str = match (scheme, port) {\n                (\"http\", 80) | (\"https\", 443) => String::new(),\n                _ => format!(\":{}\", port),\n            };\n            format!(\"{}://{}{}{}\", scheme, actual_host, 
port_str, request_url)\n        };\n\n        let start_time = Instant::now();\n\n        let timeout = std::time::Duration::from_secs(timeout_secs);\n        let request = match http_method.to_uppercase().as_str() {\n            \"HEAD\" => client.head(&full_url).timeout(timeout),\n            _ => client.get(&full_url).timeout(timeout),\n        };\n\n        let request = request.headers(request_headers);\n\n        // Add basic auth if configured and requested\n        let request = if use_http_auth_if_configured {\n            if let Some(ref auth) = self.http_auth {\n                let parts: Vec<&str> = auth.splitn(2, ':').collect();\n                if parts.len() == 2 {\n                    request.basic_auth(parts[0], Some(parts[1]))\n                } else {\n                    request.basic_auth(auth, Option::<&str>::None)\n                }\n            } else {\n                request\n            }\n        } else {\n            request\n        };\n\n        let result = match request.send().await {\n            Ok(resp) => {\n                let status = resp.status().as_u16() as i32;\n                let mut resp_headers = convert_response_headers(resp.headers());\n                // reqwest auto-decompresses and strips Content-Encoding header.\n                // Detect decompression by checking if Transfer-Encoding: chunked and\n                // Vary: Accept-Encoding are present (indicating the response was compressed).\n                let has_transfer_chunked = resp_headers\n                    .get(\"transfer-encoding\")\n                    .map(|vals| vals.iter().any(|v| v.contains(\"chunked\")))\n                    .unwrap_or(false);\n                let has_vary_encoding = resp_headers\n                    .get(\"vary\")\n                    .map(|vals| vals.iter().any(|v| v.contains(\"Accept-Encoding\")))\n                    .unwrap_or(false);\n                if has_transfer_chunked && has_vary_encoding && 
!resp_headers.contains_key(\"content-encoding\") {\n                    resp_headers.insert(\"content-encoding\".to_string(), vec![\"gzip\".to_string()]);\n                }\n                let body = resp.bytes().await.ok().map(|b| b.to_vec());\n                let elapsed = start_time.elapsed().as_secs_f64();\n\n                HttpResponse::new(url.to_string(), status, body, resp_headers, elapsed)\n            }\n            Err(e) => {\n                let elapsed = start_time.elapsed().as_secs_f64();\n                let status = if e.is_connect() {\n                    -1 // Connection failure\n                } else if e.is_timeout() {\n                    -2 // Timeout\n                } else if e.is_request() {\n                    -4 // Send error\n                } else {\n                    -1 // Generic connection failure\n                };\n                HttpResponse::new(url.to_string(), status, None, HashMap::new(), elapsed)\n            }\n        };\n\n        self.save_to_cache(&cache_key, &result)?;\n        Ok(result)\n    }\n\n    /// Get cached HTTP response\n    fn get_from_cache(&self, cache_key: &str) -> Option<HttpResponse> {\n        let cache_file = self.get_cache_file_path(cache_key)?;\n\n        let cache_path = Path::new(&cache_file);\n        if !cache_path.is_file() {\n            return None;\n        }\n\n        // Check TTL: if cache file is older than TTL, treat as miss\n        if let Some(ttl_secs) = self.cache_ttl\n            && let Ok(metadata) = cache_path.metadata()\n            && let Ok(modified) = metadata.modified()\n            && let Ok(age) = modified.elapsed()\n            && age.as_secs() > ttl_secs\n        {\n            return None;\n        }\n\n        let data = std::fs::read(&cache_file).ok()?;\n        let json_str = if self.compression {\n            let mut decoder = GzDecoder::new(&data[..]);\n            let mut decompressed = String::new();\n            std::io::Read::read_to_string(&mut 
decoder, &mut decompressed).ok()?;\n            decompressed\n        } else {\n            String::from_utf8(data).ok()?\n        };\n\n        let cached: CachedResponse = serde_json::from_str(&json_str).ok()?;\n\n        // Don't use cached responses with error/server-error status codes\n        if matches!(cached.status_code, 429 | 500 | 502 | 503 | -1 | -2 | -3 | -4) {\n            return None;\n        }\n\n        let mut headers = HashMap::new();\n        for (k, v) in &cached.headers {\n            headers.insert(k.clone(), vec![v.clone()]);\n        }\n\n        // Decode body: try base64 first (new format), fall back to raw UTF-8 (old cache format)\n        let body_bytes = cached.body.as_ref().map(|b| {\n            // Try base64 decode first, fall back to raw UTF-8 bytes (old cache format)\n            base64::engine::general_purpose::STANDARD\n                .decode(b)\n                .unwrap_or_else(|_| b.as_bytes().to_vec())\n        });\n\n        Some(HttpResponse::new(\n            cached.url,\n            cached.status_code,\n            body_bytes,\n            headers,\n            cached.exec_time,\n        ))\n    }\n\n    /// Save HTTP response to disk cache\n    fn save_to_cache(&self, cache_key: &str, result: &HttpResponse) -> CrawlerResult<()> {\n        let cache_file = match self.get_cache_file_path(cache_key) {\n            Some(f) => f,\n            None => return Ok(()),\n        };\n\n        let cache_dir = Path::new(&cache_file)\n            .parent()\n            .map(|p| p.to_string_lossy().to_string())\n            .unwrap_or_default();\n\n        if !Path::new(&cache_dir).is_dir() {\n            std::fs::create_dir_all(&cache_dir).map_err(|e| {\n                CrawlerError::Io(std::io::Error::new(\n                    e.kind(),\n                    format!(\"Cannot create cache dir {}: {}\", cache_dir, e),\n                ))\n            })?;\n        }\n\n        let cached = CachedResponse {\n            url: 
result.url.clone(),\n            status_code: result.status_code,\n            body: result\n                .body\n                .as_ref()\n                .map(|b| base64::engine::general_purpose::STANDARD.encode(b)),\n            headers: result.headers.clone(),\n            exec_time: result.exec_time,\n        };\n\n        let json = serde_json::to_string(&cached)\n            .map_err(|e| CrawlerError::Other(format!(\"Cache serialization error: {}\", e)))?;\n\n        let data = if self.compression {\n            let mut encoder = GzEncoder::new(Vec::new(), Compression::default());\n            std::io::Write::write_all(&mut encoder, json.as_bytes()).map_err(CrawlerError::Io)?;\n            encoder.finish().map_err(CrawlerError::Io)?\n        } else {\n            json.into_bytes()\n        };\n\n        std::fs::write(&cache_file, &data).map_err(|e| {\n            CrawlerError::Io(std::io::Error::new(\n                e.kind(),\n                format!(\"Cannot write to cache file {}: {}\", cache_file, e),\n            ))\n        })?;\n\n        Ok(())\n    }\n\n    /// Check if a response for the given request parameters exists in cache.\n    /// Used to skip rate limiting for cached responses.\n    #[allow(clippy::too_many_arguments)]\n    pub fn is_url_cached(\n        &self,\n        host: &str,\n        port: u16,\n        scheme: &str,\n        url: &str,\n        http_method: &str,\n        user_agent: &str,\n        accept: &str,\n        accept_encoding: &str,\n        origin: Option<&str>,\n    ) -> bool {\n        if self.cache_dir.is_none() || url.contains(' ') {\n            return false;\n        }\n        let path = url::Url::parse(url).ok().map(|u| u.path().to_string());\n        let extension = path.as_ref().and_then(|p| {\n            std::path::Path::new(p)\n                .extension()\n                .and_then(|e| e.to_str())\n                .map(|e| e.to_string())\n        });\n        let args_for_cache: Vec<String> = vec![\n    
        host.to_string(),\n            port.to_string(),\n            scheme.to_string(),\n            url.to_string(),\n            http_method.to_string(),\n            user_agent.to_string(),\n            accept.to_string(),\n            accept_encoding.to_string(),\n            origin.unwrap_or(\"\").to_string(),\n        ];\n        let cache_key = self.get_cache_key(host, port, &args_for_cache, extension.as_deref());\n        match self.get_cache_file_path(&cache_key) {\n            Some(file) => Path::new(&file).is_file(),\n            None => false,\n        }\n    }\n\n    /// Get cache file path for a given cache key\n    fn get_cache_file_path(&self, cache_key: &str) -> Option<String> {\n        let cache_dir = self.cache_dir.as_ref()?;\n        let ext = if self.compression { \".cache.gz\" } else { \".cache\" };\n        Some(format!(\"{}/{}{}\", cache_dir, cache_key, ext))\n    }\n\n    /// Generate a cache key from request parameters\n    fn get_cache_key(&self, host: &str, port: u16, args: &[String], extension: Option<&str>) -> String {\n        let mut hasher = Md5::new();\n        for arg in args {\n            hasher.update(arg.as_bytes());\n        }\n        let md5 = format!(\"{:x}\", hasher.finalize());\n        let ext_suffix = extension.map(|e| format!(\".{}\", e)).unwrap_or_default();\n        format!(\"{}-{}/{}/{}{}\", host, port, &md5[..2], md5, ext_suffix)\n    }\n}\n\n/// Internal struct for cache serialization\n#[derive(serde::Serialize, serde::Deserialize)]\nstruct CachedResponse {\n    url: String,\n    status_code: i32,\n    /// Body stored as base64-encoded bytes to preserve binary data in JSON\n    body: Option<String>,\n    headers: HashMap<String, String>,\n    exec_time: f64,\n}\n\n/// Convert reqwest response headers to HashMap<String, Vec<String>>\nfn convert_response_headers(headers: &reqwest::header::HeaderMap) -> HashMap<String, Vec<String>> {\n    let mut result: HashMap<String, Vec<String>> = HashMap::new();\n    for 
(key, value) in headers.iter() {\n        let key_str = key.as_str().to_lowercase();\n        let val_str = value.to_str().unwrap_or(\"\").to_string();\n        result.entry(key_str).or_default().push(val_str);\n    }\n    result\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_cache_key_generation() {\n        let client = HttpClient::new(None, None, Some(\"/tmp/cache\".to_string()), false, None, false);\n        let args = vec![\n            \"example.com\".to_string(),\n            \"443\".to_string(),\n            \"https\".to_string(),\n            \"/page\".to_string(),\n        ];\n        let key = client.get_cache_key(\"example.com\", 443, &args, Some(\"html\"));\n        assert!(key.starts_with(\"example.com-443/\"));\n        assert!(key.ends_with(\".html\"));\n    }\n\n    #[test]\n    fn test_cache_file_path() {\n        let client = HttpClient::new(None, None, Some(\"/tmp/cache\".to_string()), false, None, false);\n        let path = client.get_cache_file_path(\"example.com-443/ab/abcdef\");\n        assert_eq!(path, Some(\"/tmp/cache/example.com-443/ab/abcdef.cache\".to_string()));\n\n        let client_gz = HttpClient::new(None, None, Some(\"/tmp/cache\".to_string()), true, None, false);\n        let path_gz = client_gz.get_cache_file_path(\"example.com-443/ab/abcdef\");\n        assert_eq!(\n            path_gz,\n            Some(\"/tmp/cache/example.com-443/ab/abcdef.cache.gz\".to_string())\n        );\n    }\n\n    #[test]\n    fn test_no_cache_when_disabled() {\n        let client = HttpClient::new(None, None, None, false, None, false);\n        assert!(client.get_cache_file_path(\"any-key\").is_none());\n    }\n}\n"
  },
  {
    "path": "src/engine/http_response.rs",
    "content": "// SiteOne Crawler - HttpResponse\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse crate::utils;\n\n/// HTTP response from the crawler's HTTP client.\n/// Body is stored as raw bytes (`Vec<u8>`) to preserve binary data (images, fonts, etc.)\n/// without UTF-8 corruption. Use `body_text()` when you need a String for text processing.\n#[derive(Debug, Clone)]\npub struct HttpResponse {\n    pub url: String,\n    pub status_code: i32,\n    pub body: Option<Vec<u8>>,\n    pub headers: HashMap<String, String>,\n    pub exec_time: f64,\n    pub skipped_reason: Option<String>,\n    loaded_from_cache: bool,\n}\n\nimpl HttpResponse {\n    pub fn new(\n        url: String,\n        status_code: i32,\n        body: Option<Vec<u8>>,\n        headers: HashMap<String, Vec<String>>,\n        exec_time: f64,\n    ) -> Self {\n        let (status_code, body, headers) = Self::detect_redirect_and_set_meta_redirect(status_code, body, headers);\n\n        let flat_headers = utils::get_flat_response_headers(&headers);\n\n        Self {\n            url,\n            status_code,\n            body,\n            headers: flat_headers,\n            exec_time,\n            skipped_reason: None,\n            loaded_from_cache: false,\n        }\n    }\n\n    /// Get body as text (lossy UTF-8 conversion). 
Use for HTML/CSS/JS processing.\n    pub fn body_text(&self) -> Option<String> {\n        self.body.as_ref().map(|b| String::from_utf8_lossy(b).into_owned())\n    }\n\n    pub fn get_formatted_exec_time(&self) -> String {\n        utils::get_formatted_duration(self.exec_time)\n    }\n\n    pub fn get_formatted_body_length(&self) -> String {\n        let len = self.body.as_ref().map(|b| b.len()).unwrap_or(0) as i64;\n        utils::get_formatted_size(len, 0)\n    }\n\n    /// Detect redirect and modify response to text/html with <meta> redirect (required for offline mode)\n    fn detect_redirect_and_set_meta_redirect(\n        status_code: i32,\n        mut body: Option<Vec<u8>>,\n        mut headers: HashMap<String, Vec<String>>,\n    ) -> (i32, Option<Vec<u8>>, HashMap<String, Vec<String>>) {\n        if status_code > 300 && status_code < 320 {\n            let location = headers.get(\"location\").and_then(|v| v.first()).cloned();\n            if let Some(ref loc) = location {\n                body = Some(\n                    format!(\n                        \"<meta http-equiv=\\\"refresh\\\" content=\\\"0; url={}\\\"> Redirecting to {} ...\",\n                        loc, loc\n                    )\n                    .into_bytes(),\n                );\n                headers.insert(\"content-type\".to_string(), vec![\"text/html\".to_string()]);\n            }\n        }\n        (status_code, body, headers)\n    }\n\n    pub fn set_loaded_from_cache(&mut self, loaded: bool) {\n        self.loaded_from_cache = loaded;\n    }\n\n    pub fn is_loaded_from_cache(&self) -> bool {\n        self.loaded_from_cache\n    }\n\n    pub fn is_skipped(&self) -> bool {\n        self.skipped_reason.is_some()\n    }\n\n    /// Create a skipped response (status code -6)\n    pub fn create_skipped(url: String, reason: String) -> Self {\n        let mut response = Self {\n            url,\n            status_code: -6,\n            body: Some(Vec::new()),\n            headers: 
HashMap::new(),\n            exec_time: 0.0,\n            skipped_reason: Some(reason),\n            loaded_from_cache: false,\n        };\n        response.skipped_reason = response.skipped_reason.take();\n        response\n    }\n\n    /// Get a header value by name (case-insensitive lookup)\n    pub fn get_header(&self, name: &str) -> Option<&String> {\n        let lower = name.to_lowercase();\n        self.headers.get(&lower)\n    }\n\n    /// Get the content-type header value\n    pub fn get_content_type(&self) -> Option<&String> {\n        self.get_header(\"content-type\")\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_redirect_meta() {\n        let mut headers = HashMap::new();\n        headers.insert(\"location\".to_string(), vec![\"https://example.com/new\".to_string()]);\n        let response = HttpResponse::new(\"https://example.com/old\".to_string(), 301, None, headers, 0.1);\n        assert!(response.body_text().map(|b| b.contains(\"Redirecting\")).unwrap_or(false));\n        assert_eq!(\n            response.headers.get(\"content-type\").map(|s| s.as_str()),\n            Some(\"text/html\")\n        );\n    }\n\n    #[test]\n    fn test_skipped_response() {\n        let response = HttpResponse::create_skipped(\"https://example.com\".to_string(), \"test reason\".to_string());\n        assert!(response.is_skipped());\n        assert_eq!(response.status_code, -6);\n    }\n\n    #[test]\n    fn test_no_redirect_for_200() {\n        let headers = HashMap::new();\n        let response = HttpResponse::new(\n            \"https://example.com/\".to_string(),\n            200,\n            Some(b\"<html>ok</html>\".to_vec()),\n            headers,\n            0.05,\n        );\n        assert_eq!(response.body_text().as_deref(), Some(\"<html>ok</html>\"));\n    }\n}\n"
  },
  {
    "path": "src/engine/initiator.rs",
    "content": "// SiteOne Crawler - Initiator\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Parses CLI arguments, validates options, creates and returns Manager.\n\nuse crate::analysis::manager::AnalysisManager;\nuse crate::engine::manager::Manager;\nuse crate::error::CrawlerResult;\nuse crate::options::core_options;\nuse crate::utils;\nuse crate::version;\n\n// Import all analyzers for registration\nuse crate::analysis::caching_analyzer::CachingAnalyzer;\nuse crate::analysis::content_type_analyzer::ContentTypeAnalyzer;\nuse crate::analysis::dns_analyzer::DnsAnalyzer;\nuse crate::analysis::external_links_analyzer::ExternalLinksAnalyzer;\nuse crate::analysis::fastest_analyzer::FastestAnalyzer;\nuse crate::analysis::headers_analyzer::HeadersAnalyzer;\nuse crate::analysis::page404_analyzer::Page404Analyzer;\nuse crate::analysis::redirects_analyzer::RedirectsAnalyzer;\nuse crate::analysis::skipped_urls_analyzer::SkippedUrlsAnalyzer;\nuse crate::analysis::slowest_analyzer::SlowestAnalyzer;\nuse crate::analysis::source_domains_analyzer::SourceDomainsAnalyzer;\n\n// Import complex analyzers\nuse crate::analysis::accessibility_analyzer::AccessibilityAnalyzer;\nuse crate::analysis::best_practice_analyzer::BestPracticeAnalyzer;\nuse crate::analysis::security_analyzer::SecurityAnalyzer;\nuse crate::analysis::seo_opengraph_analyzer::SeoAndOpenGraphAnalyzer;\nuse crate::analysis::ssl_tls_analyzer::SslTlsAnalyzer;\n\npub struct Initiator {\n    options: core_options::CoreOptions,\n    analysis_manager: AnalysisManager,\n}\n\nimpl Initiator {\n    /// Create a new Initiator by parsing CLI arguments\n    pub fn new(argv: &[String]) -> CrawlerResult<Self> {\n        // Handle --help and --version before full parsing\n        for arg in argv {\n            if arg == \"--help\" || arg == \"-h\" {\n                Self::print_help();\n                std::process::exit(2);\n            } else if arg == \"--version\" || arg == \"-v\" {\n                println!(\n                   
 \"{}\",\n                    utils::get_color_text(&format!(\"Version: {}\", version::CODE), \"blue\", false,)\n                );\n                std::process::exit(2);\n            }\n        }\n\n        // Parse core options from argv\n        let options = core_options::parse_argv(argv)?;\n\n        // Handle special options that were parsed\n        if options.show_help_only {\n            Self::print_help();\n            std::process::exit(2);\n        }\n        if options.show_version_only {\n            println!(\n                \"{}\",\n                utils::get_color_text(&format!(\"Version: {}\", version::CODE), \"blue\", false,)\n            );\n            std::process::exit(2);\n        }\n\n        // Create and populate analysis manager\n        let mut analysis_manager = AnalysisManager::new();\n        Self::register_analyzers(&mut analysis_manager, &options);\n        analysis_manager.auto_activate_analyzers();\n\n        // Apply analyzer filter regex if specified\n        if let Some(ref filter_regex) = options.analyzer_filter_regex {\n            analysis_manager.filter_analyzers_by_regex(filter_regex);\n        }\n\n        Ok(Self {\n            options,\n            analysis_manager,\n        })\n    }\n\n    /// Create and return a Manager ready to run\n    pub fn create_manager(self) -> CrawlerResult<Manager> {\n        Manager::new(self.options, self.analysis_manager)\n    }\n\n    /// Get reference to parsed options\n    pub fn get_options(&self) -> &core_options::CoreOptions {\n        &self.options\n    }\n\n    /// Register all analyzers with the analysis manager.\n    fn register_analyzers(analysis_manager: &mut AnalysisManager, options: &core_options::CoreOptions) {\n        // Register all analyzers in alphabetical order\n        analysis_manager.register_analyzer(Box::new(AccessibilityAnalyzer::new()));\n        analysis_manager.register_analyzer(Box::new(BestPracticeAnalyzer::new()));\n        
analysis_manager.register_analyzer(Box::new(CachingAnalyzer::new()));\n        analysis_manager.register_analyzer(Box::new(ContentTypeAnalyzer::new()));\n        analysis_manager.register_analyzer(Box::new(DnsAnalyzer::new()));\n        analysis_manager.register_analyzer(Box::new(ExternalLinksAnalyzer::new()));\n\n        // FastestAnalyzer: pass fastest_top_limit and fastest_max_time from options\n        let mut fastest = FastestAnalyzer::new();\n        fastest.set_config(options.fastest_top_limit as usize, options.fastest_max_time);\n        analysis_manager.register_analyzer(Box::new(fastest));\n\n        analysis_manager.register_analyzer(Box::new(HeadersAnalyzer::new()));\n        analysis_manager.register_analyzer(Box::new(Page404Analyzer::new()));\n        analysis_manager.register_analyzer(Box::new(RedirectsAnalyzer::new()));\n        analysis_manager.register_analyzer(Box::new(SecurityAnalyzer::new()));\n\n        // SeoAndOpenGraphAnalyzer: pass max_heading_level from options\n        let mut seo = SeoAndOpenGraphAnalyzer::new();\n        seo.set_config(options.max_heading_level as i32);\n        analysis_manager.register_analyzer(Box::new(seo));\n\n        analysis_manager.register_analyzer(Box::new(SkippedUrlsAnalyzer::new()));\n\n        // SlowestAnalyzer: pass slowest_top_limit, slowest_min_time, slowest_max_time from options\n        let mut slowest = SlowestAnalyzer::new();\n        slowest.set_config(\n            options.slowest_top_limit as usize,\n            options.slowest_min_time,\n            options.slowest_max_time,\n        );\n        analysis_manager.register_analyzer(Box::new(slowest));\n\n        analysis_manager.register_analyzer(Box::new(SourceDomainsAnalyzer::new()));\n        analysis_manager.register_analyzer(Box::new(SslTlsAnalyzer::new()));\n    }\n\n    /// Print help text.\n    pub fn print_help() {\n        println!();\n        println!(\n            \"{}\",\n            utils::get_color_text(\n                \"Usage: 
siteone-crawler --url=https://mydomain.tld/ [options]\",\n                \"yellow\",\n                false,\n            )\n        );\n        println!(\n            \"{}\",\n            utils::get_color_text(&format!(\"Version: {}\", version::CODE), \"blue\", false,)\n        );\n        println!();\n\n        let help_text = core_options::get_help_text();\n        print!(\"{}\", help_text);\n\n        println!();\n        println!(\"For more detailed descriptions of parameters, see README.md.\");\n        println!();\n        println!(\n            \"{}{}{}\",\n            utils::get_color_text(\"Created with \", \"gray\", false),\n            utils::get_color_text(\"\\u{2665}\", \"red\", false),\n            utils::get_color_text(\n                \" by J\\u{00e1}n Rege\\u{0161} (jan.reges@siteone.cz) from www.SiteOne.io (Czech Republic) [2023-2026]\",\n                \"gray\",\n                false,\n            )\n        );\n    }\n}\n"
  },
  {
    "path": "src/engine/manager.rs",
    "content": "// SiteOne Crawler - Manager\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Orchestrates the crawler: initializes all components, runs the crawl,\n// then runs post-crawl analysis and exporters.\n\nuse std::sync::Arc;\nuse std::time::Instant;\n\nuse crate::analysis::manager::AnalysisManager;\nuse crate::components::super_table::SuperTable;\nuse crate::content_processor::astro_processor::AstroProcessor;\nuse crate::content_processor::base_processor::ProcessorConfig;\nuse crate::content_processor::css_processor::CssProcessor;\nuse crate::content_processor::html_processor::HtmlProcessor;\nuse crate::content_processor::javascript_processor::JavaScriptProcessor;\nuse crate::content_processor::manager::{ContentProcessorManager, SUPER_TABLE_CONTENT_PROCESSORS_STATS};\nuse crate::content_processor::nextjs_processor::NextJsProcessor;\nuse crate::content_processor::svelte_processor::SvelteProcessor;\nuse crate::content_processor::xml_processor::XmlProcessor;\nuse crate::engine::crawler::Crawler;\nuse crate::engine::http_client::HttpClient;\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::error::{CrawlerError, CrawlerResult};\nuse crate::export::exporter::Exporter;\nuse crate::export::file_exporter::FileExporter;\nuse crate::export::html_report::report::HtmlReport;\nuse crate::export::mailer_exporter::MailerExporter;\nuse crate::export::markdown_exporter::MarkdownExporter;\nuse crate::export::offline_website_exporter::OfflineWebsiteExporter;\nuse crate::export::sitemap_exporter::SitemapExporter;\nuse crate::export::upload_exporter::UploadExporter;\nuse crate::info::Info;\nuse crate::options::core_options::{CoreOptions, StorageType};\nuse crate::output::json_output::JsonOutput;\nuse crate::output::multi_output::MultiOutput;\nuse crate::output::output::{CrawlerInfo, Output};\nuse crate::output::text_output::TextOutput;\nuse crate::result::status::Status;\nuse crate::result::storage::file_storage::FileStorage;\nuse 
crate::result::storage::memory_storage::MemoryStorage;\nuse crate::scoring::ci_gate;\nuse crate::scoring::scorer;\nuse crate::types::OutputType;\nuse crate::utils;\nuse crate::version;\n\npub struct Manager {\n    options: Arc<CoreOptions>,\n    analysis_manager: Option<AnalysisManager>,\n    start_time: Instant,\n}\n\nimpl Manager {\n    pub fn new(options: CoreOptions, analysis_manager: AnalysisManager) -> CrawlerResult<Self> {\n        let start_time = Instant::now();\n\n        // Apply color settings\n        if options.no_color {\n            utils::disable_colors();\n        } else if options.force_color {\n            utils::force_enabled_colors();\n        }\n\n        // Apply forced console width if specified\n        if let Some(width) = options.console_width\n            && width > 0\n        {\n            utils::set_forced_console_width(width as usize);\n        }\n\n        // Apply hard rows limit for analysis tables\n        SuperTable::set_hard_rows_limit(options.rows_limit as usize);\n\n        Ok(Self {\n            options: Arc::new(options),\n            analysis_manager: Some(analysis_manager),\n            start_time,\n        })\n    }\n\n    /// Run the complete crawl process: init, crawl, analyze, export, summarize.\n    /// Returns an exit code: 0 = success, 10 = CI gate failed.\n    pub async fn run(&mut self) -> CrawlerResult<i32> {\n        let options = self.options.clone();\n\n        // Build crawler info\n        let command = std::env::args().collect::<Vec<_>>().join(\" \");\n        let hostname = gethostname::gethostname().to_string_lossy().to_string();\n\n        // Build the final user agent the same way Crawler does\n        let final_user_agent = {\n            let base = if let Some(ref ua) = options.user_agent {\n                ua.clone()\n            } else {\n                match options.device {\n                    crate::types::DeviceType::Desktop => format!(\n                        \"Mozilla/5.0 (Windows NT 
10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{}.0.0.0 Safari/537.36\",\n                        chrono::Utc::now().format(\"%y\")\n                    ),\n                    crate::types::DeviceType::Mobile => \"Mozilla/5.0 (iPhone; CPU iPhone OS 15_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15A5370a Safari/604.1\".to_string(),\n                    crate::types::DeviceType::Tablet => \"Mozilla/5.0 (Linux; Android 11; SAMSUNG SM-T875) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/14.0 Chrome/87.0.4280.141 Safari/537.36\".to_string(),\n                }\n            };\n            if base.ends_with('!') {\n                base.trim_end_matches('!').trim_end().to_string()\n            } else {\n                format!(\"{} siteone-crawler/{}\", base, version::CODE)\n            }\n        };\n\n        let crawler_info = Info::new(\n            \"SiteOne Crawler\".to_string(),\n            version::CODE.to_string(),\n            chrono::Utc::now().format(\"%Y-%m-%d %H:%M:%S\").to_string(),\n            utils::get_safe_command(&command),\n            hostname,\n            final_user_agent,\n            options.url.clone(),\n        );\n\n        // Create storage\n        let origin_url = ParsedUrl::parse(&options.url, None);\n        let origin_url_domain = format!(\n            \"{}{}\",\n            origin_url.host.as_deref().unwrap_or(\"\"),\n            origin_url.port.map(|p| format!(\"-{}\", p)).unwrap_or_default()\n        );\n\n        let storage: Box<dyn crate::result::storage::storage::Storage> = match options.result_storage {\n            StorageType::Memory => Box::new(MemoryStorage::new(options.result_storage_compression)),\n            StorageType::File => {\n                let result_storage_dir = crate::utils::get_absolute_path(&options.result_storage_dir);\n                Box::new(FileStorage::new(\n                    &result_storage_dir,\n                    
options.result_storage_compression,\n                    &origin_url_domain,\n                )?)\n            }\n        };\n\n        // Create status\n        let status = Status::new(storage, true, crawler_info.clone(), self.start_time);\n\n        // Create output\n        let output = self.create_output(&options, &crawler_info)?;\n\n        // Create HTTP client\n        let http_cache_dir =\n            if options.http_cache_dir.as_deref() == Some(\"off\") || options.http_cache_dir.as_deref() == Some(\"\") {\n                None\n            } else {\n                options\n                    .http_cache_dir\n                    .as_ref()\n                    .map(|dir| crate::utils::get_absolute_path(dir))\n            };\n\n        let http_client = HttpClient::new(\n            options.proxy.clone(),\n            options.http_auth.clone(),\n            http_cache_dir,\n            options.http_cache_compression,\n            options.http_cache_ttl,\n            options.accept_invalid_certs,\n        );\n\n        // Create content processor manager and register processors\n        let content_processor_manager = Self::create_content_processor_manager(&options);\n\n        // Take the analysis_manager out of self (it will live inside the Crawler)\n        let analysis_manager = self\n            .analysis_manager\n            .take()\n            .ok_or_else(|| CrawlerError::Config(\"AnalysisManager already consumed\".to_string()))?;\n\n        // Create crawler\n        let mut crawler = Crawler::new(\n            options.clone(),\n            http_client,\n            content_processor_manager,\n            analysis_manager,\n            output,\n            status,\n        );\n\n        // Set extra columns from analyzers (for Access., Best pr. 
columns in progress table)\n        if let (Ok(am), Ok(mut out)) = (crawler.get_analysis_manager().lock(), crawler.get_output().lock()) {\n            let extra_cols = am.get_extra_columns();\n            out.set_extra_columns_from_analysis(extra_cols);\n        }\n\n        // Print banner\n        if let Ok(mut out) = crawler.get_output().lock() {\n            out.add_banner();\n        }\n\n        // Fetch initial robots.txt\n        let initial_scheme = options.get_initial_scheme();\n        let initial_host = options.get_initial_host(false);\n        let initial_port = ParsedUrl::parse(&options.url, None)\n            .port\n            .unwrap_or(if initial_scheme == \"https\" { 443 } else { 80 });\n        crawler\n            .fetch_robots_txt(&initial_host, initial_port, &initial_scheme)\n            .await;\n\n        // Run the crawler\n        crawler.run().await?;\n\n        // Post-crawl: run analyzers\n        let exit_code = self.run_post_crawl(&crawler);\n\n        Ok(exit_code)\n    }\n\n    /// Run post-crawl analysis and produce final output.\n    /// Returns exit code: 0 = success, 3 = no pages crawled, 10 = CI gate failed.\n    fn run_post_crawl(&mut self, crawler: &Crawler) -> i32 {\n        let status = crawler.get_status();\n        let output = crawler.get_output();\n        let analysis_manager = crawler.get_analysis_manager();\n\n        // Transfer skipped URLs from crawler to status\n        {\n            let skipped = crawler.get_skipped();\n            if let Ok(mut st) = status.lock() {\n                for entry in skipped.iter() {\n                    st.add_skipped_url(\n                        entry.url.clone(),\n                        entry.reason,\n                        entry.source_uq_id.clone(),\n                        entry.source_attr,\n                    );\n                }\n            }\n        }\n\n        // Run post-crawl analyzers\n        if let (Ok(mut am), Ok(st), Ok(mut out)) = 
(analysis_manager.lock(), status.lock(), output.lock()) {\n            am.run_analyzers(&st, &mut **out);\n        }\n\n        // Add content processor stats\n        if let Ok(cpm) = crawler.get_content_processor_manager().lock() {\n            let mut super_table = cpm.get_stats().get_super_table(\n                SUPER_TABLE_CONTENT_PROCESSORS_STATS,\n                \"Content processor stats\",\n                \"No content processors found.\",\n                None,\n                None,\n            );\n\n            if let Ok(st) = status.lock() {\n                st.configure_super_table_url_stripping(&mut super_table);\n            }\n            if let Ok(mut out) = output.lock() {\n                out.add_super_table(&super_table);\n            }\n            if let Ok(st) = status.lock() {\n                st.add_super_table_at_end(super_table);\n            }\n        }\n\n        // Run exporters\n        self.run_exporters(crawler);\n\n        // Print used options\n        if let Ok(mut out) = output.lock() {\n            out.add_used_options();\n        }\n\n        // Print total stats\n        if let Ok(st) = status.lock() {\n            let basic_stats = st.get_basic_stats();\n            let output_stats = crate::output::output::BasicStats {\n                total_urls: basic_stats.total_urls,\n                total_size: basic_stats.total_size,\n                total_size_formatted: basic_stats.total_size_formatted.clone(),\n                total_execution_time: basic_stats.total_execution_time,\n                total_requests_times: basic_stats.total_requests_times,\n                total_requests_times_avg: basic_stats.total_requests_times_avg,\n                total_requests_times_min: basic_stats.total_requests_times_min,\n                total_requests_times_max: basic_stats.total_requests_times_max,\n                count_by_status: basic_stats.count_by_status.clone(),\n                count_by_content_type: 
basic_stats.count_by_content_type.clone(),\n            };\n            if let Ok(mut out) = output.lock() {\n                out.add_total_stats(&output_stats);\n            }\n        }\n\n        // Calculate and print quality scores, then CI gate, then summary\n        let mut ci_exit_code = 0i32;\n        if let Ok(st) = status.lock() {\n            let mut summary = st.get_summary();\n            let basic_stats = st.get_basic_stats();\n            let output_stats = crate::output::output::BasicStats {\n                total_urls: basic_stats.total_urls,\n                total_size: basic_stats.total_size,\n                total_size_formatted: basic_stats.total_size_formatted.clone(),\n                total_execution_time: basic_stats.total_execution_time,\n                total_requests_times: basic_stats.total_requests_times,\n                total_requests_times_avg: basic_stats.total_requests_times_avg,\n                total_requests_times_min: basic_stats.total_requests_times_min,\n                total_requests_times_max: basic_stats.total_requests_times_max,\n                count_by_status: basic_stats.count_by_status.clone(),\n                count_by_content_type: basic_stats.count_by_content_type.clone(),\n            };\n            let quality_scores = scorer::calculate_scores(&summary, &output_stats);\n            if let Ok(mut out) = output.lock() {\n                out.add_quality_scores(&quality_scores);\n            }\n\n            // CI/CD quality gate evaluation\n            if self.options.ci {\n                let ci_result = ci_gate::evaluate(&self.options, &quality_scores, &output_stats, &summary);\n                ci_exit_code = ci_result.exit_code;\n                if let Ok(mut out) = output.lock() {\n                    out.add_ci_gate_result(&ci_result);\n                }\n            }\n\n            if let Ok(mut out) = output.lock() {\n                out.add_summary(&mut summary);\n            }\n        }\n\n        // 
Check if no pages were successfully crawled (e.g. initial URL failed with timeout, DNS error, etc.)\n        // URLs with negative status codes (-1 connection error, -2 timeout, etc.) are counted in\n        // total_urls but don't represent successful responses, so we check for any positive status code.\n        let no_pages_crawled = match status.lock() {\n            Ok(st) => {\n                let stats = st.get_basic_stats();\n                !stats.count_by_status.keys().any(|&code| code > 0)\n            }\n            _ => false,\n        };\n\n        // Finalize output\n        if let Ok(mut out) = output.lock() {\n            out.end();\n        }\n\n        // Save text/JSON report files after output is finalized (includes quality scores,\n        // CI gate result, and summary that were missing when run_exporters captured content)\n        if self.options.output_text_file.is_some() || self.options.output_json_file.is_some() {\n            let initial_host = Some(self.options.get_initial_host(false));\n            let mut file_exporter = FileExporter::new(\n                None,\n                None,\n                self.options.output_json_file.clone(),\n                self.options.output_text_file.clone(),\n                self.options.add_timestamp_to_output_file,\n                self.options.add_host_to_output_file,\n                initial_host,\n            );\n            if let Ok(out) = output.lock() {\n                if let Some(text) = out.get_output_text() {\n                    file_exporter.set_text_output_content(text);\n                }\n                if let Some(json) = out.get_json_content() {\n                    file_exporter.set_json_output_content(json);\n                }\n            }\n            if let Ok(st) = status.lock()\n                && let Ok(out) = output.lock()\n                && let Err(e) = file_exporter.export(&st, &**out)\n            {\n                eprintln!(\"Error saving text/JSON report files: 
{}\", e);\n            }\n        }\n\n        if ci_exit_code != 0 {\n            ci_exit_code\n        } else if no_pages_crawled {\n            3\n        } else {\n            0\n        }\n    }\n\n    /// Run all activated exporters after crawling and analysis.\n    fn run_exporters(&self, crawler: &Crawler) {\n        let status = crawler.get_status();\n        let output = crawler.get_output();\n        let options = &self.options;\n\n        // Generate HTML report content if any exporter needs it\n        let html_report_needed =\n            options.output_html_report.is_some() || !options.mail_to.is_empty() || options.upload_enabled;\n\n        let html_report_content = if html_report_needed {\n            match status.lock() {\n                Ok(st) => {\n                    let report = HtmlReport::new(&st, 5, options.html_report_options.as_deref());\n                    Some(report.get_html())\n                }\n                _ => None,\n            }\n        } else {\n            None\n        };\n\n        // Build list of activated exporters (excluding offline/markdown which run separately)\n        let mut exporters: Vec<Box<dyn Exporter>> = Vec::new();\n\n        // 1. SitemapExporter\n        {\n            let sitemap = SitemapExporter::new(\n                options.sitemap_xml_file.clone(),\n                options.sitemap_txt_file.clone(),\n                options.sitemap_base_priority,\n                options.sitemap_priority_increase,\n            );\n            if sitemap.should_be_activated() {\n                exporters.push(Box::new(sitemap));\n            }\n        }\n\n        // 2. 
OfflineWebsiteExporter — run separately to collect exported file paths\n        let offline_paths = {\n            let mut offline = OfflineWebsiteExporter::new();\n            offline.set_offline_export_directory(options.offline_export_dir.clone());\n            offline.set_offline_export_store_only_url_regex(options.offline_export_store_only_url_regex.clone());\n            offline.set_offline_export_remove_unwanted_code(options.offline_export_remove_unwanted_code);\n            offline.set_offline_export_no_auto_redirect_html(options.offline_export_no_auto_redirect_html);\n            offline.set_offline_export_preserve_url_structure(options.offline_export_preserve_url_structure);\n            offline.set_offline_export_lowercase(options.offline_export_lowercase);\n            offline.set_ignore_store_file_error(options.ignore_store_file_error);\n            offline.set_replace_content(options.replace_content.clone());\n            offline.set_replace_query_string(options.replace_query_string.clone());\n            let initial_parsed = ParsedUrl::parse(&options.url, None);\n            offline.set_initial_parsed_url(initial_parsed);\n            offline.set_content_processor_manager(crawler.get_content_processor_manager().clone());\n            if offline.should_be_activated() {\n                if let (Ok(st), Ok(out)) = (status.lock(), output.lock())\n                    && let Err(e) = offline.export(&st, &**out)\n                {\n                    st.add_critical_to_summary(offline.get_name(), &format!(\"{} error: {}\", offline.get_name(), e));\n                }\n                let paths = offline.get_exported_file_paths().clone();\n                if paths.is_empty() { None } else { Some(paths) }\n            } else {\n                None\n            }\n        };\n\n        // 3. 
MarkdownExporter — run separately to collect exported file paths\n        let markdown_paths = {\n            let mut markdown = MarkdownExporter::new();\n            markdown.set_markdown_export_directory(options.markdown_export_dir.clone());\n            markdown.set_markdown_export_single_file(options.markdown_export_single_file.clone());\n            markdown.set_markdown_move_content_before_h1_to_end(options.markdown_move_content_before_h1_to_end);\n            markdown.set_markdown_disable_images(options.markdown_disable_images);\n            markdown.set_markdown_disable_files(options.markdown_disable_files);\n            markdown.set_markdown_remove_links_and_images_from_single_file(\n                options.markdown_remove_links_and_images_from_single_file,\n            );\n            markdown.set_markdown_exclude_selector(options.markdown_exclude_selector.clone());\n            markdown.set_markdown_replace_content(options.markdown_replace_content.clone());\n            markdown.set_markdown_replace_query_string(options.markdown_replace_query_string.clone());\n            markdown.set_markdown_export_store_only_url_regex(options.markdown_export_store_only_url_regex.clone());\n            markdown.set_markdown_ignore_store_file_error(options.markdown_ignore_store_file_error);\n            markdown.set_initial_parsed_url(ParsedUrl::parse(&options.url, None));\n            markdown.set_ignore_regexes(options.ignore_regex.clone());\n            markdown.set_initial_url(options.url.clone());\n            markdown.set_content_processor_manager(crawler.get_content_processor_manager().clone());\n            if markdown.should_be_activated() {\n                if let (Ok(st), Ok(out)) = (status.lock(), output.lock())\n                    && let Err(e) = markdown.export(&st, &**out)\n                {\n                    st.add_critical_to_summary(markdown.get_name(), &format!(\"{} error: {}\", markdown.get_name(), e));\n                }\n                let 
paths = markdown.get_exported_file_paths().clone();\n                if paths.is_empty() { None } else { Some(paths) }\n            } else {\n                None\n            }\n        };\n\n        // Inject exported file paths into JSON output results\n        if (offline_paths.is_some() || markdown_paths.is_some())\n            && let Ok(mut out) = output.lock()\n        {\n            out.set_export_file_paths(offline_paths.as_ref(), markdown_paths.as_ref());\n        }\n\n        // 4. FileExporter for HTML report only (text/JSON files are saved later in\n        //    run_post_crawl after quality scores and summary have been added to output)\n        {\n            let initial_host = Some(options.get_initial_host(false));\n            let mut file_exporter = FileExporter::new(\n                options.output_html_report.clone(),\n                options.html_report_options.clone(),\n                None,\n                None,\n                options.add_timestamp_to_output_file,\n                options.add_host_to_output_file,\n                initial_host,\n            );\n            if let Some(ref content) = html_report_content {\n                file_exporter.set_html_report_content(content.clone());\n            }\n            if file_exporter.should_be_activated() {\n                exporters.push(Box::new(file_exporter));\n            }\n        }\n\n        // 5. 
MailerExporter\n        {\n            let initial_host = Some(options.get_initial_host(false));\n            let mut mailer = MailerExporter::new(\n                options.mail_to.clone(),\n                options.mail_from.clone(),\n                options.mail_from_name.clone(),\n                options.mail_smtp_host.clone(),\n                options.mail_smtp_port.clamp(1, 65535) as u16,\n                options.mail_smtp_user.clone(),\n                options.mail_smtp_pass.clone(),\n                options.mail_subject_template.clone(),\n                initial_host,\n            );\n            if let Some(ref content) = html_report_content {\n                mailer.set_html_report_content(content.clone());\n            }\n            if mailer.should_be_activated() {\n                exporters.push(Box::new(mailer));\n            }\n        }\n\n        // 6. UploadExporter\n        {\n            let mut upload = UploadExporter::new(\n                options.upload_enabled,\n                options.upload_to.clone(),\n                Some(options.upload_retention.clone()),\n                options.upload_password.clone(),\n                options.upload_timeout as u64,\n            );\n            if let Some(ref content) = html_report_content {\n                upload.set_html_report_content(content.clone());\n            }\n            if upload.should_be_activated() {\n                exporters.push(Box::new(upload));\n            }\n        }\n\n        // Run remaining activated exporters (sitemap, file, mailer, upload)\n        for exporter in &mut exporters {\n            if let (Ok(st), Ok(out)) = (status.lock(), output.lock())\n                && let Err(e) = exporter.export(&st, &**out)\n            {\n                st.add_critical_to_summary(exporter.get_name(), &format!(\"{} error: {}\", exporter.get_name(), e));\n            }\n        }\n    }\n\n    /// Create output based on options\n    fn create_output(&self, options: &CoreOptions, 
crawler_info: &Info) -> CrawlerResult<Box<dyn Output>> {\n        let output_crawler_info = CrawlerInfo {\n            name: crawler_info.name.clone(),\n            version: crawler_info.version.clone(),\n            executed_at: crawler_info.executed_at.clone(),\n            command: crawler_info.command.clone(),\n            hostname: crawler_info.hostname.clone(),\n            final_user_agent: crawler_info.final_user_agent.clone(),\n            url: options.url.clone(),\n            device: options.device.as_str().to_string(),\n            workers: options.workers as usize,\n        };\n\n        // Create MultiOutput with both TextOutput and JsonOutput when FileExporter is active.\n        // TextOutput prints to stdout only when output_type == Text.\n        // JsonOutput prints to stdout only when output_type == Json.\n        // Both are always created so FileExporter can save both formats.\n        let file_exporter_active = options.output_html_report.is_some()\n            || options.output_json_file.is_some()\n            || options.output_text_file.is_some();\n\n        let need_text = options.output_type == OutputType::Text || file_exporter_active;\n        let need_json = options.output_type == OutputType::Json\n            || file_exporter_active\n            || !options.mail_to.is_empty()\n            || options.sitemap_xml_file.is_some()\n            || options.sitemap_txt_file.is_some();\n\n        let mut outputs: Vec<Box<dyn Output>> = Vec::new();\n\n        if need_text {\n            outputs.push(Box::new(TextOutput::new(\n                output_crawler_info.clone(),\n                options.extra_columns.clone(),\n                options.hide_progress_bar,\n                options.show_scheme_and_host,\n                options.do_not_truncate_url,\n                options.add_random_query_params,\n                options.url_column_size.map(|s| s as usize),\n                options.show_inline_criticals,\n                
options.show_inline_warnings,\n                options.hide_columns.clone(),\n                options.workers as usize,\n                options.memory_limit.clone(),\n                options.output_type == OutputType::Text, // print_to_output\n                options.ci,                              // disable_animation\n            )));\n        }\n\n        if need_json {\n            let options_json = serde_json::to_value(options).ok();\n            outputs.push(Box::new(JsonOutput::new(\n                output_crawler_info,\n                options.extra_columns.clone(),\n                options.hide_progress_bar,\n                options.output_type == OutputType::Json, // print_to_output\n                options_json,\n            )));\n        }\n\n        if outputs.len() > 1 {\n            let mut multi = MultiOutput::new();\n            for out in outputs {\n                multi.add_output(out);\n            }\n            Ok(Box::new(multi))\n        } else {\n            match outputs.into_iter().next() {\n                Some(out) => Ok(out),\n                _ => Err(CrawlerError::Config(\"Unknown output type\".to_string())),\n            }\n        }\n    }\n\n    /// Create and register all content processors\n    fn create_content_processor_manager(options: &CoreOptions) -> ContentProcessorManager {\n        let initial_url = ParsedUrl::parse(&options.url, None);\n        let mut config = ProcessorConfig::new(initial_url);\n        config.single_page = options.single_page;\n        config.single_foreign_page = options.single_foreign_page;\n        config.max_depth = options.max_depth;\n        config.files_enabled = !options.disable_files;\n        config.images_enabled = !options.disable_images;\n        config.scripts_enabled = !options.disable_javascript;\n        config.styles_enabled = !options.disable_styles;\n        config.fonts_enabled = !options.disable_fonts;\n        config.disable_javascript = options.disable_javascript;\n        
config.remove_all_anchor_listeners = options.remove_all_anchor_listeners;\n        config.ignore_regex = options.ignore_regex.clone();\n        config.disable_astro_inline_modules = options.disable_astro_inline_modules;\n        config.offline_export_preserve_urls = options.offline_export_preserve_urls;\n        config.compile_ignore_regex();\n\n        let mut cpm = ContentProcessorManager::new();\n\n        // Register processors\n        let _ = cpm.register_processor(Box::new(AstroProcessor::new(config.clone())));\n        let _ = cpm.register_processor(Box::new(HtmlProcessor::new(config.clone())));\n        let _ = cpm.register_processor(Box::new(JavaScriptProcessor::new(config.clone())));\n        let _ = cpm.register_processor(Box::new(CssProcessor::new(config.clone())));\n        let _ = cpm.register_processor(Box::new(XmlProcessor::new(config.clone())));\n        let _ = cpm.register_processor(Box::new(NextJsProcessor::new(config.clone())));\n        let _ = cpm.register_processor(Box::new(SvelteProcessor::new(config)));\n\n        cpm\n    }\n}\n"
  },
  {
    "path": "src/engine/mod.rs",
    "content": "// Engine module - core crawling engine\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\npub mod crawler;\r\npub mod found_url;\r\npub mod found_urls;\r\npub mod http_client;\r\npub mod http_response;\r\npub mod initiator;\r\npub mod manager;\r\npub mod parsed_url;\r\npub mod robots_txt;\r\n"
  },
  {
    "path": "src/engine/parsed_url.rs",
    "content": "// SiteOne Crawler - ParsedUrl\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\nuse std::path::Path;\nuse std::sync::Mutex;\n\nuse once_cell::sync::Lazy;\nuse regex::Regex;\n\n/// Regex for detecting HTML page extensions (not static files)\nstatic HTML_EXTENSIONS_RE: Lazy<Regex> = Lazy::new(|| {\n    Regex::new(r\"(?i)\\.(htm|html|shtml|php|phtml|ashx|xhtml|asp|aspx|jsp|jspx|do|cfm|cgi|pl|rb|erb|gsp)$\").unwrap()\n});\n\n/// Regex for detecting file extension at end of path\nstatic FILE_EXTENSION_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)\\.([a-z0-9]{1,10})$\").unwrap());\n\n/// Regex for detecting image extensions in path\nstatic IMAGE_PATH_RE: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r\"(?i)\\.(png|gif|jpg|jpeg|ico|webp|avif|tif|bmp|svg)\").unwrap());\n\n/// Regex for detecting dynamic image query params\nstatic IMAGE_QUERY_RE: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r\"(?i)(png|gif|jpg|jpeg|ico|webp|avif|tif|bmp|svg|crop|size|landscape)\").unwrap());\n\n/// Regex for detecting font extensions\nstatic FONT_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)\\.(eot|ttf|woff2|woff|otf)\").unwrap());\n\n/// Regex for 2nd level domain extraction\nstatic DOMAIN_2ND_LEVEL_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)([a-z0-9\\-]+\\.[a-z][a-z0-9]{0,10})$\").unwrap());\n\n/// Regex for extracting extensions from path+query\nstatic ESTIMATE_EXT_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)\\.([0-9a-z]{1,5})\").unwrap());\n\n/// Regex for relative URL detection (starts with alphanumeric or underscore)\nstatic RELATIVE_URL_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)^[a-z0-9_]\").unwrap());\n\n/// Parsed URL struct with all URL components\n#[derive(Debug)]\npub struct ParsedUrl {\n    pub url: String,\n    pub scheme: Option<String>,\n    pub host: Option<String>,\n    pub port: Option<u16>,\n    pub path: String,\n    pub query: Option<String>,\n    pub fragment: Option<String>,\n    pub extension: 
Option<String>,\n    pub domain_2nd_level: Option<String>,\n\n    full_url_cache: Mutex<HashMap<String, String>>,\n    debug: bool,\n}\n\nimpl Clone for ParsedUrl {\n    fn clone(&self) -> Self {\n        Self {\n            url: self.url.clone(),\n            scheme: self.scheme.clone(),\n            host: self.host.clone(),\n            port: self.port,\n            path: self.path.clone(),\n            query: self.query.clone(),\n            fragment: self.fragment.clone(),\n            extension: self.extension.clone(),\n            domain_2nd_level: self.domain_2nd_level.clone(),\n            full_url_cache: Mutex::new(HashMap::new()),\n            debug: self.debug,\n        }\n    }\n}\n\nimpl PartialEq for ParsedUrl {\n    fn eq(&self, other: &Self) -> bool {\n        self.url == other.url\n            && self.scheme == other.scheme\n            && self.host == other.host\n            && self.port == other.port\n            && self.path == other.path\n            && self.query == other.query\n            && self.fragment == other.fragment\n    }\n}\n\nimpl Eq for ParsedUrl {}\n\nimpl std::hash::Hash for ParsedUrl {\n    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {\n        self.url.hash(state);\n        self.scheme.hash(state);\n        self.host.hash(state);\n        self.port.hash(state);\n        self.path.hash(state);\n        self.query.hash(state);\n        self.fragment.hash(state);\n    }\n}\n\nimpl ParsedUrl {\n    #[allow(clippy::too_many_arguments)]\n    pub fn new(\n        url: String,\n        scheme: Option<String>,\n        host: Option<String>,\n        port: Option<u16>,\n        path: String,\n        query: Option<String>,\n        fragment: Option<String>,\n        extension: Option<String>,\n        domain_2nd_level: Option<String>,\n    ) -> Self {\n        let fragment = match fragment.as_deref() {\n            Some(\"\") => None,\n            _ => fragment,\n        };\n        Self {\n            url,\n            
scheme,\n            host,\n            port,\n            path,\n            query,\n            fragment,\n            extension,\n            domain_2nd_level,\n            full_url_cache: Mutex::new(HashMap::new()),\n            debug: false,\n        }\n    }\n\n    /// Get full URL with optional scheme+host and optional fragment\n    pub fn get_full_url(&self, include_scheme_and_host: bool, include_fragment: bool) -> String {\n        let cache_key = format!(\n            \"{}{}\",\n            if include_scheme_and_host { '1' } else { '0' },\n            if include_fragment { '1' } else { '0' }\n        );\n\n        if let Ok(cache) = self.full_url_cache.lock()\n            && let Some(cached) = cache.get(&cache_key)\n        {\n            return cached.clone();\n        }\n\n        let mut full_url = self.path.clone();\n        if let Some(ref q) = self.query {\n            full_url.push('?');\n            full_url.push_str(q);\n        }\n        if include_fragment && let Some(ref f) = self.fragment {\n            full_url.push('#');\n            full_url.push_str(f);\n        }\n\n        if include_scheme_and_host {\n            if let (Some(scheme), Some(host)) = (&self.scheme, &self.host) {\n                let mut port = self.port;\n                if (port == Some(80) && scheme == \"http\") || (port == Some(443) && scheme == \"https\") {\n                    port = None;\n                }\n                let port_str = match port {\n                    Some(p) => format!(\":{}\", p),\n                    None => String::new(),\n                };\n                full_url = format!(\"{}://{}{}{}\", scheme, host, port_str, full_url);\n            } else if self.scheme.is_none()\n                && let Some(ref host) = self.host\n            {\n                let port = match self.port {\n                    Some(p) if p != 80 && p != 443 => Some(p),\n                    _ => None,\n                };\n                let port_str = match port 
{\n                    Some(p) => format!(\":{}\", p),\n                    None => String::new(),\n                };\n                full_url = format!(\"//{}{}{}\", host, port_str, full_url);\n            }\n        }\n\n        if let Ok(mut cache) = self.full_url_cache.lock() {\n            cache.insert(cache_key, full_url.clone());\n        }\n\n        full_url\n    }\n\n    /// Is probably static file/asset and probably not the HTML page?\n    pub fn is_static_file(&self) -> bool {\n        if FILE_EXTENSION_RE.is_match(&self.path) {\n            // Has extension - check it's not numeric\n            let is_numeric = self\n                .extension\n                .as_ref()\n                .map(|e| e.parse::<f64>().is_ok())\n                .unwrap_or(false);\n\n            if !is_numeric && !HTML_EXTENSIONS_RE.is_match(&self.path) {\n                return true;\n            }\n        }\n\n        if self.is_image() || self.is_css() {\n            return true;\n        }\n\n        false\n    }\n\n    /// Is probably image? 
Has an image extension or is dynamic image\n    pub fn is_image(&self) -> bool {\n        let has_image_extension = IMAGE_PATH_RE.is_match(&self.path);\n        let is_dynamic_image = self.query.as_ref().map(|q| IMAGE_QUERY_RE.is_match(q)).unwrap_or(false);\n        has_image_extension || is_dynamic_image\n    }\n\n    /// Is font file?\n    pub fn is_font(&self) -> bool {\n        FONT_RE.is_match(&self.path)\n    }\n\n    /// Is CSS file?\n    pub fn is_css(&self) -> bool {\n        self.extension.as_deref() == Some(\"css\") || self.url.to_lowercase().contains(\"fonts.googleapis.com/css\")\n    }\n\n    /// Is Origin header required for this resource?\n    pub fn is_origin_required(&self) -> bool {\n        self.is_font()\n    }\n\n    /// Estimate file extension from URL\n    pub fn estimate_extension(&self) -> Option<String> {\n        // if extension is numeric, it is probably not a real extension\n        if let Some(ref ext) = self.extension {\n            if ext.parse::<f64>().is_ok() {\n                return None;\n            }\n            return Some(ext.to_lowercase());\n        }\n\n        let combined = format!(\"{}?{}\", self.path, self.query.as_deref().unwrap_or(\"\"));\n        let mut last_ext = None;\n        for caps in ESTIMATE_EXT_RE.captures_iter(&combined) {\n            if let Some(m) = caps.get(1) {\n                last_ext = Some(m.as_str().to_lowercase());\n            }\n        }\n        last_ext\n    }\n\n    /// Copy scheme/host/port from another ParsedUrl\n    pub fn set_attributes(&mut self, url: &ParsedUrl, scheme: bool, host: bool, port: bool) {\n        if scheme {\n            self.scheme = url.scheme.clone();\n        }\n        if host {\n            self.host = url.host.clone();\n        }\n        if port {\n            self.port = url.port;\n        }\n        self.clear_cache();\n    }\n\n    pub fn set_path(&mut self, path: String) {\n        self.path = path;\n        self.extension = 
extract_extension(&self.path);\n        self.clear_cache();\n    }\n\n    /// Change depth by adding/removing ../ prefixes\n    pub fn change_depth(&mut self, change: i32) {\n        let mut new_path = self.path.clone();\n        if change > 0 {\n            let clean_path = new_path.trim_start_matches('/');\n            new_path = format!(\"{}{}\", \"../\".repeat(change as usize), clean_path);\n        } else if change < 0 {\n            let count = change.unsigned_abs() as usize;\n            for _ in 0..count {\n                if let Some(rest) = new_path.strip_prefix(\"../\") {\n                    new_path = rest.to_string();\n                } else {\n                    break;\n                }\n            }\n        }\n\n        if new_path != self.path {\n            self.set_path(new_path);\n        }\n        self.clear_cache();\n    }\n\n    pub fn set_query(&mut self, query: Option<String>) {\n        self.query = query;\n        self.clear_cache();\n    }\n\n    pub fn set_fragment(&mut self, fragment: Option<String>) {\n        self.fragment = fragment;\n        self.clear_cache();\n    }\n\n    pub fn set_extension(&mut self, extension: Option<String>) {\n        self.extension = extension;\n        self.clear_cache();\n    }\n\n    pub fn set_debug(&mut self, debug: bool) {\n        self.debug = debug;\n    }\n\n    /// URL is only a fragment reference (#something)\n    pub fn is_only_fragment(&self) -> bool {\n        self.path.is_empty() && self.query.is_none() && self.host.is_none() && self.fragment.is_some()\n    }\n\n    /// Get full homepage URL (scheme://host[:port]) without trailing slash\n    pub fn get_full_homepage_url(&self) -> String {\n        let port_str = match self.port {\n            Some(p) => format!(\":{}\", p),\n            None => String::new(),\n        };\n        format!(\n            \"{}://{}{}\",\n            self.scheme.as_deref().unwrap_or(\"https\"),\n            self.host.as_deref().unwrap_or(\"\"),\n            
port_str\n        )\n    }\n\n    /// Parse URL string and return ParsedUrl object\n    /// When base_url is provided, it fills in missing parts (scheme, host, port)\n    pub fn parse(url: &str, base_url: Option<&ParsedUrl>) -> Self {\n        let mut url = url.to_string();\n\n        if let Some(base) = base_url {\n            if url.starts_with(\"./\") {\n                // Relative URL via ./xyz\n                if base.path.ends_with('/') {\n                    url = format!(\"{}{}\", base.path, &url[2..]);\n                } else {\n                    let dir = parent_path(&base.path);\n                    let file = &url[2..];\n                    if dir == \"/\" {\n                        url = format!(\"/{}\", file);\n                    } else {\n                        url = format!(\"{}/{}\", dir, file);\n                    }\n                }\n            } else if !url.starts_with(\"http:\") && !url.starts_with(\"https:\") && RELATIVE_URL_RE.is_match(&url) {\n                // Relative URL via xyz/abc\n                if base.path.ends_with('/') {\n                    url = format!(\"{}{}\", base.path, url);\n                } else {\n                    url = format!(\"{}{}\", parent_path(&base.path), url);\n                }\n            } else if url.starts_with('/') && !url.starts_with(\"//\") {\n                // Absolute path /xyz/abc\n                url = format!(\"{}{}\", base.get_full_homepage_url(), url);\n            }\n        }\n\n        // Use url::Url for parsing when it's a full URL, otherwise manual parse\n        let (scheme, host, port_parsed, path, query, fragment) =\n            if url.starts_with(\"http://\") || url.starts_with(\"https://\") || url.starts_with(\"//\") {\n                // For protocol-relative URLs, prepend a scheme for parsing\n                let parse_url = if url.starts_with(\"//\") {\n                    format!(\"https:{}\", url)\n                } else {\n                    url.clone()\n            
    };\n\n                match url::Url::parse(&parse_url) {\n                    Ok(parsed) => {\n                        let s = if url.starts_with(\"//\") {\n                            None\n                        } else {\n                            Some(parsed.scheme().to_string())\n                        };\n                        let h = parsed.host_str().map(|h| h.to_string());\n                        let p = parsed.port();\n                        let path = if parsed.path().is_empty() {\n                            \"/\".to_string()\n                        } else {\n                            parsed.path().to_string()\n                        };\n                        let q = parsed.query().map(|q| q.to_string());\n                        let f = parsed.fragment().map(|f| f.to_string());\n                        (s, h, p, path, q, f)\n                    }\n                    Err(_) => parse_url_manually(&url),\n                }\n            } else {\n                parse_url_manually(&url)\n            };\n\n        let scheme = scheme.or_else(|| base_url.and_then(|b| b.scheme.clone()));\n        let has_parsed_host = host.is_some();\n        let host = host.or_else(|| base_url.and_then(|b| b.host.clone()));\n        let port = port_parsed.or_else(|| {\n            if !has_parsed_host {\n                base_url.and_then(|b| b.port)\n            } else {\n                None\n            }\n        });\n        let port = port.or(match scheme.as_deref() {\n            Some(\"http\") => Some(80),\n            _ => Some(443),\n        });\n\n        let path = if path.is_empty() && has_parsed_host {\n            \"/\".to_string()\n        } else {\n            path\n        };\n\n        let extension = if !path.is_empty() && path.contains('.') {\n            extract_extension(&path)\n        } else {\n            None\n        };\n\n        let domain_2nd_level = host.as_ref().and_then(|h| {\n            DOMAIN_2ND_LEVEL_RE\n                
.captures(h)\n                .and_then(|c| c.get(1))\n                .map(|m| m.as_str().to_string())\n        });\n\n        Self::new(\n            url,\n            scheme,\n            host,\n            port,\n            path,\n            query,\n            fragment,\n            extension,\n            domain_2nd_level,\n        )\n    }\n\n    pub fn is_https(&self) -> bool {\n        self.scheme.as_deref() == Some(\"https\")\n    }\n\n    /// Extract 2nd-level domain from a host string (e.g., \"www.example.com\" -> \"example.com\")\n    pub fn extract_2nd_level_domain(host: &str) -> Option<String> {\n        DOMAIN_2ND_LEVEL_RE\n            .captures(host)\n            .and_then(|c| c.get(1))\n            .map(|m| m.as_str().to_string())\n    }\n\n    /// Get base name (last path part) of the URL\n    pub fn get_base_name(&self) -> Option<String> {\n        if self.path.is_empty() || self.path == \"/\" {\n            return None;\n        }\n\n        let path = self.path.trim_end_matches('/');\n        let result = path.rsplit('/').next().filter(|s| !s.is_empty());\n\n        result.map(|r| {\n            // if query string contains path, return path with this query\n            if let Some(ref q) = self.query\n                && (q.contains('/') || q.contains(\"%2F\"))\n            {\n                return format!(\"{}?{}\", r, q);\n            }\n            r.to_string()\n        })\n    }\n\n    /// Get depth of the URL path\n    /// / -> 0, /about -> 1, /about/me -> 2, etc.\n    pub fn get_depth(&self) -> usize {\n        let trimmed = self.path.trim_end_matches('/');\n        let slash_count = trimmed.matches('/').count();\n        let dotdot_count = self.path.matches(\"/..\").count();\n        slash_count.saturating_sub(dotdot_count)\n    }\n\n    fn clear_cache(&self) {\n        if let Ok(mut cache) = self.full_url_cache.lock() {\n            cache.clear();\n        }\n    }\n}\n\nimpl std::fmt::Display for ParsedUrl {\n    fn fmt(&self, f: 
&mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        write!(f, \"{}\", self.get_full_url(true, true))\n    }\n}\n\n/// Extract file extension from path.\nfn extract_extension(path: &str) -> Option<String> {\n    Path::new(path)\n        .extension()\n        .and_then(|e| e.to_str())\n        .filter(|e| !e.is_empty())\n        .map(|e| e.to_string())\n}\n\n/// Get parent directory of a path.\nfn parent_path(path: &str) -> String {\n    match path.rfind('/') {\n        Some(0) => \"/\".to_string(),\n        Some(pos) => path[..pos].to_string(),\n        None => \".\".to_string(),\n    }\n}\n\n/// Manual URL parsing for non-standard URLs (relative paths, fragments, etc.)\n#[allow(clippy::type_complexity)]\nfn parse_url_manually(\n    url: &str,\n) -> (\n    Option<String>,\n    Option<String>,\n    Option<u16>,\n    String,\n    Option<String>,\n    Option<String>,\n) {\n    let mut remaining = url;\n\n    // Extract fragment\n    let fragment = if let Some(hash_pos) = remaining.find('#') {\n        let f = &remaining[hash_pos + 1..];\n        remaining = &remaining[..hash_pos];\n        if f.is_empty() { None } else { Some(f.to_string()) }\n    } else {\n        None\n    };\n\n    // Extract query\n    let query = if let Some(q_pos) = remaining.find('?') {\n        let q = &remaining[q_pos + 1..];\n        remaining = &remaining[..q_pos];\n        if q.is_empty() { None } else { Some(q.to_string()) }\n    } else {\n        None\n    };\n\n    let path = remaining.to_string();\n\n    (None, None, None, path, query, fragment)\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_parse_full_url() {\n        let parsed = ParsedUrl::parse(\"https://example.com/path/to/page?q=1#section\", None);\n        assert_eq!(parsed.scheme.as_deref(), Some(\"https\"));\n        assert_eq!(parsed.host.as_deref(), Some(\"example.com\"));\n        assert_eq!(parsed.path, \"/path/to/page\");\n        assert_eq!(parsed.query.as_deref(), 
Some(\"q=1\"));\n        assert_eq!(parsed.fragment.as_deref(), Some(\"section\"));\n    }\n\n    #[test]\n    fn test_depth() {\n        assert_eq!(ParsedUrl::parse(\"/\", None).get_depth(), 0);\n        assert_eq!(ParsedUrl::parse(\"/about\", None).get_depth(), 1);\n        assert_eq!(ParsedUrl::parse(\"/about/\", None).get_depth(), 1);\n        assert_eq!(ParsedUrl::parse(\"/about/me\", None).get_depth(), 2);\n        assert_eq!(ParsedUrl::parse(\"/about/me/\", None).get_depth(), 2);\n    }\n\n    #[test]\n    fn test_is_static_file() {\n        let css = ParsedUrl::parse(\"https://example.com/style.css\", None);\n        assert!(css.is_static_file());\n\n        let html = ParsedUrl::parse(\"https://example.com/page.html\", None);\n        assert!(!html.is_static_file());\n\n        let page = ParsedUrl::parse(\"https://example.com/about\", None);\n        assert!(!page.is_static_file());\n    }\n\n    #[test]\n    fn test_relative_url_resolution() {\n        let base = ParsedUrl::parse(\"https://example.com/dir/page\", None);\n        let relative = ParsedUrl::parse(\"./other\", Some(&base));\n        assert_eq!(relative.path, \"/dir/other\");\n    }\n\n    #[test]\n    fn test_get_full_url() {\n        let parsed = ParsedUrl::parse(\"https://example.com/path?q=1#frag\", None);\n        assert_eq!(parsed.get_full_url(true, true), \"https://example.com/path?q=1#frag\");\n        assert_eq!(parsed.get_full_url(true, false), \"https://example.com/path?q=1\");\n        assert_eq!(parsed.get_full_url(false, true), \"/path?q=1#frag\");\n    }\n\n    #[test]\n    fn test_get_base_name() {\n        let p1 = ParsedUrl::parse(\"https://example.com/foo/bar\", None);\n        assert_eq!(p1.get_base_name(), Some(\"bar\".to_string()));\n\n        let p2 = ParsedUrl::parse(\"https://example.com/\", None);\n        assert_eq!(p2.get_base_name(), None);\n    }\n\n    #[test]\n    fn test_domain_2nd_level() {\n        let parsed = 
ParsedUrl::parse(\"https://sub.example.com/page\", None);\n        assert_eq!(parsed.domain_2nd_level.as_deref(), Some(\"example.com\"));\n    }\n}\n"
  },
  {
    "path": "src/engine/robots_txt.rs",
    "content": "// SiteOne Crawler - robots.txt parser\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse once_cell::sync::Lazy;\nuse regex::Regex;\n\n/// Regex for matching frontend asset extensions that are always allowed\nstatic ASSET_EXTENSION_RE: Lazy<Regex> = Lazy::new(|| {\n    Regex::new(r\"(?i)\\.(js|css|json|eot|ttf|woff2|woff|otf|png|gif|jpg|jpeg|ico|webp|avif|tif|bmp|svg)\").unwrap()\n});\n\n/// Regex for User-agent directive\nstatic USER_AGENT_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)^User-agent:\\s*(.*)\").unwrap());\n\n/// Regex for Disallow directive\nstatic DISALLOW_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)^Disallow:\\s*(.*)\").unwrap());\n\n/// Regex for Allow directive\nstatic ALLOW_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)^Allow:\\s*(.*)\").unwrap());\n\n/// Regex for Sitemap directive\nstatic SITEMAP_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)^Sitemap:\\s*(.*)\").unwrap());\n\n/// Regex to strip comments\nstatic COMMENT_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"#.*\").unwrap());\n\n/// Parsed robots.txt data for a single domain\n#[derive(Debug, Clone)]\npub struct RobotsTxt {\n    /// Disallowed paths for the relevant user agents (* and SiteOne-Crawler)\n    disallowed_paths: Vec<String>,\n    /// Allowed paths (override disallows) for the relevant user agents\n    allowed_paths: Vec<String>,\n    /// Sitemap URLs declared in robots.txt\n    sitemaps: Vec<String>,\n    /// Raw content of robots.txt\n    raw_content: String,\n}\n\nimpl RobotsTxt {\n    /// Parse robots.txt content and extract rules for * and SiteOne-Crawler user agents\n    pub fn parse(content: &str) -> Self {\n        let mut disallowed_paths = Vec::new();\n        let mut allowed_paths = Vec::new();\n        let mut sitemaps = Vec::new();\n        let mut current_user_agent: Option<String> = None;\n\n        for line in content.lines() {\n            // Remove comments\n            let line = COMMENT_RE.replace(line, \"\");\n            let line 
= line.trim();\n\n            if line.is_empty() {\n                continue;\n            }\n\n            if let Some(caps) = USER_AGENT_RE.captures(line) {\n                if let Some(m) = caps.get(1) {\n                    current_user_agent = Some(m.as_str().trim().to_string());\n                }\n            } else if let Some(ref ua) = current_user_agent\n                && (ua == \"*\" || ua == \"SiteOne-Crawler\")\n            {\n                if let Some(caps) = DISALLOW_RE.captures(line) {\n                    if let Some(m) = caps.get(1) {\n                        let path = m.as_str().trim().to_string();\n                        if !path.is_empty() {\n                            disallowed_paths.push(path);\n                        }\n                    }\n                } else if let Some(caps) = ALLOW_RE.captures(line)\n                    && let Some(m) = caps.get(1)\n                {\n                    let path = m.as_str().trim().to_string();\n                    if !path.is_empty() {\n                        allowed_paths.push(path);\n                    }\n                }\n            }\n\n            // Sitemaps are always parsed regardless of user-agent section\n            if let Some(caps) = SITEMAP_RE.captures(line)\n                && let Some(m) = caps.get(1)\n            {\n                let sitemap_url = m.as_str().trim().to_string();\n                if !sitemap_url.is_empty() {\n                    sitemaps.push(sitemap_url);\n                }\n            }\n        }\n\n        Self {\n            disallowed_paths,\n            allowed_paths,\n            sitemaps,\n            raw_content: content.to_string(),\n        }\n    }\n\n    /// Check if a URL path is allowed by the robots.txt rules.\n    /// Frontend assets (js, css, images, fonts) are always allowed.\n    ///\n    /// A URL is disallowed if its path starts with any disallowed path\n    /// (case-insensitive prefix match).\n    pub fn is_allowed(&self, url: 
&str) -> bool {\n        // Frontend assets are always allowed\n        if ASSET_EXTENSION_RE.is_match(url) {\n            return true;\n        }\n\n        // If no disallowed paths, everything is allowed\n        if self.disallowed_paths.is_empty() {\n            return true;\n        }\n\n        // Extract path from URL\n        let url_path = url::Url::parse(url).ok().map(|u| u.path().to_string()).or_else(|| {\n            // If it's not a full URL, try treating it as a path\n            let path_part = if let Some(q_pos) = url.find('?') {\n                &url[..q_pos]\n            } else {\n                url\n            };\n            Some(path_part.to_string())\n        });\n\n        let url_path = match url_path {\n            Some(p) => p,\n            None => return true,\n        };\n\n        // Check allowed paths first (they override disallows)\n        for allowed_path in &self.allowed_paths {\n            if path_matches(&url_path, allowed_path) {\n                return true;\n            }\n        }\n\n        // Check disallowed paths\n        for disallowed_path in &self.disallowed_paths {\n            if path_matches(&url_path, disallowed_path) {\n                return false;\n            }\n        }\n\n        true\n    }\n\n    /// Get sitemap URLs declared in robots.txt\n    pub fn get_sitemaps(&self) -> &[String] {\n        &self.sitemaps\n    }\n\n    /// Get disallowed paths\n    pub fn get_disallowed_paths(&self) -> &[String] {\n        &self.disallowed_paths\n    }\n\n    /// Get allowed paths\n    pub fn get_allowed_paths(&self) -> &[String] {\n        &self.allowed_paths\n    }\n\n    /// Get raw robots.txt content\n    pub fn get_raw_content(&self) -> &str {\n        &self.raw_content\n    }\n}\n\n/// Check if a URL path matches a robots.txt path pattern.\n/// Supports:\n/// - Simple prefix matching\n/// - Wildcard (*) matching\n/// - End-of-string ($) anchor\nfn path_matches(url_path: &str, pattern: &str) -> bool {\n    // 
Handle $ anchor at end\n    if let Some(pattern_without_anchor) = pattern.strip_suffix('$') {\n        if pattern_without_anchor.contains('*') {\n            return wildcard_match(url_path, pattern_without_anchor, true);\n        }\n        return url_path.to_lowercase() == pattern_without_anchor.to_lowercase();\n    }\n\n    // Handle wildcard patterns\n    if pattern.contains('*') {\n        return wildcard_match(url_path, pattern, false);\n    }\n\n    // Simple case-insensitive prefix match\n    url_path.to_lowercase().starts_with(&pattern.to_lowercase())\n}\n\n/// Match a URL path against a wildcard pattern (* matches any sequence of characters)\nfn wildcard_match(url_path: &str, pattern: &str, exact_end: bool) -> bool {\n    let parts: Vec<&str> = pattern.split('*').collect();\n    let url_lower = url_path.to_lowercase();\n    let mut search_from = 0;\n\n    for (i, part) in parts.iter().enumerate() {\n        if part.is_empty() {\n            continue;\n        }\n        let part_lower = part.to_lowercase();\n\n        match url_lower[search_from..].find(&part_lower) {\n            Some(pos) => {\n                // First part must match at start\n                if i == 0 && pos != 0 {\n                    return false;\n                }\n                search_from += pos + part_lower.len();\n            }\n            None => return false,\n        }\n    }\n\n    if exact_end {\n        // The last part must end at the end of the URL path\n        return search_from == url_lower.len();\n    }\n\n    true\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_parse_basic() {\n        let content = r#\"\nUser-agent: *\nDisallow: /admin/\nDisallow: /private/\nAllow: /admin/public/\n\nSitemap: https://example.com/sitemap.xml\n\"#;\n        let robots = RobotsTxt::parse(content);\n        assert_eq!(robots.disallowed_paths.len(), 2);\n        assert_eq!(robots.allowed_paths.len(), 1);\n        assert_eq!(robots.sitemaps.len(), 1);\n  
      assert_eq!(robots.sitemaps[0], \"https://example.com/sitemap.xml\");\n    }\n\n    #[test]\n    fn test_is_allowed() {\n        let content = r#\"\nUser-agent: *\nDisallow: /admin/\nDisallow: /private/\n\"#;\n        let robots = RobotsTxt::parse(content);\n        assert!(robots.is_allowed(\"/public/page\"));\n        assert!(!robots.is_allowed(\"/admin/settings\"));\n        assert!(!robots.is_allowed(\"/private/data\"));\n        assert!(robots.is_allowed(\"/\"));\n    }\n\n    #[test]\n    fn test_assets_always_allowed() {\n        let content = r#\"\nUser-agent: *\nDisallow: /\n\"#;\n        let robots = RobotsTxt::parse(content);\n        assert!(robots.is_allowed(\"/style.css\"));\n        assert!(robots.is_allowed(\"/script.js\"));\n        assert!(robots.is_allowed(\"/image.png\"));\n        assert!(!robots.is_allowed(\"/page\"));\n    }\n\n    #[test]\n    fn test_wildcard_matching() {\n        assert!(path_matches(\"/search?q=test\", \"/search\"));\n        assert!(path_matches(\"/admin/page\", \"/admin/\"));\n        assert!(!path_matches(\"/public/page\", \"/admin/\"));\n    }\n\n    #[test]\n    fn test_wildcard_star() {\n        assert!(path_matches(\"/path/to/file.pdf\", \"/*.pdf\"));\n        assert!(!path_matches(\"/path/to/file.html\", \"/*.pdf\"));\n    }\n\n    #[test]\n    fn test_anchor_matching() {\n        assert!(path_matches(\"/page.html\", \"/page.html$\"));\n        assert!(!path_matches(\"/page.html?q=1\", \"/page.html$\"));\n    }\n\n    #[test]\n    fn test_siteone_crawler_user_agent() {\n        let content = r#\"\nUser-agent: SiteOne-Crawler\nDisallow: /blocked/\n\nUser-agent: Googlebot\nDisallow: /google-only/\n\"#;\n        let robots = RobotsTxt::parse(content);\n        assert!(!robots.is_allowed(\"/blocked/page\"));\n        // /google-only/ is not disallowed for SiteOne-Crawler or *\n        assert!(robots.is_allowed(\"/google-only/page\"));\n    }\n\n    #[test]\n    fn test_comments_stripped() {\n        let content = 
r#\"\nUser-agent: * # all bots\nDisallow: /admin/ # admin panel\n# Disallow: /not-really-disallowed/\n\"#;\n        let robots = RobotsTxt::parse(content);\n        assert_eq!(robots.disallowed_paths.len(), 1);\n        assert_eq!(robots.disallowed_paths[0], \"/admin/\");\n    }\n\n    #[test]\n    fn test_empty_disallow() {\n        let content = r#\"\nUser-agent: *\nDisallow:\n\"#;\n        let robots = RobotsTxt::parse(content);\n        assert!(robots.disallowed_paths.is_empty());\n        assert!(robots.is_allowed(\"/anything\"));\n    }\n\n    #[test]\n    fn test_multiple_sitemaps() {\n        let content = r#\"\nUser-agent: *\nDisallow:\n\nSitemap: https://example.com/sitemap1.xml\nSitemap: https://example.com/sitemap2.xml\n\"#;\n        let robots = RobotsTxt::parse(content);\n        assert_eq!(robots.sitemaps.len(), 2);\n    }\n}\n"
  },
  {
    "path": "src/error.rs",
    "content": "// SiteOne Crawler - Error types\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse thiserror::Error;\r\n\r\n#[derive(Error, Debug)]\r\npub enum CrawlerError {\r\n    #[error(\"IO error: {0}\")]\r\n    Io(#[from] std::io::Error),\r\n\r\n    #[error(\"HTTP error: {0}\")]\r\n    Http(#[from] reqwest::Error),\r\n\r\n    #[error(\"URL parse error: {0}\")]\r\n    UrlParse(#[from] url::ParseError),\r\n\r\n    #[error(\"Parse error: {0}\")]\r\n    Parse(String),\r\n\r\n    #[error(\"Config error: {0}\")]\r\n    Config(String),\r\n\r\n    #[error(\"Regex error: {0}\")]\r\n    Regex(#[from] regex::Error),\r\n\r\n    #[error(\"JSON error: {0}\")]\r\n    Json(#[from] serde_json::Error),\r\n\r\n    #[error(\"XML error: {0}\")]\r\n    Xml(#[from] quick_xml::Error),\r\n\r\n    #[error(\"DNS resolution error: {0}\")]\r\n    Dns(String),\r\n\r\n    #[error(\"TLS/SSL error: {0}\")]\r\n    Tls(String),\r\n\r\n    #[error(\"Mail error: {0}\")]\r\n    Mail(String),\r\n\r\n    #[error(\"Export error: {0}\")]\r\n    Export(String),\r\n\r\n    #[error(\"Analysis error: {0}\")]\r\n    Analysis(String),\r\n\r\n    #[error(\"{0}\")]\r\n    Other(String),\r\n}\r\n\r\npub type CrawlerResult<T> = std::result::Result<T, CrawlerError>;\r\n"
  },
  {
    "path": "src/export/base_exporter.rs",
    "content": "// SiteOne Crawler - BaseExporter (shared helpers for all exporters)\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n\nuse std::fs;\nuse std::path::Path;\n\nuse chrono::Local;\nuse regex::Regex;\n\nuse crate::error::{CrawlerError, CrawlerResult};\n\n/// Get the export file path with optional host and timestamp suffixes.\n///\n/// - If the file has no extension, the given `default_extension` is appended.\n/// - If `add_host` is true, the host is inserted before the extension.\n/// - If `add_timestamp` is true, a timestamp is inserted before the extension.\npub fn get_export_file_path(\n    file: &str,\n    default_extension: &str,\n    add_host: bool,\n    host: Option<&str>,\n    add_timestamp: bool,\n) -> CrawlerResult<String> {\n    let mut file = file.to_string();\n\n    // Add default extension if missing\n    let has_extension = Regex::new(r\"\\.[a-zA-Z0-9]{1,10}$\")\n        .map(|re| re.is_match(&file))\n        .unwrap_or(false);\n    if !has_extension {\n        file = format!(\"{}.{}\", file, default_extension);\n    }\n\n    // Add host before extension\n    if add_host\n        && let Some(h) = host\n        && let Ok(re) = Regex::new(r\"\\.[a-zA-Z0-9]{1,10}$\")\n    {\n        file = re\n            .replace(&file, |caps: &regex::Captures| {\n                format!(\".{}{}\", h, caps.get(0).map_or(\"\", |m| m.as_str()))\n            })\n            .to_string();\n    }\n\n    // Add timestamp before extension\n    if add_timestamp {\n        let timestamp = Local::now().format(\"%Y-%m-%d.%H-%M-%S\").to_string();\n        if let Ok(re) = Regex::new(r\"\\.[a-zA-Z0-9]{1,10}$\") {\n            file = re\n                .replace(&file, |caps: &regex::Captures| {\n                    format!(\".{}{}\", timestamp, caps.get(0).map_or(\"\", |m| m.as_str()))\n                })\n                .to_string();\n        }\n    }\n\n    // Ensure parent directory exists and is writable\n    let path = Path::new(&file);\n    if let Some(parent) = 
path.parent()\n        && !parent.exists()\n    {\n        fs::create_dir_all(parent).map_err(|e| {\n            CrawlerError::Export(format!(\"Cannot create output directory '{}': {}\", parent.display(), e))\n        })?;\n    }\n\n    Ok(file)\n}\n"
  },
  {
    "path": "src/export/exporter.rs",
    "content": "// SiteOne Crawler - Exporter trait\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n\nuse crate::error::CrawlerResult;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\n\n/// Trait for all exporters (file, sitemap, upload, mailer, offline, markdown).\n/// Each exporter can save crawl results in a different format or send them somewhere.\npub trait Exporter: Send + Sync {\n    /// Get the name of this exporter (for logging/debugging).\n    fn get_name(&self) -> &str;\n\n    /// Should this exporter be activated based on the provided options?\n    fn should_be_activated(&self) -> bool;\n\n    /// Perform the export (save to file, send to server, etc.).\n    /// Uses the Output trait to report progress/results to the user.\n    fn export(&mut self, status: &Status, output: &dyn Output) -> CrawlerResult<()>;\n}\n"
  },
  {
    "path": "src/export/file_exporter.rs",
    "content": "// SiteOne Crawler - FileExporter\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Saves crawl results to HTML, JSON, and/or text files.\n\nuse std::fs;\nuse std::time::Instant;\n\nuse crate::error::{CrawlerError, CrawlerResult};\nuse crate::export::base_exporter;\nuse crate::export::exporter::Exporter;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::utils;\n\npub struct FileExporter {\n    /// Path for HTML report output (--output-html-report)\n    pub output_html_report: Option<String>,\n    /// Comma-separated list of sections for HTML report (--html-report-options)\n    pub html_report_options: Option<String>,\n    /// Path for JSON output (--output-json-file)\n    pub output_json_file: Option<String>,\n    /// Path for text output (--output-text-file)\n    pub output_text_file: Option<String>,\n    /// Add timestamp to output filename (--add-timestamp-to-output-file)\n    pub add_timestamp_to_output_file: bool,\n    /// Add host to output filename (--add-host-to-output-file)\n    pub add_host_to_output_file: bool,\n    /// Initial host from the crawled URL (for filename generation)\n    pub initial_host: Option<String>,\n    /// Cached text output to save to file\n    pub text_output_content: Option<String>,\n    /// Cached JSON output to save to file\n    pub json_output_content: Option<String>,\n    /// Cached HTML report content to save to file\n    pub html_report_content: Option<String>,\n}\n\nimpl FileExporter {\n    pub fn new(\n        output_html_report: Option<String>,\n        html_report_options: Option<String>,\n        output_json_file: Option<String>,\n        output_text_file: Option<String>,\n        add_timestamp_to_output_file: bool,\n        add_host_to_output_file: bool,\n        initial_host: Option<String>,\n    ) -> Self {\n        Self {\n            output_html_report,\n            html_report_options,\n            output_json_file,\n            output_text_file,\n            
add_timestamp_to_output_file,\n            add_host_to_output_file,\n            initial_host,\n            text_output_content: None,\n            json_output_content: None,\n            html_report_content: None,\n        }\n    }\n\n    /// Set the text output content to be saved (from TextOutput)\n    pub fn set_text_output_content(&mut self, content: String) {\n        self.text_output_content = Some(content);\n    }\n\n    /// Set the JSON output content to be saved (from JsonOutput)\n    pub fn set_json_output_content(&mut self, content: String) {\n        self.json_output_content = Some(content);\n    }\n\n    /// Set the HTML report content to be saved (from HtmlReport)\n    pub fn set_html_report_content(&mut self, content: String) {\n        self.html_report_content = Some(content);\n    }\n\n    /// Get the export file path with host/timestamp modifications.\n    fn get_export_file_path(&self, file: &str, extension: &str) -> CrawlerResult<String> {\n        base_exporter::get_export_file_path(\n            file,\n            extension,\n            self.add_host_to_output_file,\n            self.initial_host.as_deref(),\n            self.add_timestamp_to_output_file,\n        )\n    }\n}\n\nimpl Exporter for FileExporter {\n    fn get_name(&self) -> &str {\n        \"FileExporter\"\n    }\n\n    fn should_be_activated(&self) -> bool {\n        self.output_html_report.is_some() || self.output_json_file.is_some() || self.output_text_file.is_some()\n    }\n\n    fn export(&mut self, status: &Status, _output: &dyn Output) -> CrawlerResult<()> {\n        // Export text file\n        if let Some(ref output_text_file) = self.output_text_file.clone() {\n            let start = Instant::now();\n            let report_file = self.get_export_file_path(output_text_file, \"txt\")?;\n\n            let content = match &self.text_output_content {\n                Some(c) => utils::remove_ansi_colors(c),\n                None => {\n                    return 
Err(CrawlerError::Export(\n                        \"Text output content not available for FileExporter\".to_string(),\n                    ));\n                }\n            };\n\n            fs::write(&report_file, &content).map_err(|e| {\n                CrawlerError::Export(format!(\"Failed to write text report to '{}': {}\", report_file, e))\n            })?;\n\n            let elapsed = start.elapsed().as_secs_f64();\n            let report_file_display = utils::get_output_formatted_path(&report_file);\n            status.add_info_to_summary(\n                \"export-to-text\",\n                &format!(\n                    \"Text report saved to '{}' and took {}\",\n                    report_file_display,\n                    utils::get_formatted_duration(elapsed)\n                ),\n            );\n        }\n\n        // Export JSON file\n        if let Some(ref output_json_file) = self.output_json_file.clone() {\n            let start = Instant::now();\n            let report_file = self.get_export_file_path(output_json_file, \"json\")?;\n\n            let content = match &self.json_output_content {\n                Some(c) => c.clone(),\n                None => {\n                    return Err(CrawlerError::Export(\n                        \"JSON output content not available for FileExporter\".to_string(),\n                    ));\n                }\n            };\n\n            fs::write(&report_file, &content).map_err(|e| {\n                CrawlerError::Export(format!(\"Failed to write JSON report to '{}': {}\", report_file, e))\n            })?;\n\n            let elapsed = start.elapsed().as_secs_f64();\n            let report_file_display = utils::get_output_formatted_path(&report_file);\n            status.add_info_to_summary(\n                \"export-to-json\",\n                &format!(\n                    \"JSON report saved to '{}' and took {}\",\n                    report_file_display,\n                    
utils::get_formatted_duration(elapsed)\n                ),\n            );\n        }\n\n        // Export HTML report\n        if let Some(ref output_html_report) = self.output_html_report.clone() {\n            let start = Instant::now();\n            let report_file = self.get_export_file_path(output_html_report, \"html\")?;\n\n            let content = match &self.html_report_content {\n                Some(c) => c.clone(),\n                None => {\n                    return Err(CrawlerError::Export(\n                        \"HTML report content not available. Set it via set_html_report_content() before export.\"\n                            .to_string(),\n                    ));\n                }\n            };\n\n            fs::write(&report_file, &content).map_err(|e| {\n                CrawlerError::Export(format!(\"Failed to write HTML report to '{}': {}\", report_file, e))\n            })?;\n\n            let elapsed = start.elapsed().as_secs_f64();\n            let report_file_display = utils::get_output_formatted_path(&report_file);\n            status.add_info_to_summary(\n                \"export-to-html\",\n                &format!(\n                    \"HTML report saved to '{}' and took {}\",\n                    report_file_display,\n                    utils::get_formatted_duration(elapsed)\n                ),\n            );\n        }\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "src/export/html_report/badge.rs",
    "content": "// SiteOne Crawler - Badge for HTML Report\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\n/// Badge colors used in HTML report tabs and content\r\n#[derive(Debug, Clone, PartialEq, Eq)]\r\npub enum BadgeColor {\r\n    Red,\r\n    Orange,\r\n    Green,\r\n    Blue,\r\n    Neutral,\r\n}\r\n\r\nimpl BadgeColor {\r\n    pub fn as_css_class(&self) -> &'static str {\r\n        match self {\r\n            BadgeColor::Red => \"red\",\r\n            BadgeColor::Orange => \"orange\",\r\n            BadgeColor::Green => \"green\",\r\n            BadgeColor::Blue => \"blue\",\r\n            BadgeColor::Neutral => \"neutral\",\r\n        }\r\n    }\r\n}\r\n\r\n/// Badge displayed in tab titles or content to show counts/status\r\n#[derive(Debug, Clone)]\r\npub struct Badge {\r\n    pub value: String,\r\n    pub color: BadgeColor,\r\n    pub title: Option<String>,\r\n}\r\n\r\nimpl Badge {\r\n    pub fn new(value: String, color: BadgeColor) -> Self {\r\n        Self {\r\n            value,\r\n            color,\r\n            title: None,\r\n        }\r\n    }\r\n\r\n    pub fn with_title(value: String, color: BadgeColor, title: &str) -> Self {\r\n        Self {\r\n            value,\r\n            color,\r\n            title: Some(title.to_string()),\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/export/html_report/mod.rs",
    "content": "// SiteOne Crawler - HTML Report module\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\npub mod badge;\r\npub mod report;\r\npub mod tab;\r\n"
  },
  {
    "path": "src/export/html_report/report.rs",
    "content": "// SiteOne Crawler - HTML Report Generator\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse regex::Regex;\n\nuse crate::components::summary::item_status::ItemStatus;\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::output::output::BasicStats;\nuse crate::result::status::Status;\nuse crate::result::visited_url;\nuse crate::scoring::scorer;\nuse crate::utils;\nuse crate::version;\n\nuse super::badge::{Badge, BadgeColor};\nuse super::tab::Tab;\n\n// SuperTable apl_code constants (matching the analyzer module constants)\nconst SUPER_TABLE_VISITED_URLS: &str = \"visited-urls\";\n\n// Analysis manager\nconst ST_ANALYSIS_STATS: &str = \"analysis-stats\";\n\n// Content processor\nconst ST_CONTENT_PROCESSORS_STATS: &str = \"content-processors-stats\";\n\n// Analyzers\nconst ST_HEADERS: &str = \"headers\";\nconst ST_HEADERS_VALUES: &str = \"headers-values\";\nconst ST_SEO: &str = \"seo\";\nconst ST_OPEN_GRAPH: &str = \"open-graph\";\nconst ST_SEO_HEADINGS: &str = \"seo-headings\";\nconst ST_DNS: &str = \"dns\";\nconst ST_CERTIFICATE_INFO: &str = \"certificate-info\";\nconst ST_NON_UNIQUE_TITLES: &str = \"non-unique-titles\";\nconst ST_NON_UNIQUE_DESCRIPTIONS: &str = \"non-unique-descriptions\";\nconst ST_CONTENT_TYPES: &str = \"content-types\";\nconst ST_CONTENT_MIME_TYPES: &str = \"content-types-raw\";\nconst ST_SKIPPED_SUMMARY: &str = \"skipped-summary\";\nconst ST_SKIPPED: &str = \"skipped\";\nconst ST_CACHING_PER_CONTENT_TYPE: &str = \"caching-per-content-type\";\nconst ST_CACHING_PER_DOMAIN: &str = \"caching-per-domain\";\nconst ST_CACHING_PER_DOMAIN_AND_CONTENT_TYPE: &str = \"caching-per-domain-and-content-type\";\nconst ST_REDIRECTS: &str = \"redirects\";\nconst ST_404: &str = \"404\";\nconst ST_FASTEST_URLS: &str = \"fastest-urls\";\nconst ST_SLOWEST_URLS: &str = \"slowest-urls\";\nconst ST_BEST_PRACTICES: &str = \"best-practices\";\nconst 
ST_ACCESSIBILITY: &str = \"accessibility\";\nconst ST_EXTERNAL_URLS: &str = \"external-urls\";\nconst ST_SECURITY: &str = \"security\";\nconst ST_SOURCE_DOMAINS: &str = \"source-domains\";\n\n/// Analysis names for Best Practices\nconst BEST_PRACTICE_ANALYSIS_NAMES: &[&str] = &[\n    \"Large inline SVGs\",\n    \"Duplicate inline SVGs\",\n    \"Invalid inline SVGs\",\n    \"Missing quotes on attributes\",\n    \"DOM depth\",\n    \"Heading structure\",\n    \"Non-clickable phone numbers\",\n    \"Title uniqueness\",\n    \"Description uniqueness\",\n];\n\n/// Analysis names for Accessibility\nconst ACCESSIBILITY_ANALYSIS_NAMES: &[&str] = &[\n    \"Valid HTML\",\n    \"Missing image alt attributes\",\n    \"Missing form labels\",\n    \"Missing aria labels\",\n    \"Missing roles\",\n    \"Missing html lang attribute\",\n];\n\n/// Analysis names for Security\nconst SECURITY_ANALYSIS_NAMES: &[&str] = &[\"Security headers\"];\n\n/// Severity order for sorting\nconst SEVERITY_ORDER_CRITICAL: i32 = 1;\nconst SEVERITY_ORDER_WARNING: i32 = 2;\nconst SEVERITY_ORDER_NOTICE: i32 = 3;\n\n/// Max example URLs to show per finding\nconst MAX_EXAMPLE_URLS: usize = 5;\n\n/// HTML template embedded at compile time\nconst TEMPLATE_HTML: &str = include_str!(\"template.html\");\n\n/// SuperTable apl_codes that are handled by dedicated tabs (not shown as generic tabs)\nconst SKIPPED_SUPER_TABLES: &[&str] = &[\n    ST_ANALYSIS_STATS,\n    ST_HEADERS_VALUES,\n    ST_SEO,\n    ST_OPEN_GRAPH,\n    ST_DNS,\n    ST_CERTIFICATE_INFO,\n    ST_NON_UNIQUE_TITLES,\n    ST_NON_UNIQUE_DESCRIPTIONS,\n    ST_CONTENT_MIME_TYPES,\n    ST_SKIPPED,\n    ST_CACHING_PER_DOMAIN,\n    ST_CACHING_PER_DOMAIN_AND_CONTENT_TYPE,\n    ST_CONTENT_PROCESSORS_STATS,\n];\n\n/// Lightweight extracted info from a SuperTable (since SuperTable is not Clone)\nstruct SuperTableInfo {\n    apl_code: String,\n    title: String,\n    forced_tab_label: Option<String>,\n    html_output: String,\n    total_rows: usize,\n    data: 
Vec<HashMap<String, String>>,\n}\n\n/// Extract info from a SuperTable reference\nfn extract_info(st: &SuperTable) -> SuperTableInfo {\n    SuperTableInfo {\n        apl_code: st.apl_code.clone(),\n        title: st.title.clone(),\n        forced_tab_label: st.forced_tab_label.clone(),\n        html_output: st.get_html_output(),\n        total_rows: st.get_total_rows(),\n        data: st.get_data().to_vec(),\n    }\n}\n\n/// SuperTable tab order\nfn get_super_table_order(apl_code: &str) -> i32 {\n    const ORDERS: &[&str] = &[\n        SUPER_TABLE_VISITED_URLS,\n        ST_BEST_PRACTICES,\n        ST_ACCESSIBILITY,\n        ST_SECURITY,\n        ST_SEO,\n        ST_SEO_HEADINGS,\n        ST_404,\n        ST_REDIRECTS,\n        ST_SKIPPED_SUMMARY,\n        ST_EXTERNAL_URLS,\n        ST_FASTEST_URLS,\n        ST_SLOWEST_URLS,\n        ST_CONTENT_TYPES,\n        ST_SOURCE_DOMAINS,\n        ST_HEADERS,\n        ST_CACHING_PER_CONTENT_TYPE,\n        ST_DNS,\n    ];\n\n    ORDERS\n        .iter()\n        .position(|&code| code == apl_code)\n        .map(|i| i as i32)\n        .unwrap_or(1000)\n}\n\n/// Map SuperTable apl_code to section name for filtering\nfn get_section_name_by_apl_code(apl_code: &str) -> Option<&'static str> {\n    match apl_code {\n        \"accessibility\" => Some(\"accessibility\"),\n        \"404\" => Some(\"404-pages\"),\n        \"source-domains\" => Some(\"source-domains\"),\n        \"caching-per-content-type\" | \"caching-per-domain\" | \"caching-per-domain-and-content-type\" => Some(\"caching\"),\n        \"headers\" | \"headers-values\" => Some(\"headers\"),\n        \"slowest-urls\" => Some(\"slowest-urls\"),\n        \"fastest-urls\" => Some(\"fastest-urls\"),\n        \"best-practices\" => Some(\"best-practices\"),\n        \"skipped-summary\" | \"skipped\" => Some(\"skipped-urls\"),\n        \"external-urls\" => Some(\"external-urls\"),\n        \"redirects\" => Some(\"redirects\"),\n        \"security\" => Some(\"security\"),\n        
\"content-types\" | \"content-types-raw\" => Some(\"content-types\"),\n        \"dns\" | \"certificate-info\" => Some(\"dns-ssl\"),\n        \"seo\" | \"open-graph\" | \"seo-headings\" | \"non-unique-titles\" | \"non-unique-descriptions\" => {\n            Some(\"seo-opengraph\")\n        }\n        _ => None,\n    }\n}\n\n/// HTML Report generator\npub struct HtmlReport<'a> {\n    status: &'a Status,\n    #[allow(dead_code)]\n    max_example_urls: usize,\n    allowed_sections: Option<Vec<String>>,\n}\n\nimpl<'a> HtmlReport<'a> {\n    pub fn new(status: &'a Status, max_example_urls: usize, html_report_options: Option<&str>) -> Self {\n        let allowed_sections = html_report_options.filter(|s| !s.is_empty()).map(|opts| {\n            opts.split(',')\n                .map(|s| s.trim().to_string())\n                .filter(|s| !s.is_empty())\n                .collect()\n        });\n\n        Self {\n            status,\n            max_example_urls,\n            allowed_sections,\n        }\n    }\n\n    /// Generate the complete HTML report\n    pub fn get_html(&self) -> String {\n        let mut html = TEMPLATE_HTML.to_string();\n        let template_variables = self.get_template_variables();\n\n        for (var_name, var_value) in &template_variables {\n            let placeholder = format!(\"{{${}}}\", var_name);\n            html = html.replace(&placeholder, var_value);\n        }\n\n        self.finalize_html(html)\n    }\n\n    /// Check if a section is allowed based on html_report_options\n    fn is_section_allowed(&self, section_name: &str) -> bool {\n        match &self.allowed_sections {\n            None => true,\n            Some(sections) => sections.iter().any(|s| s == section_name),\n        }\n    }\n\n    /// Build template variables\n    fn get_template_variables(&self) -> HashMap<String, String> {\n        let info = self.status.get_crawler_info();\n        let initial_host = self.get_initial_host();\n        let initial_url = 
self.get_initial_url();\n        let tabs = self.get_tabs();\n\n        let command = info.command.clone();\n        // Strip leading binary name prefix (e.g. \"crawler.php \")\n        let command = match Regex::new(r\"^\\S+\\.php\\s+\") {\n            Ok(re) => re.replace(&command, \"\").to_string(),\n            _ => command,\n        };\n        let command = utils::get_safe_command(&command);\n\n        let mut vars = HashMap::new();\n        vars.insert(\"initialHost\".to_string(), initial_host);\n        vars.insert(\"initialUrl\".to_string(), initial_url);\n        vars.insert(\"version\".to_string(), version::CODE.to_string());\n        vars.insert(\"executedAt\".to_string(), info.executed_at.clone());\n        vars.insert(\"command\".to_string(), command);\n        vars.insert(\"hostname\".to_string(), info.hostname.clone());\n        vars.insert(\"userAgent\".to_string(), info.final_user_agent.clone());\n        vars.insert(\"tabs\".to_string(), self.get_tabs_html(&tabs));\n        vars.insert(\"tabsRadios\".to_string(), self.get_tabs_radios(&tabs));\n        vars.insert(\"tabsContent\".to_string(), self.get_tabs_content_html(&tabs));\n        vars.insert(\"tabsCss\".to_string(), self.get_tabs_css(&tabs));\n        vars\n    }\n\n    /// Post-process the HTML: convert colors, add badge classes, etc.\n    fn finalize_html(&self, mut html: String) -> String {\n        // Add badge class to colored spans\n        if let Ok(re) = Regex::new(r#\"(<span)\\s+(style=\"background-color:[^\"]+\">)\"#) {\n            html = re.replace_all(&html, r#\"$1 class=\"badge\" $2\"#).to_string();\n        }\n        if let Ok(re) = Regex::new(r#\"(<span)\\s+(style=\"color:[^\"]+\">)\"#) {\n            html = re.replace_all(&html, r#\"$1 class=\"badge in-table\" $2\"#).to_string();\n        }\n\n        html = html.replace(\n            r#\"style=\"background-color: #ffff00\"\"#,\n            r#\"style=\"background-color: #ffff00; color: #1F2937\"\"#,\n        );\n\n        
if let Ok(re) = Regex::new(r\"(<td data-value='[0-9]+'[^>]*>)([0-9\\-]+)(</td>)\") {\n            html = re\n                .replace_all(&html, r#\"$1<span class=\"badge\">$2</span>$3\"#)\n                .to_string();\n        }\n\n        // Change magenta to orange\n        html = html.replace(\"color: #ff00ff\", \"color: #ff9234\");\n\n        // Add spaces around slashes in content-type cells\n        if let Ok(re) = Regex::new(r\"(?i)(<td[^>]*>)(\\s*[a-z0-9. /]+/[a-z0-9. /]+\\s*)(</td>)\") {\n            html = re\n                .replace_all(&html, |caps: &regex::Captures| {\n                    let td_open = caps.get(1).map_or(\"\", |m| m.as_str());\n                    let content = caps.get(2).map_or(\"\", |m| m.as_str());\n                    let td_close = caps.get(3).map_or(\"\", |m| m.as_str());\n                    match Regex::new(r\"\\s*/\\s*\") {\n                        Ok(slash_re) => {\n                            let cleaned = slash_re.replace_all(content, \" / \");\n                            format!(\"{}{}{}\", td_open, cleaned, td_close)\n                        }\n                        _ => {\n                            format!(\"{}{}{}\", td_open, content, td_close)\n                        }\n                    }\n                })\n                .to_string();\n        }\n\n        // Replace specific badge color styles with CSS classes\n        let color_replacements = [\n            (\n                r#\"<span class=\"badge in-table\" style=\"color: #00ff00\">\"#,\n                r#\"<span class=\"badge green\">\"#,\n            ),\n            (\n                r#\"<span class=\"badge in-table\" style=\"color: #ff9234\">\"#,\n                r#\"<span class=\"badge orange\">\"#,\n            ),\n            (\n                r#\"<span class=\"badge in-table\" style=\"color: #ff0000\">\"#,\n                r#\"<span class=\"badge red\">\"#,\n            ),\n            (\n                r#\"<span class=\"badge in-table\" 
style=\"background-color: #ffff00; color: #1F2937\">\"#,\n                r#\"<span class=\"badge yellow\">\"#,\n            ),\n            (\n                r#\"<span class=\"badge\" style=\"background-color: #ffff00; color: #1F2937\">\"#,\n                r#\"<span class=\"badge yellow\">\"#,\n            ),\n            (\n                r#\"<span class=\"badge in-table\" style=\"color: #ffff00\">\"#,\n                r#\"<span class=\"badge yellow\">\"#,\n            ),\n            (\n                r#\"<span class=\"badge in-table\" style=\"color: #0000ff\">\"#,\n                r#\"<span class=\"badge blue\">\"#,\n            ),\n        ];\n\n        for (from, to) in &color_replacements {\n            html = html.replace(from, to);\n        }\n\n        // Remove excess whitespace from HTML\n        html = remove_whitespaces_from_html(&html);\n\n        html\n    }\n\n    /// Extract info from all SuperTables (within mutex closures) so we can work with them freely.\n    fn extract_all_super_table_infos(&self) -> Vec<SuperTableInfo> {\n        let mut all = Vec::new();\n        let host = Some(self.get_initial_host());\n        let scheme = Some(self.get_initial_scheme());\n        let initial_url = Some(self.get_initial_url());\n        self.status.with_super_tables_at_beginning_mut(|tables| {\n            for st in tables.iter_mut() {\n                st.set_host_to_strip_from_urls(host.clone(), scheme.clone());\n                st.set_initial_url(initial_url.clone());\n                all.push(extract_info(st));\n            }\n        });\n        self.status.with_super_tables_at_end_mut(|tables| {\n            for st in tables.iter_mut() {\n                st.set_host_to_strip_from_urls(host.clone(), scheme.clone());\n                st.set_initial_url(initial_url.clone());\n                all.push(extract_info(st));\n            }\n        });\n        all\n    }\n\n    /// Gather all tabs for the report\n    fn get_tabs(&self) -> Vec<Tab> {\n    
    let mut tabs: Vec<Tab> = Vec::new();\n\n        if self.is_section_allowed(\"summary\")\n            && let Some(tab) = self.get_summary_tab()\n        {\n            tabs.push(tab);\n        }\n        if self.is_section_allowed(\"seo-opengraph\")\n            && let Some(tab) = self.get_seo_and_opengraph_tab()\n        {\n            tabs.push(tab);\n        }\n        if self.is_section_allowed(\"image-gallery\")\n            && let Some(tab) = self.get_image_gallery_tab()\n        {\n            tabs.push(tab);\n        }\n        if self.is_section_allowed(\"video-gallery\")\n            && let Some(tab) = self.get_video_gallery_tab()\n        {\n            tabs.push(tab);\n        }\n        if self.is_section_allowed(\"visited-urls\") {\n            tabs.push(self.get_visited_urls_tab());\n        }\n        if self.is_section_allowed(\"dns-ssl\")\n            && let Some(tab) = self.get_dns_and_ssl_tls_tab()\n        {\n            tabs.push(tab);\n        }\n        if self.is_section_allowed(\"crawler-stats\") {\n            tabs.push(self.get_crawler_stats_tab());\n        }\n        if self.is_section_allowed(\"crawler-info\") {\n            tabs.push(self.get_crawler_info_tab());\n        }\n\n        // Add tabs from SuperTables (analysis results)\n        let super_table_tabs = self.get_super_table_tabs();\n        tabs.extend(super_table_tabs);\n\n        // Remove empty tabs\n        tabs.retain(|tab| !tab.tab_content.is_empty());\n\n        // Sort tabs by order\n        tabs.sort_by_key(|tab| tab.get_final_sort_order());\n\n        tabs\n    }\n\n    /// Build tabs from SuperTables that are not in SKIPPED_SUPER_TABLES\n    fn get_super_table_tabs(&self) -> Vec<Tab> {\n        let all_infos = self.extract_all_super_table_infos();\n        let mut result = Vec::new();\n\n        // Build analysis detail sub-tables\n        let analysis_detail_html = self.build_analysis_detail_tables();\n\n        for info in &all_infos {\n            if 
SKIPPED_SUPER_TABLES.contains(&info.apl_code.as_str()) {\n                continue;\n            }\n\n            // Check if this SuperTable's section is allowed\n            if let Some(section_name) = get_section_name_by_apl_code(&info.apl_code)\n                && !self.is_section_allowed(section_name)\n            {\n                continue;\n            }\n\n            let badges = get_super_table_badges_by_apl_code(info, &all_infos);\n            let tab_label = info.forced_tab_label.as_deref().unwrap_or(&info.title);\n            let content = get_tab_content_by_super_table(info, &all_infos, &analysis_detail_html);\n            let order = get_super_table_order(&info.apl_code);\n\n            result.push(Tab::new(tab_label, None, content, false, badges, Some(order)));\n        }\n\n        result\n    }\n\n    /// Generate hidden radio buttons for tabs\n    fn get_tabs_radios(&self, tabs: &[Tab]) -> String {\n        let mut html = String::new();\n        let mut is_first = true;\n\n        for tab in tabs {\n            html.push_str(&format!(\n                \"<input type=\\\"radio\\\" id=\\\"{}\\\" name=\\\"tabs\\\" aria-label=\\\"Show tab {}\\\" class=\\\"tabs__radio\\\"{}>\\n\",\n                html_escape(&tab.radio_html_id),\n                html_escape(&tab.name),\n                if is_first { \" checked\" } else { \"\" }\n            ));\n            if is_first {\n                is_first = false;\n            }\n        }\n\n        html\n    }\n\n    /// Generate tab navigation labels with badges\n    fn get_tabs_html(&self, tabs: &[Tab]) -> String {\n        let mut html = String::new();\n\n        for tab in tabs {\n            let mut badges_html = String::new();\n            for badge in &tab.badges {\n                let title_attr = if let Some(ref title) = badge.title {\n                    format!(\" style=\\\"cursor: help\\\" title=\\\"{}\\\"\", html_escape(title))\n                } else {\n                    String::new()\n     
           };\n                badges_html.push_str(&format!(\n                    \"<span class=\\\"badge {}\\\"{}>{}</span> \",\n                    badge.color.as_css_class(),\n                    title_attr,\n                    html_escape(&badge.value),\n                ));\n            }\n\n            let badges_part = if !badges_html.is_empty() {\n                format!(\" {}\", badges_html)\n            } else {\n                String::new()\n            };\n\n            html.push_str(&format!(\n                \"<label for=\\\"{}\\\" class=\\\"tabs__title {}\\\">{}{}</label>\\n\",\n                html_escape(&tab.radio_html_id),\n                html_escape(&tab.radio_html_id),\n                html_escape(&tab.name),\n                badges_part,\n            ));\n        }\n\n        html\n    }\n\n    /// Generate tab content panels\n    fn get_tabs_content_html(&self, tabs: &[Tab]) -> String {\n        let mut html = String::new();\n        let line_prefix = \"                \";\n\n        for tab in tabs {\n            html.push_str(&format!(\n                \"{}<div class=\\\"tabs__tab {}\\\">\\n\",\n                line_prefix,\n                html_escape(&tab.content_html_id),\n            ));\n            if tab.add_heading {\n                html.push_str(&format!(\"{}    <h2>{}</h2>\\n\", line_prefix, html_escape(&tab.name),));\n            }\n\n            let indented_content = tab.tab_content.replace('\\n', &format!(\"\\n{}    \", line_prefix));\n            html.push_str(&format!(\"{}    {}\\n\", line_prefix, indented_content));\n            html.push_str(&format!(\"{}</div>\\n\", line_prefix));\n        }\n\n        html\n    }\n\n    /// Generate CSS for tab radio button selectors\n    fn get_tabs_css(&self, tabs: &[Tab]) -> String {\n        let line_prefix = \"        \";\n\n        // Content visibility selectors\n        let content_selectors: Vec<String> = tabs\n            .iter()\n            .map(|tab| {\n                
format!(\n                    \"#{radio}:checked ~ .tabs__content .{content}\",\n                    radio = tab.radio_html_id,\n                    content = tab.content_html_id,\n                )\n            })\n            .collect();\n\n        let mut css = format!(\"{} {{\\n\", content_selectors.join(\", \"));\n        css.push_str(&format!(\"{}    display: block;\\n\", line_prefix));\n        css.push_str(&format!(\"{}}}\\n\", line_prefix));\n\n        // Active tab title selectors\n        let title_selectors: Vec<String> = tabs\n            .iter()\n            .map(|tab| {\n                format!(\n                    \"#{radio}:checked ~ .tabs__navigation .{radio}\",\n                    radio = tab.radio_html_id,\n                )\n            })\n            .collect();\n\n        css.push_str(&format!(\"{} {{\\n\", title_selectors.join(\", \")));\n        css.push_str(&format!(\n            \"{}    background-color: var(--color-blue-600);\\n\",\n            line_prefix\n        ));\n        css.push_str(&format!(\"{}    color: var(--color-white);\\n\", line_prefix));\n        css.push_str(&format!(\"{}}}\\n\", line_prefix));\n\n        css\n    }\n\n    // -------------------------------------------------------------------------\n    // Individual tab generators\n    // -------------------------------------------------------------------------\n\n    /// Summary tab\n    fn get_summary_tab(&self) -> Option<Tab> {\n        let mut summary = self.status.get_summary();\n        if summary.get_items().is_empty() {\n            return None;\n        }\n\n        let color_to_count = [\n            (BadgeColor::Red, summary.get_count_by_item_status(ItemStatus::Critical)),\n            (\n                BadgeColor::Orange,\n                summary.get_count_by_item_status(ItemStatus::Warning),\n            ),\n            (BadgeColor::Blue, summary.get_count_by_item_status(ItemStatus::Notice)),\n            (BadgeColor::Green, 
summary.get_count_by_item_status(ItemStatus::Ok)),\n            (BadgeColor::Neutral, summary.get_count_by_item_status(ItemStatus::Info)),\n        ];\n\n        let badges: Vec<Badge> = color_to_count\n            .into_iter()\n            .filter(|(_, count)| *count > 0)\n            .map(|(color, count)| Badge::new(count.to_string(), color))\n            .collect();\n\n        // Build quality scores HTML\n        let basic_stats = self.status.get_basic_stats();\n        let output_stats = BasicStats {\n            total_urls: basic_stats.total_urls,\n            total_size: basic_stats.total_size,\n            total_size_formatted: basic_stats.total_size_formatted.clone(),\n            total_execution_time: basic_stats.total_execution_time,\n            total_requests_times: basic_stats.total_requests_times,\n            total_requests_times_avg: basic_stats.total_requests_times_avg,\n            total_requests_times_min: basic_stats.total_requests_times_min,\n            total_requests_times_max: basic_stats.total_requests_times_max,\n            count_by_status: basic_stats.count_by_status.clone(),\n            count_by_content_type: basic_stats.count_by_content_type.clone(),\n        };\n        let scores = scorer::calculate_scores(&summary, &output_stats);\n        let quality_html = build_quality_scores_html(&scores);\n\n        let content = format!(\"{}\\n{}\", quality_html, summary.get_as_html());\n\n        Some(Tab::new(\"Summary\", None, content, true, badges, Some(-100)))\n    }\n\n    /// SEO and OpenGraph tab\n    fn get_seo_and_opengraph_tab(&self) -> Option<Tab> {\n        let all_infos = self.extract_all_super_table_infos();\n\n        let mut html = String::new();\n        let super_table_codes = [ST_NON_UNIQUE_TITLES, ST_NON_UNIQUE_DESCRIPTIONS, ST_SEO, ST_OPEN_GRAPH];\n\n        let mut badge_count = 0usize;\n        let mut order: Option<i32> = None;\n\n        for code in &super_table_codes {\n            if let Some(info) = 
all_infos.iter().find(|i| i.apl_code == *code) {\n                html.push_str(&info.html_output);\n                html.push_str(\"<br/>\");\n                if badge_count == 0 {\n                    badge_count = info.total_rows;\n                }\n                if *code == ST_SEO {\n                    order = Some(get_super_table_order(ST_SEO));\n                }\n            }\n        }\n\n        if html.is_empty() {\n            return None;\n        }\n\n        let mut badges = Vec::new();\n\n        if let Some(info) = all_infos.iter().find(|i| i.apl_code == ST_NON_UNIQUE_TITLES)\n            && info.total_rows > 0\n        {\n            badges.push(Badge::with_title(\n                info.total_rows.to_string(),\n                BadgeColor::Orange,\n                \"Non-unique titles\",\n            ));\n        }\n\n        if let Some(info) = all_infos.iter().find(|i| i.apl_code == ST_NON_UNIQUE_DESCRIPTIONS)\n            && info.total_rows > 0\n        {\n            badges.push(Badge::with_title(\n                info.total_rows.to_string(),\n                BadgeColor::Orange,\n                \"Non-unique descriptions\",\n            ));\n        }\n\n        badges.push(Badge::with_title(\n            badge_count.to_string(),\n            BadgeColor::Neutral,\n            \"Total URLs with SEO info\",\n        ));\n\n        Some(Tab::new(\"SEO and OpenGraph\", None, html, false, badges, order))\n    }\n\n    /// Image Gallery tab\n    fn get_image_gallery_tab(&self) -> Option<Tab> {\n        let summary = self.status.get_summary();\n        if summary.get_items().is_empty() {\n            return None;\n        }\n\n        let visited_urls = self.status.get_visited_urls();\n        let images: Vec<_> = visited_urls\n            .iter()\n            .filter(|v| {\n                v.is_image()\n                    && v.status_code == 200\n                    && matches!(\n                        v.source_attr,\n                        
visited_url::SOURCE_IMG_SRC | visited_url::SOURCE_INPUT_SRC | visited_url::SOURCE_CSS_URL\n                    )\n            })\n            .collect();\n\n        if images.is_empty() {\n            return None;\n        }\n\n        let mut html = self.get_image_gallery_form_html();\n        html.push_str(\"<div id=\\\"igc\\\" class=\\\"small\\\"><div id=\\\"igcf\\\" class=\\\"scaleDown\\\"><div id=\\\"image-gallery\\\" class=\\\"image-gallery\\\">\");\n\n        for image in &images {\n            let size = image.size.unwrap_or(0);\n            let content_type = image.content_type_header.as_deref().unwrap_or(\"\");\n            let source_url = self.status.get_url_by_uq_id(&image.source_uq_id);\n            let source_url_str = source_url.as_deref().unwrap_or(\"\");\n\n            let image_description = format!(\n                \"{} ({}), found as {}\",\n                utils::get_formatted_size(size, 0),\n                content_type,\n                image.get_source_description(Some(source_url_str)),\n            );\n\n            let image_type = content_type.replace(\"image/\", \"\");\n\n            html.push_str(&format!(\n                \"<a href=\\\"{}\\\" target=\\\"_blank\\\" data-size=\\\"{}\\\" data-source=\\\"{}\\\" data-type=\\\"{}\\\" data-sizematch=\\\"1\\\" data-typematch=\\\"1\\\" data-sourcematch=\\\"1\\\">\",\n                html_escape(&image.url),\n                size,\n                html_escape(image.get_source_short_name()),\n                html_escape(&image_type),\n            ));\n            html.push_str(&format!(\n                \"<img loading=\\\"lazy\\\" width=\\\"140\\\" height=\\\"140\\\" src=\\\"{}\\\" alt=\\\"{}\\\" title=\\\"{}\\\">\",\n                html_escape(&image.url),\n                html_escape(&image_description),\n                html_escape(&image_description),\n            ));\n            html.push_str(\"</a>\\n\");\n        }\n        html.push_str(\"</div></div></div>\");\n\n        let badges = 
vec![Badge::with_title(\n            images.len().to_string(),\n            BadgeColor::Neutral,\n            \"Found images\",\n        )];\n\n        Some(Tab::new(\"Image Gallery\", None, html, true, badges, Some(6)))\n    }\n\n    /// Video Gallery tab\n    fn get_video_gallery_tab(&self) -> Option<Tab> {\n        let summary = self.status.get_summary();\n        if summary.get_items().is_empty() {\n            return None;\n        }\n\n        let visited_urls = self.status.get_visited_urls();\n        let videos: Vec<_> = visited_urls\n            .iter()\n            .filter(|v| v.is_video() && v.status_code == 200)\n            .collect();\n\n        if videos.is_empty() {\n            return None;\n        }\n\n        let mut html = String::from(\n            \"<button onclick=\\\"playVideos()\\\" class=\\\"btn\\\">&#9654; Play the first 2 seconds of each video</button>\",\n        );\n        html.push_str(\"<div id=\\\"vgc\\\" class=\\\"small\\\"><div id=\\\"vgcf\\\" class=\\\"scaleDown\\\"><div id=\\\"video-gallery\\\" class=\\\"video-container\\\">\");\n\n        for video in &videos {\n            let size = video.size.unwrap_or(0);\n            let content_type = video.content_type_header.as_deref().unwrap_or(\"\");\n            let source_url = self.status.get_url_by_uq_id(&video.source_uq_id);\n            let source_url_str = source_url.as_deref().unwrap_or(\"\");\n\n            let video_description = format!(\n                \"{} ({}), <a href=\\\"{}\\\" target=\\\"_blank\\\">video</a> found on <a href=\\\"{}\\\" target=\\\"_blank\\\">this page</a>\",\n                utils::get_formatted_size(size, 0),\n                content_type,\n                html_escape(&video.url),\n                html_escape(source_url_str),\n            );\n\n            html.push_str(&format!(\n                \"<div class=\\\"video-card\\\">\\\n                    <video data-src=\\\"{}\\\" preload=\\\"metadata\\\" controls></video>\\\n                    <div 
class=\\\"video-caption\\\">{}</div>\\\n                </div>\\n\",\n                html_escape(&video.url),\n                video_description,\n            ));\n        }\n        html.push_str(\"</div></div></div>\");\n\n        html.push_str(VIDEO_GALLERY_SCRIPT);\n\n        let badges = vec![Badge::with_title(\n            videos.len().to_string(),\n            BadgeColor::Neutral,\n            \"Found videos\",\n        )];\n\n        Some(Tab::new(\"Video Gallery\", None, html, true, badges, Some(6)))\n    }\n\n    /// DNS and SSL/TLS tab\n    fn get_dns_and_ssl_tls_tab(&self) -> Option<Tab> {\n        let all_infos = self.extract_all_super_table_infos();\n\n        let mut html = String::new();\n        let mut order: Option<i32> = None;\n        let mut badges = Vec::new();\n\n        // DNS table\n        if let Some(dns_info) = all_infos.iter().find(|i| i.apl_code == ST_DNS) {\n            html.push_str(&dns_info.html_output);\n            html.push_str(\"<br/>\");\n            order = Some(get_super_table_order(ST_DNS));\n\n            let mut ipv4 = 0usize;\n            let mut ipv6 = 0usize;\n            for row in &dns_info.data {\n                if let Some(info_val) = row.get(\"info\") {\n                    let info_lower = info_val.to_lowercase();\n                    if info_lower.contains(\"ipv4\") {\n                        ipv4 += 1;\n                    } else if info_lower.contains(\"ipv6\") {\n                        ipv6 += 1;\n                    }\n                }\n            }\n            if ipv4 > 0 {\n                let color = if ipv4 > 1 {\n                    BadgeColor::Green\n                } else {\n                    BadgeColor::Neutral\n                };\n                badges.push(Badge::new(format!(\"{}x IPv4\", ipv4), color));\n            }\n            if ipv6 > 0 {\n                let color = if ipv6 > 1 {\n                    BadgeColor::Green\n                } else {\n                    
BadgeColor::Neutral\n                };\n                badges.push(Badge::new(format!(\"{}x IPv6\", ipv6), color));\n            }\n        }\n\n        // SSL/TLS certificate table\n        if let Some(cert_info) = all_infos.iter().find(|i| i.apl_code == ST_CERTIFICATE_INFO) {\n            html.push_str(&cert_info.html_output);\n            html.push_str(\"<br/>\");\n\n            let mut errors = 0usize;\n            for row in &cert_info.data {\n                if let Some(info_val) = row.get(\"info\")\n                    && info_val == \"Errors\"\n                    && let Some(value) = row.get(\"value\")\n                    && !value.is_empty()\n                    && value != \"[]\"\n                {\n                    errors += 1;\n                }\n            }\n            let tls_color = if errors > 0 { BadgeColor::Red } else { BadgeColor::Green };\n            let tls_title = if errors > 0 {\n                format!(\"SSL/TLS certificate: {} error(s)\", errors)\n            } else {\n                \"SSL/TLS certificate OK\".to_string()\n            };\n            badges.push(Badge::with_title(\"TLS\".to_string(), tls_color, &tls_title));\n        }\n\n        if html.is_empty() {\n            return None;\n        }\n\n        Some(Tab::new(\"DNS and SSL\", None, html, false, badges, order))\n    }\n\n    /// Crawler stats tab\n    fn get_crawler_stats_tab(&self) -> Tab {\n        let stats = self.status.get_basic_stats();\n        let all_infos = self.extract_all_super_table_infos();\n\n        let badges = vec![\n            Badge::with_title(stats.total_urls.to_string(), BadgeColor::Neutral, \"Total visited URLs\"),\n            Badge::with_title(\n                stats.total_size_formatted.clone(),\n                BadgeColor::Neutral,\n                \"Total size of all visited URLs\",\n            ),\n            Badge::with_title(\n                utils::get_formatted_duration(stats.total_execution_time),\n                
BadgeColor::Neutral,\n                \"Total execution time\",\n            ),\n        ];\n\n        let mut html = stats.get_as_html();\n\n        if let Some(analysis_stats) = all_infos.iter().find(|i| i.apl_code == ST_ANALYSIS_STATS) {\n            html.push_str(\"<br/>\");\n            html.push_str(&analysis_stats.html_output);\n        }\n\n        if let Some(cp_stats) = all_infos.iter().find(|i| i.apl_code == ST_CONTENT_PROCESSORS_STATS) {\n            html.push_str(\"<br/>\");\n            html.push_str(&cp_stats.html_output);\n        }\n\n        Tab::new(\"Crawler stats\", None, html, true, badges, Some(900))\n    }\n\n    /// Crawler info tab\n    fn get_crawler_info_tab(&self) -> Tab {\n        let info = self.status.get_crawler_info();\n        let command = utils::get_safe_command(&info.command);\n\n        let html = format!(\n            r#\"\n            <h2>Crawler info</h2>\n            <div class=\"info__wrapper\">\n                <table style=\"border-collapse: collapse;\">\n                    <tr>\n                        <th>Version</th>\n                        <td>{}</td>\n                    </tr>\n                    <tr>\n                        <th>Executed At</th>\n                        <td>{}</td>\n                    </tr>\n                    <tr>\n                        <th>Command</th>\n                        <td>{}</td>\n                    </tr>\n                    <tr>\n                        <th>Hostname</th>\n                        <td>{}</td>\n                    </tr>\n                    <tr>\n                        <th>User-Agent</th>\n                        <td>{}</td>\n                    </tr>\n                </table>\n            </div>\"#,\n            html_escape(&info.version),\n            html_escape(&info.executed_at),\n            html_escape(&command),\n            html_escape(&info.hostname),\n            html_escape(&info.final_user_agent),\n        );\n\n        let badges = 
vec![Badge::with_title(\n            format!(\"v{}\", version::CODE),\n            BadgeColor::Neutral,\n            \"Crawler version\",\n        )];\n\n        Tab::new(\"Crawler info\", None, html, false, badges, Some(5000))\n    }\n\n    /// Visited URLs tab\n    fn get_visited_urls_tab(&self) -> Tab {\n        let mut visited_urls_table = self.get_visited_urls_table();\n        visited_urls_table.set_host_to_strip_from_urls(Some(self.get_initial_host()), Some(self.get_initial_scheme()));\n        let badges = get_visited_urls_badges(&visited_urls_table);\n        let order = get_super_table_order(SUPER_TABLE_VISITED_URLS);\n\n        Tab::new(\n            &visited_urls_table.title,\n            visited_urls_table.description.as_deref(),\n            visited_urls_table.get_html_output(),\n            false,\n            badges,\n            Some(order),\n        )\n    }\n\n    /// Build the visited URLs SuperTable\n    fn get_visited_urls_table(&self) -> SuperTable {\n        let visited_urls = self.status.get_visited_urls();\n\n        let mut data: Vec<HashMap<String, String>> = Vec::new();\n        for vu in &visited_urls {\n            if vu.status_code == visited_url::ERROR_SKIPPED {\n                continue;\n            }\n\n            let mut row = HashMap::new();\n            row.insert(\"url\".to_string(), vu.url.clone());\n            row.insert(\"status\".to_string(), vu.status_code.to_string());\n            row.insert(\n                \"type\".to_string(),\n                utils::get_content_type_name_by_id(vu.content_type).to_string(),\n            );\n            row.insert(\"time\".to_string(), format!(\"{:.3}\", vu.request_time));\n            row.insert(\"size\".to_string(), vu.size.unwrap_or(0).to_string());\n            row.insert(\n                \"sizeFormatted\".to_string(),\n                vu.size_formatted.clone().unwrap_or_default(),\n            );\n            row.insert(\"cacheTypeFlags\".to_string(), 
vu.cache_type_flags.to_string());\n            row.insert(\n                \"cacheLifetime\".to_string(),\n                vu.cache_lifetime.map(|v| v.to_string()).unwrap_or_default(),\n            );\n\n            if let Some(ref extras) = vu.extras {\n                for (key, value) in extras {\n                    row.insert(key.clone(), value.clone());\n                }\n            }\n\n            data.push(row);\n        }\n\n        let initial_host = self.get_initial_host();\n        let initial_scheme = self.get_initial_scheme();\n\n        let columns = vec![\n            SuperTableColumn::new(\n                \"url\".to_string(),\n                \"URL\".to_string(),\n                -1,\n                None,\n                Some(Box::new(move |row: &HashMap<String, String>, _render_into: &str| {\n                    let url = row.get(\"url\").map(|s| s.as_str()).unwrap_or(\"\");\n                    let truncated =\n                        utils::truncate_url(url, 80, \"\\u{2026}\", Some(&initial_host), Some(&initial_scheme), None);\n                    format!(\n                        \"<a href=\\\"{}\\\" target=\\\"_blank\\\">{}</a>\",\n                        url.replace('&', \"&amp;\")\n                            .replace('\"', \"&quot;\")\n                            .replace('<', \"&lt;\")\n                            .replace('>', \"&gt;\"),\n                        truncated,\n                    )\n                })),\n                true,\n                false,\n                false,\n                false,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"status\".to_string(),\n                \"Status\".to_string(),\n                6,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<i32>() {\n                        utils::get_colored_status_code(v, 6)\n                    } else {\n                        
value.to_string()\n                    }\n                })),\n                None,\n                false,\n                true,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"type\".to_string(),\n                \"Type\".to_string(),\n                8,\n                None,\n                None,\n                true,\n                false,\n                false,\n                false,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"time\".to_string(),\n                \"Time (s)\".to_string(),\n                8,\n                None,\n                Some(Box::new(|row: &HashMap<String, String>, _render_into: &str| {\n                    let time_str = row.get(\"time\").map(|s| s.as_str()).unwrap_or(\"0\");\n                    if let Ok(v) = time_str.parse::<f64>() {\n                        utils::get_colored_request_time(v, 6)\n                    } else {\n                        time_str.to_string()\n                    }\n                })),\n                false,\n                true,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"size\".to_string(),\n                \"Size\".to_string(),\n                8,\n                None,\n                Some(Box::new(|row: &HashMap<String, String>, _render_into: &str| {\n                    let size_str = row.get(\"size\").map(|s| s.as_str()).unwrap_or(\"0\");\n                    let size: i64 = size_str.parse().unwrap_or(0);\n                    let formatted = row.get(\"sizeFormatted\").map(|s| s.as_str()).unwrap_or(\"\");\n                    if size > 1024 * 1024 {\n                        utils::get_color_text(formatted, \"red\", true)\n                    } else {\n                        formatted.to_string()\n                    }\n                })),\n      
          false,\n                true,\n                false,\n                true,\n                None,\n            ),\n            {\n                let mut col = SuperTableColumn::new(\n                    \"cacheLifetime\".to_string(),\n                    \"Cache\".to_string(),\n                    8,\n                    None,\n                    Some(Box::new(|row: &HashMap<String, String>, _render_into: &str| {\n                        let cache_lifetime_str = row.get(\"cacheLifetime\").map(|s| s.as_str()).unwrap_or(\"\");\n                        let cache_type_flags: u32 = row.get(\"cacheTypeFlags\").and_then(|s| s.parse().ok()).unwrap_or(0);\n                        let str_pad_to = 6;\n\n                        if let Ok(lifetime) = cache_lifetime_str.parse::<i64>() {\n                            utils::get_colored_cache_lifetime(lifetime, str_pad_to)\n                        } else if cache_type_flags & visited_url::CACHE_TYPE_HAS_NO_STORE != 0 {\n                            utils::get_color_text(\n                                &format!(\"{:<width$}\", \"0s (no-store)\", width = str_pad_to),\n                                \"red\",\n                                true,\n                            )\n                        } else if cache_type_flags & visited_url::CACHE_TYPE_HAS_NO_CACHE != 0 {\n                            utils::get_color_text(\n                                &format!(\"{:<width$}\", \"0s (no-cache)\", width = str_pad_to),\n                                \"red\",\n                                false,\n                            )\n                        } else if cache_type_flags & visited_url::CACHE_TYPE_HAS_ETAG != 0 {\n                            utils::get_color_text(\n                                &format!(\"{:<width$}\", \"ETag-only\", width = str_pad_to),\n                                \"magenta\",\n                                false,\n                            )\n                        } else if 
cache_type_flags & visited_url::CACHE_TYPE_HAS_LAST_MODIFIED != 0 {\n                            utils::get_color_text(\n                                &format!(\"{:<width$}\", \"Last-Mod-only\", width = str_pad_to),\n                                \"magenta\",\n                                false,\n                            )\n                        } else {\n                            utils::get_color_text(&format!(\"{:<width$}\", \"None\", width = str_pad_to), \"red\", false)\n                        }\n                    })),\n                    false,\n                    true,\n                    false,\n                    true,\n                    Some(Box::new(|row: &HashMap<String, String>| {\n                        let cache_lifetime_str = row.get(\"cacheLifetime\").map(|s| s.as_str()).unwrap_or(\"\");\n                        let cache_type_flags: u32 = row.get(\"cacheTypeFlags\").and_then(|s| s.parse().ok()).unwrap_or(0);\n\n                        if let Ok(lifetime) = cache_lifetime_str.parse::<i64>() {\n                            lifetime.to_string()\n                        } else if cache_type_flags & visited_url::CACHE_TYPE_HAS_NO_STORE != 0 {\n                            \"-2\".to_string()\n                        } else if cache_type_flags & visited_url::CACHE_TYPE_HAS_NO_CACHE != 0 {\n                            \"-1\".to_string()\n                        } else if cache_type_flags & visited_url::CACHE_TYPE_HAS_ETAG != 0 {\n                            \"0.1\".to_string()\n                        } else if cache_type_flags & visited_url::CACHE_TYPE_HAS_LAST_MODIFIED != 0 {\n                            \"0.2\".to_string()\n                        } else {\n                            \"0.01\".to_string()\n                        }\n                    })),\n                );\n                col.forced_data_type = Some(\"number\".to_string());\n                col\n            },\n        ];\n\n        let mut super_table = 
SuperTable::new(\n            SUPER_TABLE_VISITED_URLS.to_string(),\n            \"Visited URLs\".to_string(),\n            \"No visited URLs.\".to_string(),\n            columns,\n            false,\n            None,\n            \"ASC\".to_string(),\n            None,\n            None,\n            None,\n        );\n        super_table.set_ignore_hard_rows_limit(true);\n        super_table.set_data(data);\n\n        super_table\n    }\n\n    // -------------------------------------------------------------------------\n    // Helpers\n    // -------------------------------------------------------------------------\n\n    /// Image gallery form HTML (size/mode/filter controls)\n    fn get_image_gallery_form_html(&self) -> String {\n        let mut html = String::from(\n            r#\"\n            <style>\n            #imageDisplayForm {\n                display: flex;\n                gap: 12px;\n                flex-wrap: wrap;\n                margin-bottom: 20px;\n            }\n            </style>\"#,\n        );\n\n        html.push_str(\n            r#\"<script>\n                function updateClassName(elementId, className) {\n                    document.getElementById(elementId).className = className;\n                    if (elementId === \"igc\") {\n                        var images = document.getElementById(elementId).getElementsByTagName(\"img\");\n                        for (var i = 0; i < images.length; i++) {\n                            var image = images[i];\n                            image.width = className === \"small\" ? 140 : (className === \"medium\" ? 200 : 360);\n                            image.height = className === \"small\" ? 140 : (className === \"medium\" ? 
200 : 360);\n                        }\n                    }\n                }\n            </script>\"#,\n        );\n\n        html.push_str(IMAGE_GALLERY_FILTER_SCRIPT);\n\n        html.push_str(r#\"<form id=\"imageDisplayForm\">\n                <div class=\"form-group\">\n                    <div class=\"btn-group\">\n                        <input class=\"idf\" type=\"radio\" id=\"sizeSmall\" name=\"thumbnailSize\" value=\"small\" data-key=\"igc\" checked>\n                        <label for=\"sizeSmall\">small</label>\n                        <input class=\"idf\" type=\"radio\" id=\"sizeMedium\" name=\"thumbnailSize\" value=\"medium\" data-key=\"igc\">\n                        <label for=\"sizeMedium\">medium</label>\n                        <input class=\"idf\" type=\"radio\" id=\"sizeLarge\" name=\"thumbnailSize\" value=\"large\" data-key=\"igc\">\n                        <label for=\"sizeLarge\">large</label>\n                    </div>\n                </div>\n                <div class=\"form-group\">\n                    <div class=\"btn-group\">\n                        <input class=\"idf\" type=\"radio\" id=\"modeScaleDown\" name=\"thumbnailMode\" value=\"scaleDown\" data-key=\"igcf\" checked>\n                        <label for=\"modeScaleDown\">scale-down</label>\n                        <input class=\"idf\" type=\"radio\" id=\"modeContain\" name=\"thumbnailMode\" value=\"contain\" data-key=\"igcf\">\n                        <label for=\"modeContain\">contain</label>\n                        <input class=\"idf\" type=\"radio\" id=\"modeCover\" name=\"thumbnailMode\" value=\"cover\" data-key=\"igcf\">\n                        <label for=\"modeCover\">cover</label>\n                    </div>\n                </div>\n                <div class=\"form-group\">\n                    <div class=\"btn-group\" id=\"typeFilters\">\n                    </div>\n                </div>\n                <div class=\"form-group\">\n                    <div 
class=\"btn-group\" id=\"sourceFilters\">\n                    </div>\n                </div>\n                <div class=\"form-group\">\n                    <div class=\"btn-group\" id=\"sizeFilters\">\n                    </div>\n                </div>\n            </form>\"#);\n\n        html\n    }\n\n    /// Get initial host from the URL\n    fn get_initial_host(&self) -> String {\n        let url = self.get_initial_url();\n        url::Url::parse(&url)\n            .ok()\n            .and_then(|u| u.host_str().map(|h| h.to_string()))\n            .unwrap_or_default()\n    }\n\n    /// Get initial URL from status\n    fn get_initial_url(&self) -> String {\n        self.status.get_crawler_info().initial_url.clone()\n    }\n\n    /// Get initial scheme from the URL\n    fn get_initial_scheme(&self) -> String {\n        let url = self.get_initial_url();\n        url::Url::parse(&url)\n            .ok()\n            .map(|u| u.scheme().to_string())\n            .unwrap_or_else(|| \"https\".to_string())\n    }\n\n    /// Build analysis detail sub-tables for Best Practices, Accessibility, and Security tabs.\n    /// Returns a map of analysis_name -> rendered HTML table.\n    fn build_analysis_detail_tables(&self) -> HashMap<String, String> {\n        let initial_host = self.get_initial_host();\n        let initial_scheme = self.get_initial_scheme();\n\n        // Gather all per-URL analysis details, aggregated by analysis_name\n        let aggregated = self.get_data_for_super_tables_with_details();\n\n        let mut result = HashMap::new();\n\n        // Build all analysis names from all three analyzers\n        let all_names: Vec<&str> = BEST_PRACTICE_ANALYSIS_NAMES\n            .iter()\n            .chain(ACCESSIBILITY_ANALYSIS_NAMES.iter())\n            .chain(SECURITY_ANALYSIS_NAMES.iter())\n            .copied()\n            .collect();\n\n        for analysis_name in all_names {\n            let mut data = 
aggregated.get(analysis_name).cloned().unwrap_or_default();\n\n            // Sort by severity (ascending) then by count (descending)\n            data.sort_by(|a, b| {\n                let sev_a: i32 = a.get(\"severity\").and_then(|s| s.parse().ok()).unwrap_or(999);\n                let sev_b: i32 = b.get(\"severity\").and_then(|s| s.parse().ok()).unwrap_or(999);\n                if sev_a == sev_b {\n                    let count_a: usize = a.get(\"count\").and_then(|s| s.parse().ok()).unwrap_or(0);\n                    let count_b: usize = b.get(\"count\").and_then(|s| s.parse().ok()).unwrap_or(0);\n                    count_b.cmp(&count_a)\n                } else {\n                    sev_a.cmp(&sev_b)\n                }\n            });\n\n            let apl_code = analysis_name.to_lowercase().replace(' ', \"-\");\n            let initial_host_clone = initial_host.clone();\n            let initial_scheme_clone = initial_scheme.clone();\n\n            let columns = vec![\n                SuperTableColumn::new(\n                    \"severity\".to_string(),\n                    \"Severity\".to_string(),\n                    10,\n                    None,\n                    Some(Box::new(|row: &HashMap<String, String>, _render_into: &str| {\n                        let sev = row.get(\"severityFormatted\").map(|s| s.as_str()).unwrap_or(\"\");\n                        utils::get_colored_severity(sev)\n                    })),\n                    false,\n                    false,\n                    false,\n                    true,\n                    None,\n                ),\n                SuperTableColumn::new(\n                    \"count\".to_string(),\n                    \"Occurs\".to_string(),\n                    8,\n                    None,\n                    None,\n                    false,\n                    false,\n                    false,\n                    true,\n                    None,\n                ),\n                
SuperTableColumn::new(\n                    \"detail\".to_string(),\n                    \"Detail\".to_string(),\n                    200,\n                    Some(Box::new(|value: &str, _render_into: &str| {\n                        // HTML-escape for safety, then convert newlines to <br>\n                        let escaped = html_escape(value);\n                        escaped.replace('\\n', \"<br>\")\n                    })),\n                    None,\n                    false,\n                    true,\n                    false,\n                    false,\n                    None,\n                ),\n                SuperTableColumn::new(\n                    \"exampleUrls\".to_string(),\n                    format!(\"Affected URLs (max {})\", MAX_EXAMPLE_URLS),\n                    60,\n                    None,\n                    Some(Box::new(move |row: &HashMap<String, String>, _render_into: &str| {\n                        let urls_str = row.get(\"exampleUrls\").map(|s| s.as_str()).unwrap_or(\"\");\n                        if urls_str.is_empty() {\n                            return String::new();\n                        }\n                        let urls: Vec<&str> = urls_str.split('\\x1E').collect(); // record separator\n                        let mut html_out = String::new();\n                        if urls.len() == 1 {\n                            for url in &urls {\n                                let truncated = utils::truncate_url(\n                                    url,\n                                    60,\n                                    \"\\u{2026}\",\n                                    Some(&initial_host_clone),\n                                    Some(&initial_scheme_clone),\n                                    None,\n                                );\n                                html_out.push_str(&format!(\n                                    \"<a href=\\\"{}\\\" target=\\\"_blank\\\">{}</a><br />\",\n           
                         html_escape(url),\n                                    html_escape(&truncated),\n                                ));\n                            }\n                        } else {\n                            for (i, url) in urls.iter().enumerate() {\n                                html_out.push_str(&format!(\n                                    \"<a href=\\\"{}\\\" target=\\\"_blank\\\">URL {}</a>, \",\n                                    html_escape(url),\n                                    i + 1,\n                                ));\n                            }\n                        }\n                        html_out.trim_end_matches(\", \").to_string()\n                    })),\n                    false,\n                    true,\n                    false,\n                    false,\n                    None,\n                ),\n            ];\n\n            let mut super_table = SuperTable::new(\n                apl_code,\n                analysis_name.to_string(),\n                \"No problems found.\".to_string(),\n                columns,\n                false,\n                None,\n                \"ASC\".to_string(),\n                None,\n                Some(100),\n                None,\n            );\n\n            super_table.set_data(data);\n            let html = super_table.get_html_output();\n            result.insert(analysis_name.to_string(), html);\n        }\n\n        result\n    }\n\n    /// Gather per-URL analysis details, aggregated by analysis_name.\n    /// Returns analysis_name -> Vec of aggregated rows.\n    fn get_data_for_super_tables_with_details(&self) -> HashMap<String, Vec<HashMap<String, String>>> {\n        let analysis_results = self.status.get_visited_url_to_analysis_result();\n        let mut raw_data: HashMap<String, Vec<(String, String, i32, String)>> = HashMap::new();\n\n        for (uq_id, entries) in &analysis_results {\n            let url = 
self.status.get_url_by_uq_id(uq_id).unwrap_or_default();\n\n            for entry in entries {\n                let result = &entry.result;\n\n                // Critical details\n                for (analysis_name, details) in result.get_critical_details() {\n                    for detail in details {\n                        raw_data.entry(analysis_name.clone()).or_default().push((\n                            url.clone(),\n                            \"critical\".to_string(),\n                            SEVERITY_ORDER_CRITICAL,\n                            detail.clone(),\n                        ));\n                    }\n                }\n\n                // Warning details\n                for (analysis_name, details) in result.get_warning_details() {\n                    for detail in details {\n                        raw_data.entry(analysis_name.clone()).or_default().push((\n                            url.clone(),\n                            \"warning\".to_string(),\n                            SEVERITY_ORDER_WARNING,\n                            detail.clone(),\n                        ));\n                    }\n                }\n\n                // Notice details\n                for (analysis_name, details) in result.get_notice_details() {\n                    for detail in details {\n                        raw_data.entry(analysis_name.clone()).or_default().push((\n                            url.clone(),\n                            \"notice\".to_string(),\n                            SEVERITY_ORDER_NOTICE,\n                            detail.clone(),\n                        ));\n                    }\n                }\n            }\n        }\n\n        // Aggregate: group identical (severity, aggregated_detail) pairs and count occurrences.\n        let mut aggregated: HashMap<String, Vec<HashMap<String, String>>> = HashMap::new();\n\n        for (analysis_name, rows) in &raw_data {\n            let mut groups: HashMap<String, 
HashMap<String, String>> = HashMap::new();\n            let mut group_urls: HashMap<String, Vec<String>> = HashMap::new();\n\n            for (url, severity_formatted, severity_order, detail) in rows {\n                let agg_detail = aggregate_detail(detail);\n                let agg_key = aggregate_detail_key(severity_formatted, &agg_detail);\n                let entry = groups.entry(agg_key.clone()).or_insert_with(|| {\n                    let mut row = HashMap::new();\n                    row.insert(\"severityFormatted\".to_string(), severity_formatted.clone());\n                    row.insert(\"severity\".to_string(), severity_order.to_string());\n                    row.insert(\"detail\".to_string(), agg_detail.clone());\n                    row.insert(\"count\".to_string(), \"0\".to_string());\n                    row\n                });\n                let count: usize = entry.get(\"count\").and_then(|c| c.parse().ok()).unwrap_or(0);\n                entry.insert(\"count\".to_string(), (count + 1).to_string());\n\n                let urls = group_urls.entry(agg_key).or_default();\n                if urls.len() < MAX_EXAMPLE_URLS && !urls.contains(url) {\n                    urls.push(url.clone());\n                }\n            }\n\n            let mut result_rows: Vec<HashMap<String, String>> = Vec::new();\n            for (key, mut row) in groups {\n                if let Some(urls) = group_urls.get(&key) {\n                    row.insert(\"exampleUrls\".to_string(), urls.join(\"\\x1E\"));\n                }\n                result_rows.push(row);\n            }\n\n            aggregated.insert(analysis_name.clone(), result_rows);\n        }\n\n        aggregated\n    }\n}\n\n// =============================================================================\n// Free functions that work on extracted SuperTableInfo (no &self needed)\n// =============================================================================\n\n/// Generate tab content for a 
SuperTable, potentially including related sub-tables\nfn get_tab_content_by_super_table(\n    info: &SuperTableInfo,\n    all_infos: &[SuperTableInfo],\n    analysis_detail_html: &HashMap<String, String>,\n) -> String {\n    let mut html = info.html_output.clone();\n\n    // Add related sub-tables based on apl_code\n    let related_codes: Vec<&str> = match info.apl_code.as_str() {\n        \"skipped-summary\" => vec![ST_SKIPPED],\n        \"headers\" => vec![ST_HEADERS_VALUES],\n        \"content-types\" => vec![ST_CONTENT_MIME_TYPES],\n        \"caching-per-content-type\" => vec![ST_CACHING_PER_DOMAIN, ST_CACHING_PER_DOMAIN_AND_CONTENT_TYPE],\n        _ => vec![],\n    };\n\n    for related_code in related_codes {\n        if let Some(related) = all_infos.iter().find(|i| i.apl_code == related_code) {\n            html.push_str(\"<br/>\");\n            html.push_str(&related.html_output);\n        }\n    }\n\n    // Add analysis detail sub-tables for best-practices, accessibility, security\n    let analysis_names: &[&str] = match info.apl_code.as_str() {\n        \"best-practices\" => BEST_PRACTICE_ANALYSIS_NAMES,\n        \"accessibility\" => ACCESSIBILITY_ANALYSIS_NAMES,\n        \"security\" => SECURITY_ANALYSIS_NAMES,\n        _ => &[],\n    };\n\n    for analysis_name in analysis_names {\n        if let Some(detail_html) = analysis_detail_html.get(*analysis_name) {\n            html.push_str(\"<br/>\");\n            html.push_str(detail_html);\n        }\n    }\n\n    html\n}\n\n/// Get badges for visited URLs table\nfn get_visited_urls_badges(super_table: &SuperTable) -> Vec<Badge> {\n    let mut badges = Vec::new();\n    let mut red = 0usize;\n    let mut orange = 0usize;\n    let mut green = 0usize;\n\n    for row in super_table.get_data() {\n        let status_code: i32 = row.get(\"status\").and_then(|s| s.parse().ok()).unwrap_or(0);\n\n        if status_code <= 0 || status_code >= 400 {\n            red += 1;\n        } else if status_code >= 300 {\n      
      orange += 1;\n        } else {\n            green += 1;\n        }\n    }\n\n    if red > 0 {\n        badges.push(Badge::with_title(\n            red.to_string(),\n            BadgeColor::Red,\n            \"Errors (40x, 50x, timeout, etc.)\",\n        ));\n    }\n    if orange > 0 {\n        badges.push(Badge::with_title(\n            orange.to_string(),\n            BadgeColor::Orange,\n            \"Redirects (30x)\",\n        ));\n    }\n    if green > 0 {\n        badges.push(Badge::with_title(green.to_string(), BadgeColor::Green, \"OK (20x)\"));\n    }\n\n    badges\n}\n\n/// Get badges for a SuperTable based on its apl_code\nfn get_super_table_badges_by_apl_code(info: &SuperTableInfo, all_infos: &[SuperTableInfo]) -> Vec<Badge> {\n    let mut badges = Vec::new();\n\n    match info.apl_code.as_str() {\n        \"redirects\" => {\n            let redirects = info.total_rows;\n            let color = if redirects > 100 {\n                BadgeColor::Red\n            } else if redirects > 0 {\n                BadgeColor::Orange\n            } else {\n                BadgeColor::Green\n            };\n            badges.push(Badge::new(redirects.to_string(), color));\n        }\n        \"404\" => {\n            let not_found = info.total_rows;\n            let color = if not_found > 10 {\n                BadgeColor::Red\n            } else if not_found > 0 {\n                BadgeColor::Orange\n            } else {\n                BadgeColor::Green\n            };\n            badges.push(Badge::new(not_found.to_string(), color));\n        }\n        \"skipped-summary\" => {\n            let skipped = info.total_rows;\n            let color = if skipped > 10 {\n                BadgeColor::Orange\n            } else {\n                BadgeColor::Green\n            };\n            badges.push(Badge::with_title(skipped.to_string(), color, \"Skipped URL domains\"));\n            if let Some(skipped_urls) = all_infos.iter().find(|i| i.apl_code == ST_SKIPPED) 
{\n                badges.push(Badge::with_title(\n                    skipped_urls.total_rows.to_string(),\n                    BadgeColor::Neutral,\n                    \"Total skipped URLs\",\n                ));\n            }\n        }\n        \"source-domains\" => {\n            let domains = info.total_rows;\n            let color = if domains > 10 {\n                BadgeColor::Orange\n            } else {\n                BadgeColor::Neutral\n            };\n            badges.push(Badge::new(domains.to_string(), color));\n        }\n        \"content-types\" => {\n            let content_types = info.total_rows;\n            badges.push(Badge::with_title(\n                content_types.to_string(),\n                BadgeColor::Neutral,\n                \"Total content types\",\n            ));\n            if let Some(mime_types) = all_infos.iter().find(|i| i.apl_code == ST_CONTENT_MIME_TYPES) {\n                badges.push(Badge::with_title(\n                    mime_types.total_rows.to_string(),\n                    BadgeColor::Neutral,\n                    \"Total MIME types\",\n                ));\n            }\n        }\n        \"fastest-urls\" => {\n            let fastest_time = info\n                .data\n                .iter()\n                .filter_map(|row| row.get(\"time\").and_then(|s| s.parse::<f64>().ok()))\n                .fold(None, |acc: Option<f64>, t| Some(acc.map_or(t, |a| a.min(t))));\n            if let Some(time) = fastest_time {\n                let color = if time < 0.5 {\n                    BadgeColor::Green\n                } else if time < 2.0 {\n                    BadgeColor::Orange\n                } else {\n                    BadgeColor::Red\n                };\n                badges.push(Badge::new(utils::get_formatted_duration(time), color));\n            }\n        }\n        \"slowest-urls\" => {\n            let slowest_time = info\n                .data\n                .iter()\n                
.filter_map(|row| row.get(\"time\").and_then(|s| s.parse::<f64>().ok()))\n                .fold(None, |acc: Option<f64>, t| Some(acc.map_or(t, |a| a.max(t))));\n            if let Some(time) = slowest_time {\n                let color = if time < 0.5 {\n                    BadgeColor::Green\n                } else if time < 2.0 {\n                    BadgeColor::Orange\n                } else {\n                    BadgeColor::Red\n                };\n                badges.push(Badge::new(utils::get_formatted_duration(time), color));\n            }\n        }\n        \"headers\" => {\n            let headers = info.total_rows;\n            let color = if headers > 50 {\n                BadgeColor::Red\n            } else {\n                BadgeColor::Neutral\n            };\n            badges.push(Badge::new(headers.to_string(), color));\n        }\n        \"external-urls\" => {\n            let count = info.total_rows;\n            let color = if count > 0 {\n                BadgeColor::Neutral\n            } else {\n                BadgeColor::Green\n            };\n            badges.push(Badge::with_title(count.to_string(), color, \"External URLs\"));\n        }\n        \"caching-per-content-type\" => {\n            let mut min_cache_lifetime: Option<i64> = None;\n            let mut max_cache_lifetime: Option<i64> = None;\n\n            for row in &info.data {\n                let content_type = row.get(\"contentType\").map(|s| s.as_str()).unwrap_or(\"\");\n                if ![\"Image\", \"CSS\", \"JS\", \"Font\"].contains(&content_type) {\n                    continue;\n                }\n                if let Some(min_val) = row.get(\"minLifetime\").and_then(|s| s.parse::<i64>().ok()) {\n                    min_cache_lifetime = Some(min_cache_lifetime.map_or(min_val, |v: i64| v.min(min_val)));\n                }\n                if let Some(max_val) = row.get(\"maxLifetime\").and_then(|s| s.parse::<i64>().ok()) {\n                    
max_cache_lifetime = Some(max_cache_lifetime.map_or(max_val, |v: i64| v.max(max_val)));\n                }\n            }\n\n            if let Some(min_lt) = min_cache_lifetime {\n                let color = if min_lt < 60 {\n                    BadgeColor::Red\n                } else if min_lt < 3600 {\n                    BadgeColor::Orange\n                } else {\n                    BadgeColor::Green\n                };\n                badges.push(Badge::with_title(\n                    utils::get_formatted_cache_lifetime(min_lt),\n                    color,\n                    \"Minimal cache lifetime for images/css/js/fonts\",\n                ));\n            }\n            if let Some(max_lt) = max_cache_lifetime {\n                let color = if max_lt < 60 {\n                    BadgeColor::Red\n                } else if max_lt < 3600 {\n                    BadgeColor::Orange\n                } else {\n                    BadgeColor::Green\n                };\n                badges.push(Badge::with_title(\n                    utils::get_formatted_cache_lifetime(max_lt),\n                    color,\n                    \"Maximal cache lifetime for images/css/js/fonts\",\n                ));\n            }\n        }\n        _ => {\n            // Use generic badges for other tables\n            badges = get_super_table_generic_badges(info);\n        }\n    }\n\n    badges\n}\n\n/// Get generic badges by counting severity columns\nfn get_super_table_generic_badges(info: &SuperTableInfo) -> Vec<Badge> {\n    let mut badges = Vec::new();\n    let mut red = 0i64;\n    let mut orange = 0i64;\n    let mut blue = 0i64;\n    let mut green = 0i64;\n    let mut neutral = 0i64;\n\n    for row in &info.data {\n        if let Some(val) = row.get(\"ok\").and_then(|s| s.parse::<i64>().ok()) {\n            green += val;\n        }\n        if let Some(val) = row.get(\"notice\").and_then(|s| s.parse::<i64>().ok()) {\n            blue += val;\n        }\n        if 
let Some(val) = row.get(\"warning\").and_then(|s| s.parse::<i64>().ok()) {\n            orange += val;\n        }\n        if let Some(val) = row.get(\"critical\").and_then(|s| s.parse::<i64>().ok()) {\n            red += val;\n        }\n        if let Some(val) = row.get(\"error\").and_then(|s| s.parse::<i64>().ok()) {\n            red += val;\n        }\n        if let Some(val) = row.get(\"info\").and_then(|s| s.parse::<i64>().ok()) {\n            neutral += val;\n        }\n    }\n\n    if red > 0 {\n        badges.push(Badge::with_title(red.to_string(), BadgeColor::Red, \"Critical\"));\n    }\n    if orange > 0 {\n        badges.push(Badge::with_title(orange.to_string(), BadgeColor::Orange, \"Warning\"));\n    }\n    if blue > 0 {\n        badges.push(Badge::with_title(blue.to_string(), BadgeColor::Blue, \"Notice\"));\n    }\n    if green > 0 {\n        badges.push(Badge::with_title(green.to_string(), BadgeColor::Green, \"OK\"));\n    }\n    if neutral > 0 {\n        badges.push(Badge::with_title(neutral.to_string(), BadgeColor::Neutral, \"Info\"));\n    }\n\n    badges\n}\n\n/// HTML-escape a string\nfn html_escape(s: &str) -> String {\n    s.replace('&', \"&amp;\")\n        .replace('<', \"&lt;\")\n        .replace('>', \"&gt;\")\n        .replace('\"', \"&quot;\")\n        .replace('\\'', \"&#039;\")\n}\n\n/// Remove excessive whitespace from HTML.\n/// Build the quality scores HTML block (donut chart + category bars) for the Summary tab.\nfn build_quality_scores_html(scores: &crate::scoring::quality_score::QualityScores) -> String {\n    let overall = &scores.overall;\n    let deg = overall.score / 10.0 * 360.0;\n    let color = overall.color_hex();\n\n    let mut html = String::new();\n\n    // Embedded styles for light (default) and dark (checked) modes\n    html.push_str(concat!(\n        \"<style>\\n\",\n        \".qs-box{margin-bottom:24px;padding:20px;border-radius:12px;background:#F3F4F6;}\\n\",\n        \".qs-title{margin:0 0 
16px;font-size:18px;color:#111827;}\\n\",\n        \".qs-donut-inner{background:#F3F4F6;}\\n\",\n        \".qs-bar-track{background:#E5E7EB;}\\n\",\n        \".qs-cat-name{color:#4B5563;}\\n\",\n        \"html:has(.theme-switch__input:checked) .qs-box{background:#1F2937;}\\n\",\n        \"html:has(.theme-switch__input:checked) .qs-title{color:#F9FAFB;}\\n\",\n        \"html:has(.theme-switch__input:checked) .qs-donut-inner{background:#1F2937;}\\n\",\n        \"html:has(.theme-switch__input:checked) .qs-bar-track{background:#374151;}\\n\",\n        \"html:has(.theme-switch__input:checked) .qs-cat-name{color:#D1D5DB;}\\n\",\n        \"</style>\\n\",\n    ));\n\n    // Container\n    html.push_str(\"<div class=\\\"qs-box\\\">\\n\");\n    html.push_str(\"<h3 class=\\\"qs-title\\\">Website Quality Score</h3>\\n\");\n\n    // Flex container for donut + categories\n    html.push_str(\"<div style=\\\"display:flex;align-items:center;gap:32px;flex-wrap:wrap;\\\">\\n\");\n\n    // Donut chart — track color via qs-bar-track on outer ring\n    html.push_str(&format!(\n        concat!(\n            \"<div class=\\\"qs-bar-track\\\" style=\\\"position:relative;width:140px;height:140px;border-radius:50%;\",\n            \"background:conic-gradient({color} 0deg {deg:.1}deg,transparent {deg:.1}deg 360deg);\",\n            \"flex-shrink:0;\\\">\\n\",\n            \"<div class=\\\"qs-donut-inner\\\" style=\\\"position:absolute;top:50%;left:50%;transform:translate(-50%,-50%);\",\n            \"width:100px;height:100px;border-radius:50%;\",\n            \"display:flex;flex-direction:column;align-items:center;justify-content:center;\\\">\\n\",\n            \"<span style=\\\"font-size:28px;font-weight:bold;color:{color};\\\">{score:.1}</span>\\n\",\n            \"<span style=\\\"font-size:13px;color:{color};\\\">{label}</span>\\n\",\n            \"</div>\\n</div>\\n\",\n        ),\n        color = color,\n        deg = deg,\n        score = overall.score,\n        label = overall.label,\n 
   ));\n\n    // Category bars container\n    html.push_str(\"<div style=\\\"flex:1;min-width:200px;\\\">\\n\");\n\n    for cat in &scores.categories {\n        let pct = cat.score / 10.0 * 100.0;\n        let cat_color = cat.color_hex();\n        html.push_str(&format!(\n            concat!(\n                \"<div style=\\\"display:flex;align-items:center;margin-bottom:8px;\\\">\\n\",\n                \"<span class=\\\"qs-cat-name\\\" style=\\\"width:120px;font-size:13px;\\\">{name}</span>\\n\",\n                \"<div class=\\\"qs-bar-track\\\" style=\\\"flex:1;height:12px;border-radius:6px;margin:0 10px;overflow:hidden;\\\">\\n\",\n                \"<div style=\\\"width:{pct:.0}%;height:100%;background:{color};border-radius:6px;\\\"></div>\\n\",\n                \"</div>\\n\",\n                \"<span style=\\\"width:36px;color:{color};font-weight:bold;font-size:13px;text-align:right;\\\">{score:.1}</span>\\n\",\n                \"</div>\\n\",\n            ),\n            name = cat.name,\n            pct = pct,\n            color = cat_color,\n            score = cat.score,\n        ));\n    }\n\n    html.push_str(\"</div>\\n\"); // end bars container\n    html.push_str(\"</div>\\n\"); // end flex\n    html.push_str(\"</div>\\n\"); // end outer container\n\n    html\n}\n\n///   1. Inside <script>/<style> blocks: only replace \"> <\" with \"> <\"\n///   2. Collapse all whitespace to single space\n///   3. 
Replace \"> <\" with \"> <\"\nfn remove_whitespaces_from_html(html: &str) -> String {\n    use once_cell::sync::Lazy;\n    use regex::Regex;\n\n    // Separate regexes for script and style (no backreference needed)\n    static RE_SCRIPT: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?is)<script\\b[^>]*>.*?</script>\").unwrap());\n    static RE_STYLE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?is)<style\\b[^>]*>.*?</style>\").unwrap());\n    static RE_WHITESPACE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"\\s+\").unwrap());\n    static RE_TAG_WHITESPACE: Lazy<Regex> = Lazy::new(|| Regex::new(r\">\\s+<\").unwrap());\n\n    // Step 1: In script blocks, replace \"> <\" with \"> <\"\n    let html = RE_SCRIPT.replace_all(html, |caps: &regex::Captures| {\n        RE_TAG_WHITESPACE.replace_all(&caps[0], \"> <\").to_string()\n    });\n\n    // Step 1b: In style blocks, replace \"> <\" with \"> <\"\n    let html = RE_STYLE.replace_all(&html, |caps: &regex::Captures| {\n        RE_TAG_WHITESPACE.replace_all(&caps[0], \"> <\").to_string()\n    });\n\n    // Step 2: Collapse all whitespace to single space\n    let html = RE_WHITESPACE.replace_all(&html, \" \");\n\n    // Step 3: Replace \"> <\" with \"> <\"\n    let html = RE_TAG_WHITESPACE.replace_all(&html, \"> <\");\n\n    html.to_string()\n}\n\n/// Normalize a detail string for aggregation/deduplication.\n/// 1. For SVG details, return as-is\n/// 2. Remove all HTML attributes except id, class, name (replace with \" *** \")\n/// 3. Extract only the first HTML tag\n/// 4. 
Replace trailing numbers before quotes with ***\nfn aggregate_detail(detail: &str) -> String {\n    use once_cell::sync::Lazy;\n    use regex::Regex;\n\n    // SVG details pass through unchanged\n    if detail.starts_with(\"<svg\") || detail.contains(\"x SVG \") {\n        return detail.to_string();\n    }\n\n    // Step 1: Remove unwanted attributes, keeping only id, class, name\n    static RE_TAG_ATTRS: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?is)<([a-z][a-z0-9]*)\\s+([^>]*)>\").unwrap());\n    static RE_ATTR: Lazy<Regex> =\n        Lazy::new(|| Regex::new(r#\"(?is)([a-z][-a-z0-9_]*)\\s*=\\s*(\"(?:[^\"]*)\"?|'(?:[^']*)'?)\"#).unwrap());\n\n    let allowed_attrs = [\"id\", \"class\", \"name\"];\n    let svg_tags = [\n        \"svg\",\n        \"g\",\n        \"path\",\n        \"circle\",\n        \"rect\",\n        \"line\",\n        \"polyline\",\n        \"polygon\",\n        \"text\",\n        \"tspan\",\n        \"use\",\n        \"defs\",\n        \"clippath\",\n        \"mask\",\n        \"pattern\",\n        \"marker\",\n        \"lineargradient\",\n        \"radialgradient\",\n        \"stop\",\n        \"image\",\n        \"foreignobject\",\n    ];\n\n    let result = RE_TAG_ATTRS.replace_all(detail, |caps: &regex::Captures| {\n        let tag_name = &caps[1];\n        let attrs_string = &caps[2];\n\n        // Don't modify SVG tags\n        if svg_tags.contains(&tag_name.to_lowercase().as_str()) {\n            return caps[0].to_string();\n        }\n\n        let mut kept_attrs = String::new();\n        let mut any_removed = false;\n\n        for attr_match in RE_ATTR.captures_iter(attrs_string) {\n            let attr_name = &attr_match[1];\n            if allowed_attrs.contains(&attr_name.to_lowercase().as_str()) {\n                kept_attrs.push_str(&attr_match[0]);\n                kept_attrs.push(' ');\n            } else {\n                any_removed = true;\n            }\n        }\n\n        // Also check for valueless attributes (like 
\"disabled\", \"checked\")\n        // that weren't caught by the key=value regex\n        let kept_trimmed = kept_attrs.trim_end();\n        let suffix = if any_removed { \" *** \" } else { \"\" };\n        if kept_trimmed.is_empty() {\n            if any_removed {\n                format!(\"<{} ***>\", tag_name)\n            } else {\n                format!(\"<{}>\", tag_name)\n            }\n        } else {\n            format!(\"<{} {}{}>\", tag_name, kept_trimmed, suffix)\n        }\n    });\n\n    let mut result = result.to_string();\n\n    // Step 1b: Normalize class attribute values — for each class name containing\n    // a hyphen or underscore, keep only the first segment and replace the rest with *.\n    // E.g. class=\"astro-3ii7xxms\" → class=\"astro-*\", class=\"sl-flex astro-wy4te6ga\" → class=\"sl-* astro-*\"\n    static RE_CLASS_ATTR: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"class=\"([^\"]*)\"\"#).unwrap());\n    result = RE_CLASS_ATTR\n        .replace_all(&result, |caps: &regex::Captures| {\n            let class_value = &caps[1];\n            let normalized_classes: Vec<String> = class_value\n                .split_whitespace()\n                .map(|cls| {\n                    if let Some(pos) = cls.find(['-', '_']) {\n                        format!(\"{}*\", &cls[..=pos])\n                    } else {\n                        cls.to_string()\n                    }\n                })\n                .collect();\n            format!(\"class=\\\"{}\\\"\", normalized_classes.join(\" \"))\n        })\n        .to_string();\n\n    // Step 2: If result starts with '<', extract only the first HTML tag\n    if result.trim_start_matches(&['\"', '\\'', ' '][..]).starts_with('<') {\n        static RE_FIRST_TAG: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?s)^[\\s\"']*(<[^>]+>)\"#).unwrap());\n        if let Some(caps) = RE_FIRST_TAG.captures(&result) {\n            result = caps[1].to_string();\n        }\n    }\n\n    // Step 3: Replace trailing 
numbers before quotes with ***\n    static RE_TRAILING_NUMS: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"([0-9]+)([\"'])\"#).unwrap());\n    result = RE_TRAILING_NUMS.replace_all(&result, \"***$2\").to_string();\n\n    result\n}\n\n/// Build aggregation key for a detail (severity + md5 of normalized detail).\nfn aggregate_detail_key(severity: &str, detail: &str) -> String {\n    use md5::{Digest, Md5};\n\n    let mut clean_detail = detail.to_string();\n    // Remove clip-path from SVGs for comparison\n    if clean_detail.contains(\"<svg\") {\n        use once_cell::sync::Lazy;\n        use regex::Regex;\n        static RE_CLIPPATH_TAG: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)<clipPath[^>]+>\").unwrap());\n        static RE_CLIPPATH_ATTR: Lazy<Regex> = Lazy::new(|| Regex::new(r#\"(?i)clip-path=\"[^\"]+\"\"#).unwrap());\n        clean_detail = RE_CLIPPATH_TAG.replace_all(&clean_detail, \"\").to_string();\n        clean_detail = RE_CLIPPATH_ATTR.replace_all(&clean_detail, \"\").to_string();\n    }\n\n    let mut hasher = Md5::new();\n    hasher.update(clean_detail.as_bytes());\n    let hash = format!(\"{:x}\", hasher.finalize());\n    format!(\"{} | {}\", severity, hash)\n}\n\n/// JavaScript for image gallery filtering\nconst IMAGE_GALLERY_FILTER_SCRIPT: &str = r#\"<script> function initializeFilters() {\n                const links = document.querySelectorAll('#image-gallery a');\n                const types = new Set();\n                const sources = new Set();\n                const sizeCategories = [\n                    { label: 'any', filter: () => true },\n                    { label: '> 5 MB', filter: size => size > 5 * 1024 * 1024 },\n                    { label: '> 1MB', filter: size => size > 1 * 1024 * 1024 },\n                    { label: '> 500kB', filter: size => size > 500 * 1024 },\n                    { label: '> 100kB', filter: size => size > 100 * 1024 },\n                    { label: '> 10kB', filter: size => size > 10 * 1024 },\n             
       { label: '< 10kB', filter: size => size < 10 * 1024 }\n                ];\n\n                links.forEach(link => {\n                    types.add(link.dataset.type);\n                    sources.add(link.dataset.source);\n                });\n\n                addSizeFilters('sizeFilters', sizeCategories, links, filterImagesBySize);\n                addToggleButtonsToFilter('typeFilters', ['any'].concat(Array.from(types).sort((a, b) => countLinksOfType(b, links) - countLinksOfType(a, links))), filterImagesByType, links);\n                addToggleButtonsToFilter('sourceFilters', ['any'].concat(Array.from(sources).sort((a, b) => countLinksOfSource(b, links) - countLinksOfSource(a, links))), filterImagesBySource, links);\n            }\n\n            function addToggleButtonsToFilter(filterId, categories, filterFunction, links) {\n                const filterDiv = document.getElementById(filterId);\n                categories.forEach((category, index) => {\n                    const radioId = filterId + category;\n                    const radioInput = document.createElement('input');\n                    radioInput.setAttribute('type', 'radio');\n                    radioInput.setAttribute('id', radioId);\n                    radioInput.setAttribute('name', filterId);\n                    radioInput.setAttribute('value', category);\n                    if (category === 'any') {\n                        radioInput.setAttribute('checked', 'checked');\n                    }\n                    radioInput.onchange = () => filterFunction(category);\n\n                    const label = document.createElement('label');\n                    label.setAttribute('for', radioId);\n\n                    let labelCountText = category;\n                    if (category !== 'any') {\n                        const count = filterId === 'typeFilters' ? 
countLinksOfType(category, links) : countLinksOfSource(category, links);\n                        labelCountText += ` (${count})`;\n                    } else {\n                        labelCountText += ' (' + links.length + ')';\n                    }\n                    label.textContent = labelCountText;\n\n                    filterDiv.appendChild(radioInput);\n                    filterDiv.appendChild(label);\n                });\n            }\n\n            function addToggleButton(filterDiv, filterId, value, labelText, filterFunction) {\n                const radioId = filterId + '-' + value.replace(/\\s/g, '-');\n\n                const radioInput = document.createElement('input');\n                radioInput.setAttribute('type', 'radio');\n                radioInput.setAttribute('id', radioId);\n                radioInput.setAttribute('name', filterId);\n                radioInput.setAttribute('value', value);\n                radioInput.addEventListener('change', () => filterFunction(value));\n\n                if (labelText === 'any') {\n                    radioInput.setAttribute('checked', 'checked');\n                }\n\n                const label = document.createElement('label');\n                label.setAttribute('for', radioId);\n                label.textContent = labelText;\n\n                filterDiv.appendChild(radioInput);\n                filterDiv.appendChild(label);\n            }\n\n            function countLinksOfType(type, links) {\n                return Array.from(links).filter(link => link.dataset.type === type).length;\n            }\n\n            function countLinksOfSource(source, links) {\n                return Array.from(links).filter(link => link.dataset.source === source).length;\n            }\n\n            function doesSizeMatchCategory(size, category) {\n                const sizeInKB = size / 1024;\n\n                switch (category) {\n                    case 'any':\n                        return true;\n     
               case '> 5 MB':\n                        return sizeInKB > 5120;\n                    case '> 1MB':\n                        return sizeInKB > 1024;\n                    case '> 500kB':\n                        return sizeInKB > 500;\n                    case '> 100kB':\n                        return sizeInKB > 100;\n                    case '> 10kB':\n                        return sizeInKB > 10;\n                    case '< 10kB':\n                        return sizeInKB < 10;\n                    default:\n                        return false;\n                }\n            }\n\n            function filterImagesByType(selectedType) {\n                const links = document.querySelectorAll('#image-gallery a');\n                links.forEach(link => {\n                    if (selectedType === 'any' || link.dataset.type === selectedType) {\n                        link.dataset.typematch = '1';\n                    } else {\n                        link.dataset.typematch = '0';\n                    }\n                });\n                filterByMatched();\n            }\n\n            function filterImagesBySource(selectedSource) {\n                const links = document.querySelectorAll('#image-gallery a');\n                links.forEach(link => {\n                    if (selectedSource === 'any' || link.dataset.source === selectedSource) {\n                        link.dataset.sourcematch = '1';\n                    } else {\n                        link.dataset.sourcematch = '0';\n                    }\n                });\n                filterByMatched();\n            }\n\n            function filterImagesBySize(selectedSizeCategory) {\n                const links = document.querySelectorAll('#image-gallery a');\n                links.forEach(link => {\n                    const imageSize = parseInt(link.dataset.size, 10);\n\n                    if (doesSizeMatchCategory(imageSize, selectedSizeCategory)) {\n                        
link.dataset.sizematch = '1';\n                    } else {\n                        link.dataset.sizematch = '0';\n                    }\n                });\n                filterByMatched();\n            }\n\n            function addSizeFilters(filterId, categories, links, filterFunction) {\n                const filterDiv = document.getElementById(filterId);\n                categories.forEach(category => {\n                    const count = Array.from(links).filter(link => category.filter(parseInt(link.dataset.size, 10))).length;\n                    const labelWithCount = `${category.label} (${count})`;\n                    if (count > 0) {\n                        addToggleButton(filterDiv, filterId, category.label, labelWithCount, filterFunction);\n                    }\n                });\n            }\n\n            function filterByMatched() {\n                const links = document.querySelectorAll('#image-gallery a');\n                links.forEach(link => {\n                    if (link.dataset.sizematch === '1' && link.dataset.typematch === '1' && link.dataset.sourcematch === '1') {\n                        link.style.display = 'inline-block'\n                    } else {\n                        link.style.display = 'none';\n                    }\n                });\n            }\n\n            document.addEventListener('DOMContentLoaded', function() {\n                initializeFilters();\n            });\n\n            </script>\"#;\n\n/// JavaScript for video gallery\nconst VIDEO_GALLERY_SCRIPT: &str = r#\"<script> function playVideos() {\n            const videos = document.querySelectorAll(\"video\");\n\n            function playVideoSequentially(index) {\n                if (index >= videos.length) return;\n\n                const video = videos[index];\n                video.load();\n                video.currentTime = 0;\n\n                video.addEventListener(\"loadeddata\", function() {\n                    video.play();\n\n         
           setTimeout(() => {\n                        video.pause();\n                        setTimeout(() => playVideoSequentially(index + 1), 10);\n                    }, 2000);\n                }, { once: true });\n            }\n\n            playVideoSequentially(0);\n        }\n\n        /* init lazy loading */\n        document.addEventListener(\"DOMContentLoaded\", function() {\n            const videos = document.querySelectorAll(\"video\");\n\n            const observer = new IntersectionObserver(entries => {\n                entries.forEach(entry => {\n                    if (entry.isIntersecting) {\n                        const video = entry.target;\n                        if (!video.src) {\n                            video.src = video.dataset.src;\n                            video.load();\n                        }\n                        observer.unobserve(video);\n                    }\n                });\n            });\n\n            videos.forEach(video => {\n                observer.observe(video);\n            });\n        });\n\n        </script>\"#;\n"
  },
  {
    "path": "src/export/html_report/tab.rs",
    "content": "// SiteOne Crawler - Tab for HTML Report\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse regex::Regex;\r\n\r\nuse super::badge::Badge;\r\n\r\n/// A tab in the HTML report\r\n#[derive(Debug, Clone)]\r\npub struct Tab {\r\n    pub name: String,\r\n    pub description: Option<String>,\r\n    pub radio_html_id: String,\r\n    pub content_html_id: String,\r\n    pub tab_content: String,\r\n    pub add_heading: bool,\r\n    pub fixed_order: Option<i32>,\r\n    pub order: Option<i32>,\r\n    pub badges: Vec<Badge>,\r\n}\r\n\r\nimpl Tab {\r\n    pub fn new(\r\n        name: &str,\r\n        description: Option<&str>,\r\n        tab_content: String,\r\n        add_heading: bool,\r\n        badges: Vec<Badge>,\r\n        fixed_order: Option<i32>,\r\n    ) -> Self {\r\n        let sanitized = sanitize_id(name);\r\n        let radio_html_id = format!(\"radio_{}\", sanitized);\r\n        let content_html_id = format!(\"content_{}\", sanitized);\r\n\r\n        Self {\r\n            name: name.to_string(),\r\n            description: description.map(|s| s.to_string()),\r\n            radio_html_id,\r\n            content_html_id,\r\n            tab_content,\r\n            add_heading,\r\n            fixed_order,\r\n            order: None,\r\n            badges,\r\n        }\r\n    }\r\n\r\n    pub fn set_order(&mut self, order: Option<i32>) {\r\n        self.order = order;\r\n    }\r\n\r\n    /// Returns the final sort order: order > fixed_order > 1000 (default)\r\n    pub fn get_final_sort_order(&self) -> i32 {\r\n        if let Some(order) = self.order {\r\n            order\r\n        } else {\r\n            self.fixed_order.unwrap_or(1000)\r\n        }\r\n    }\r\n}\r\n\r\n/// Sanitize a tab name into a valid HTML ID\r\nfn sanitize_id(name: &str) -> String {\r\n    let re = Regex::new(r\"[^a-zA-Z0-9\\-]+\").unwrap_or_else(|_| Regex::new(r\"\\W+\").unwrap());\r\n    re.replace_all(name, \"_\").to_lowercase()\r\n}\r\n"
  },
  {
    "path": "src/export/html_report/template.html",
    "content": "<!DOCTYPE html>\r\n<html lang=\"en\">\r\n<head>\r\n    <meta charset=\"UTF-8\">\r\n    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\r\n    <meta name=\"author\" content=\"Ján Regeš, https://www.siteone.io/\">\r\n    <title>SiteOne Crawler Report - {$initialHost} | {$executedAt}</title>\r\n\r\n    <style>\r\n        :root {\r\n            /* colors */\r\n            --color-white: #fff;\r\n            --color-black: #000000;\r\n            --color-gray-50: #f9fafb;\r\n            --color-gray-100: #eeecec;\r\n            --color-gray-200: #E5E7EB;\r\n            --color-gray-300: #D1D5DB;\r\n            --color-gray-400: #9CA3AF;\r\n            --color-gray-500: #6B7280;\r\n            --color-gray-600: #4B5563;\r\n            --color-gray-700: #374151;\r\n            --color-gray-800: #1F2937;\r\n            --color-gray-900: #111827;\r\n            --color-gray-950: rgb(9, 15, 32);\r\n            --color-blue-100: #DBEAFE;\r\n            --color-blue-500: #3B82F6;\r\n            --color-blue-600: #2563EB;\r\n            --color-blue-950: rgb(23, 37, 84);\r\n            /* text-colors */\r\n            --text-color-default: var(--color-gray-900);\r\n            --text-color-neutral: var(--color-gray-300);\r\n            --text-color-blue: var(--color-blue-500);\r\n            --text-color-blue-dark: var(--color-blue-600);\r\n            /* background-colors */\r\n            --background-color-default: var(--color-white);\r\n            --background-color-neutral: var(--color-gray-100);\r\n\r\n            --border-radius: 1.5rem;\r\n            --padding-inline: 2rem;\r\n        }\r\n\r\n        html * {\r\n            box-sizing: border-box;\r\n        }\r\n\r\n        body {\r\n            margin: 0;\r\n            padding: 0;\r\n            width: 100%;\r\n            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;\r\n            background-color: var(--background-color-neutral);\r\n            color: 
var(--text-color-default);\r\n            transition: background-color 0.2s ease-out, color 0.2s ease-out;\r\n        }\r\n\r\n        main {\r\n            display: flex;\r\n            flex-direction: column;\r\n            gap: 2rem;\r\n        }\r\n\r\n        section {\r\n            background-color: var(--background-color-default);\r\n            padding: 2rem var(--padding-inline);\r\n            border-radius: var(--border-radius);\r\n            transition: background-color 0.2s ease-out;\r\n        }\r\n\r\n        body h2 {\r\n            margin-top: 0;\r\n        }\r\n\r\n        body a {\r\n            color: var(--text-color-default);\r\n            transition: color 0.2s ease-out;\r\n        }\r\n\r\n        table {\r\n            border-collapse: collapse;\r\n            min-width: 300px;\r\n        }\r\n\r\n        table.table-compact {\r\n            font-size: 0.7em;\r\n        }\r\n\r\n        table,\r\n        table th,\r\n        table td {\r\n            border: 1px solid var(--color-gray-100);\r\n            padding: 2px 4px !important;\r\n            vertical-align: top;\r\n            text-align: left;\r\n            transition: border-color 0.2s ease-out, background-color 0.2s ease-out;\r\n        }\r\n\r\n        table td.warning {\r\n            text-align: center;\r\n            color: var(--text-color-neutral);\r\n        }\r\n\r\n        table td svg {\r\n            max-width: 100px;\r\n            max-height: 18px;\r\n        }\r\n\r\n        .table-sortable th {\r\n            cursor: pointer;\r\n        }\r\n\r\n        html:has(.theme-switch__input:checked) table,\r\n        html:has(.theme-switch__input:checked) table td,\r\n        html:has(.theme-switch__input:checked) table th {\r\n            border-color: var(--background-color-neutral);\r\n        }\r\n\r\n        table th {\r\n            background-color: var(--background-color-neutral);\r\n        }\r\n\r\n        html:has(.theme-switch__input:checked) table th {\r\n  
          background-color: var(--background-color-neutral);\r\n        }\r\n\r\n        table th table.table-two-col th {\r\n            width: 20%;\r\n        }\r\n\r\n        .container {\r\n            display: flex;\r\n            flex-direction: column;\r\n            gap: 2rem;\r\n            max-width: 187.5rem;\r\n            margin: 0 auto;\r\n            padding: 1.5rem 4rem;\r\n        }\r\n\r\n        header.header {\r\n            display: flex;\r\n            align-items: center;\r\n            justify-content: space-between;\r\n            background-color: var(--background-color-default);\r\n            padding-inline: var(--padding-inline);\r\n            padding-block: 2rem;\r\n            border-radius: 1000px;\r\n            border: 1px solid var(--color-gray-100);\r\n            transition: background-color 0.2s ease-out, border-color 0.2s ease-out;\r\n        }\r\n\r\n        header.header a.logo {\r\n            display: flex;\r\n        }\r\n\r\n        header.header h1 {\r\n            margin: 0;\r\n        }\r\n\r\n        html:has(.theme-switch__input:checked) .body {\r\n            --text-color-default: var(--color-gray-200);\r\n            --text-color-neutral: var(--color-gray-600);\r\n            --background-color-default: var(--color-gray-900);\r\n            --background-color-neutral: var(--color-gray-800);\r\n        }\r\n\r\n        html:has(.theme-switch__input:checked) header.header {\r\n            border-color: transparent;\r\n        }\r\n\r\n        .theme-switch__dot--light {\r\n            transform: translateX(0rem);\r\n        }\r\n\r\n        html:has(.theme-switch__input:checked) .theme-switch__dot{\r\n            transform: translateX(2rem);\r\n            background-color: var(--color-gray-300);\r\n        }\r\n\r\n        .theme-switch label {\r\n            display: flex;\r\n            align-items: center;\r\n            gap: .375rem;\r\n            cursor: pointer;\r\n        }\r\n\r\n        
.theme-switch__wrapper {\r\n            display: flex;\r\n            align-items: center;\r\n            position: relative;\r\n            border-radius: var(--border-radius);\r\n            background-color: var(--color-gray-300);\r\n            width: 4rem;\r\n            height: 2rem;\r\n            transition: all 0.2s ease-out;\r\n        }\r\n\r\n        html:has(.theme-switch__input:checked) .theme-switch__wrapper {\r\n            background-color: var(--color-gray-600);\r\n        }\r\n\r\n        .theme-switch__input {\r\n            display: none;\r\n        }\r\n\r\n        .theme-switch__dot {\r\n            position: absolute;\r\n            height: 1.5rem;\r\n            width: 1.5rem;\r\n            left: .25rem;\r\n            background-color: var(--color-gray-600);\r\n            border-radius: 50%;\r\n            transition: all .2s ease-out;\r\n        }\r\n\r\n        .theme-switch__dot--light {\r\n            transform: translateX(0rem);\r\n        }\r\n\r\n        .theme-switch__dot--dark {\r\n            transform: translateX(2rem);\r\n            background-color: var(--color-gray-300);\r\n        }\r\n\r\n        .info__wrapper {\r\n            overflow-x: auto;\r\n        }\r\n\r\n        .summary ul {\r\n            padding-left: 20px;\r\n        }\r\n\r\n        .tabs {\r\n            display: grid;\r\n            grid-template-columns: minmax(0, 18.225rem) 1fr;\r\n            gap: 2rem;\r\n        }\r\n\r\n        .tabs__navigation {\r\n            display: flex;\r\n            flex-direction: column;\r\n            gap: 0.5rem;\r\n            border-right: 1px solid var(--color-gray-700);\r\n            padding-right: 2rem;\r\n        }\r\n\r\n        .tabs__radio {\r\n            display: none;\r\n        }\r\n\r\n        .tabs__tab {\r\n            display: none;\r\n        }\r\n\r\n        .tabs__content {\r\n            overflow: auto;\r\n            font-size: 0.8em;\r\n        }\r\n\r\n        .tabs__content ul {\r\n           
 padding-left: 0px;\r\n        }\r\n        .tabs__content ul li {\r\n            list-style: none;\r\n        }\r\n\r\n        .tabs__title {\r\n            margin: 0;\r\n            font-size: 1rem;\r\n            padding: .5rem .75rem;\r\n            background-color: var(--background-color-neutral);\r\n            border-radius: var(--border-radius);\r\n            transition: background-color .2s ease-out;\r\n            cursor: pointer;\r\n        }\r\n\r\n\r\n        .tabs__title:hover {\r\n            background-color: var(--color-blue-500);\r\n            color: var(--color-white);\r\n        }\r\n\r\n        html:has(.theme-switch__input:checked) .tabs__title {\r\n            background-color: var(--background-color-neutral);\r\n        }\r\n\r\n        html:has(.theme-switch__input:checked) .tabs__title:hover {\r\n            background-color: var(--color-blue-600);\r\n            color: var(--color-white);\r\n        }\r\n\r\n        .tabs__table {\r\n            border-radius: var(--border-radius);\r\n            font-size: 0.875rem;\r\n        }\r\n\r\n        .badge {\r\n            display: inline-block;\r\n            padding: 2px 4px;\r\n            color: var(--color-white);\r\n            border-radius: 8px;\r\n            font-size: 0.75rem;\r\n            line-height: 1;\r\n            min-width: 18px;\r\n            text-align: center;\r\n        }\r\n        html:has(.theme-switch__input:checked) .badge {\r\n            color: var(--color-white);\r\n        }\r\n        .badge.red {\r\n            background-color: #e3342f;\r\n        }\r\n        .badge.green {\r\n            background-color: #38c172;\r\n        }\r\n        .badge.blue {\r\n            background-color: #3490dc;\r\n        }\r\n        .badge.orange {\r\n            background-color: #ff9234;\r\n        }\r\n        .badge.yellow {\r\n            background-color: #ffed4a;\r\n            color: var(--color-gray-800) !important;\r\n        }\r\n        .badge.neutral {\r\n 
           background-color: #9ba7b4;\r\n        }\r\n\r\n        html:has(.theme-switch__input:checked) .badge.neutral {\r\n            background-color: #718096;\r\n        }\r\n\r\n        .badge.in-table {\r\n            background-color: var(--background-color-neutral);\r\n        }\r\n\r\n        .fulltext-container {\r\n            display: none;\r\n            margin-bottom: 8px;\r\n        }\r\n\r\n        .fulltext-container .found-rows {\r\n            margin-bottom: 8px;\r\n        }\r\n\r\n        .js-enabled .fulltext-container {\r\n            display: block;\r\n        }\r\n\r\n        input.fulltext {\r\n            width: 100%;\r\n            padding: 0.4rem;\r\n            border-radius: 2px;\r\n            border: 1px solid var(--background-color-neutral);\r\n            background-color: var(--background-color-default);\r\n            color: var(--text-color-blue);\r\n            transition: border-color 0.2s ease-out;\r\n            margin-right: 8px;\r\n        }\r\n\r\n        input.fulltext:focus, input.fulltext:focus-visible {\r\n            outline: none;\r\n            box-shadow: 0 0 0 1px rgba(255, 255, 255, 0.1);\r\n            border-color: var(--background-color-neutral) !important;\r\n        }\r\n\r\n        table tr.empty-fulltext {\r\n            display: none;\r\n        }\r\n\r\n        ::placeholder {\r\n            color: var(--text-color-neutral);\r\n            opacity: 1;\r\n        }\r\n\r\n        ::-ms-input-placeholder {\r\n            color: var(--text-color-neutral);\r\n        }\r\n\r\n        @media screen and (max-width: 64rem) {\r\n            main {\r\n                gap: 2.5rem;\r\n            }\r\n\r\n            h1 {\r\n                font-size: 1.25rem;\r\n            }\r\n\r\n            .container {\r\n                padding: 2.5rem;\r\n            }\r\n\r\n            .tabs {\r\n                grid-template-columns: 1fr;\r\n            }\r\n\r\n            .tabs__navigation {\r\n                
justify-content: center;\r\n                flex-direction: row;\r\n                flex-wrap: wrap;\r\n                padding: 0;\r\n                border: none;\r\n                text-align: center;\r\n            }\r\n\r\n            .tabs__tab {\r\n                justify-content: center;\r\n            }\r\n        }\r\n\r\n        @media screen and (max-width: 40rem) {\r\n            :root {\r\n                --padding-inline: 1rem;\r\n            }\r\n\r\n            main {\r\n                gap: 2rem;\r\n            }\r\n\r\n            h1 {\r\n                font-size: 1.25rem;\r\n                text-align: center;\r\n            }\r\n\r\n            h2 {\r\n                font-size: 1.125rem;\r\n            }\r\n\r\n            .container {\r\n                padding: 1.25rem;\r\n                gap: 2rem;\r\n            }\r\n\r\n            header.header {\r\n                display: grid;\r\n                grid-template-columns: 1fr 1fr;\r\n                grid-template-rows: auto auto;\r\n                gap: 1rem;\r\n                padding-block: 1.5rem;\r\n                border-radius: var(--border-radius);\r\n            }\r\n\r\n            header.header .theme-switch {\r\n                justify-self: end;\r\n            }\r\n\r\n            header.header h1 {\r\n                grid-row: 2;\r\n                grid-column: 1 / 3;\r\n            }\r\n\r\n            .tabs__title {\r\n                font-size: .875rem;\r\n                padding: .25rem .5rem;\r\n            }\r\n        }\r\n\r\n        table.seo-headings td ul {\r\n            margin: 0;\r\n        }\r\n\r\n        table.seo-headings td ul ul {\r\n            padding-left: 20px;\r\n        }\r\n        table.seo-headings td ul li {\r\n            list-style: none;\r\n        }\r\n        table.seo-headings td.headings .badge {\r\n            text-align: left;\r\n            background-color: transparent;\r\n            color: #ff9234 !important;\r\n        }\r\n\r\n    
    .table-container.show-more {\r\n            display: block;\r\n            max-height: 658px !important; /* based on typical sidebar height */\r\n            overflow: hidden;\r\n            transition: max-height 0.5s cubic-bezier(0, 1, 0, 1);\r\n        }\r\n\r\n        .show-more-label {\r\n            display: block;\r\n            cursor: pointer;\r\n            text-align: left;\r\n            padding: 10px 50px 10px 0;\r\n            color: var(--color-blue-600);\r\n        }\r\n\r\n        .show-more-checkbox {\r\n            display: none;\r\n        }\r\n\r\n        .show-more-checkbox:checked ~ .table-container.show-more {\r\n            transition: max-height 1s ease;\r\n            max-height: none !important;\r\n            /* overflow: auto; */\r\n        }\r\n\r\n        .show-more-checkbox:checked ~ .show-more-label {\r\n            display: none;\r\n        }\r\n\r\n        table tr td.urlPathAndQuery, table tr td.url {\r\n            overflow: hidden;\r\n            text-overflow: ellipsis;\r\n            white-space: nowrap;\r\n        }\r\n\r\n        table tr td.urlPathAndQuery {\r\n            max-width: 300px;\r\n        }\r\n\r\n        /*\r\n        table tr td.url {\r\n            max-width: 800px;\r\n        }\r\n         */\r\n\r\n        table.large-inline-svgs tr td.detail, table.duplicate-inline-svgs tr td.detail, table.invalid-inline-svgs tr td.detail  {\r\n            min-width: 150px;\r\n            /*\r\n            display: flex;\r\n            justify-content: space-between;\r\n            align-items: center;\r\n             */\r\n        }\r\n\r\n        table.large-inline-svgs tr td.detail svg, table.duplicate-inline-svgs tr td.detail svg, table.invalid-inline-svgs tr td.detail svg {\r\n            float: right;\r\n            /* margin-left: auto; */\r\n        }\r\n\r\n        table tr td.header {\r\n            min-width: 120px;\r\n            white-space: nowrap;\r\n        }\r\n\r\n        td.status, td.type, 
td.time, td.size {\r\n            white-space: nowrap;\r\n        }\r\n\r\n        .text-muted {\r\n            color: var(--color-gray-600);\r\n        }\r\n\r\n        .help {\r\n            cursor: help;\r\n        }\r\n\r\n        .form-group {\r\n            flex-grow: 0;\r\n            flex-basis: auto;\r\n        }\r\n\r\n        .btn-group {\r\n            display: flex;\r\n            flex-wrap: wrap;\r\n            justify-content: flex-start;\r\n            gap: 1px;\r\n            border: 0;\r\n        }\r\n\r\n        .btn-group label {\r\n            padding: 3px 6px;\r\n            cursor: pointer;\r\n            background-color: var(--background-color-neutral);\r\n            color: var(--text-color-default);\r\n            transition: background-color 0.3s, color 0.3s;\r\n            white-space: nowrap;\r\n            z-index: 1000;\r\n        }\r\n\r\n        .btn-group label:hover,\r\n        .btn-group input[type=\"radio\"]:checked + label {\r\n            background-color: var(--color-blue-600);\r\n            color: var(--color-white);\r\n        }\r\n\r\n        .btn-group input[type=\"radio\"] {\r\n            display: none;\r\n        }\r\n\r\n        .btn-group label:first-of-type {\r\n            border-radius: 4px 0 0 4px;\r\n        }\r\n\r\n        .btn-group label:last-of-type {\r\n            border-radius: 0 4px 4px 0;\r\n        }\r\n\r\n        .btn {\r\n            padding: 0.5rem 1rem;\r\n            border-radius: 1rem;\r\n            border: 0;\r\n            background-color: var(--color-blue-600);\r\n            color: var(--color-white);\r\n            cursor: pointer;\r\n            margin-bottom: 1rem;\r\n\r\n        }\r\n\r\n        .image-gallery {\r\n            display: flex;\r\n            flex-wrap: wrap;\r\n            gap: 6px;\r\n        }\r\n\r\n        .image-gallery img {\r\n            object-fit: contain;\r\n            transition: transform 0.3s ease;\r\n            cursor: help;\r\n        }\r\n\r\n      
  .small .image-gallery img {\r\n            max-width: 140px;\r\n            max-height: 140px;\r\n        }\r\n\r\n        .medium .image-gallery img {\r\n            max-width: 200px;\r\n            max-height: 200px;\r\n        }\r\n\r\n        .large .image-gallery img {\r\n            max-width: 360px;\r\n            max-height: 360px;\r\n        }\r\n\r\n        .cover .image-gallery img {\r\n            object-fit: cover;\r\n        }\r\n\r\n        .contain .image-gallery img {\r\n            object-fit: contain;\r\n        }\r\n\r\n        .scaleDown .image-gallery img {\r\n            object-fit: scale-down;\r\n        }\r\n\r\n        .image-gallery img:hover {\r\n            transform: scale(1.3);\r\n            z-index: -1000;\r\n        }\r\n\r\n        .image-gallery .highlighted {\r\n            border: 2px solid red;\r\n            padding: 4px;\r\n            box-sizing: border-box;\r\n        }\r\n\r\n        .video-container {\r\n            display: flex;\r\n            flex-wrap: wrap;\r\n            gap: 10px;\r\n        }\r\n\r\n        .video-card {\r\n            display: flex;\r\n            flex-direction: column;\r\n            border: 1px solid var(--background-color-default);\r\n            padding: 10px;\r\n            background-color: var(--background-color-neutral);\r\n            box-sizing: border-box;\r\n            min-width: 400px;\r\n            flex: 1 1 400px;\r\n        }\r\n\r\n        .video-card video {\r\n            min-width: 100%;\r\n            min-height: 300px;\r\n        }\r\n\r\n        .video-caption {\r\n            margin-top: 10px;\r\n            text-align: center;\r\n            font-size: 14px;\r\n            color: var(--text-color-default);\r\n            flex-grow: 1;\r\n        }\r\n\r\n        .video-container {\r\n            align-items: stretch;\r\n        }\r\n\r\n        .iconset-preview {\r\n            display: flex;\r\n            flex-wrap: wrap;\r\n            width: 100%;\r\n            
justify-content: flex-start;\r\n            align-items: center;\r\n            gap: 6px;\r\n        }\r\n\r\n        .iconset-icon {\r\n            max-width: 20px;\r\n            max-height: 20px;\r\n            height: auto;\r\n        }\r\n\r\n        {$tabsCss}\r\n\r\n    </style>\r\n</head>\r\n\r\n<body class=\"body\">\r\n<script>\r\n    document.getElementsByTagName('body')[0].setAttribute('class', 'body js-enabled');\r\n</script>\r\n<div class=\"container\">\r\n    <header class=\"header\">\r\n        <a href=\"https://crawler.siteone.io/?utm_source=siteone_crawler&utm_medium=logo&utm_campaign=crawler_report&utm_content=v{$version}\"\r\n           target=\"_blank\" class=\"logo\" aria-label=\"Clickable logo of SiteOne Crawler linking to crawler.siteone.io\">\r\n            <svg width=\"70px\" height=\"34px\" fill=\"none\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" x=\"0px\" y=\"0px\"\r\n                 viewBox=\"0 0 119 59\" xml:space=\"preserve\">\r\n                <path d=\"M49.4 29.1L49.4 29.1L49.4 29.1l8.8-8.8l0 0h0V0h-9.9v16.2l-5.9 5.9L29.1 8.9L15.9 22.1l-5.9-5.9V0H0v20.2h0l0 0l8.8 8.8\r\n                    l0 0l0 0L0 37.9l0 0h0v20.2h9.9V42l5.9-5.9l13.3 13.3l13.3-13.3l5.9 5.9v16.2h9.9V38h0l0 0L49.4 29.1z M29.1 35.4l-6.3-6.3l6.3-6.3\r\n                    l6.3 6.3L29.1 35.4z\" fill=\"var(&#45;&#45;text-color-default)\"/>\r\n                <path fill-rule=\"evenodd\" clip-rule=\"evenodd\"\r\n                      d=\"M92.3 15v33.2H75.5v10H119v-10h-16.4V0h-9.3L67.1 26.2l7 7C74.1 33.2 92.3 15 92.3 15z\"\r\n                      fill=\"var(&#45;&#45;text-color-neutral)\"/>\r\n            </svg>\r\n        </a>\r\n        <h1>Crawler Report for <a href=\"{$initialUrl}\" aria-label=\"Link to Crawler target website {$initialHost}\">{$initialHost}</a></h1>\r\n        <div class=\"theme-switch\">\r\n            <label>\r\n                <input type=\"checkbox\" class=\"theme-switch__input\" aria-label=\"Theme switch - 
dark mode is enabled by default\" checked>\r\n                <svg width=\"24\" height=\"24\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\"\r\n                     stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"feather feather-sun\">\r\n                    <circle cx=\"12\" cy=\"12\" r=\"5\"></circle>\r\n                    <line x1=\"12\" y1=\"1\" x2=\"12\" y2=\"3\"></line>\r\n                    <line x1=\"12\" y1=\"21\" x2=\"12\" y2=\"23\"></line>\r\n                    <line x1=\"4.22\" y1=\"4.22\" x2=\"5.64\" y2=\"5.64\"></line>\r\n                    <line x1=\"18.36\" y1=\"18.36\" x2=\"19.78\" y2=\"19.78\"></line>\r\n                    <line x1=\"1\" y1=\"12\" x2=\"3\" y2=\"12\"></line>\r\n                    <line x1=\"21\" y1=\"12\" x2=\"23\" y2=\"12\"></line>\r\n                    <line x1=\"4.22\" y1=\"19.78\" x2=\"5.64\" y2=\"18.36\"></line>\r\n                    <line x1=\"18.36\" y1=\"5.64\" x2=\"19.78\" y2=\"4.22\"></line>\r\n                </svg>\r\n                <div class=\"theme-switch__wrapper\">\r\n                    <div class=\"theme-switch__dot\"></div>\r\n                </div>\r\n                <svg width=\"24\" height=\"24\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\"\r\n                     stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"feather feather-moon\">\r\n                    <path d=\"M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z\"></path>\r\n                </svg>\r\n            </label>\r\n        </div>\r\n    </header>\r\n    <main>\r\n        <section class=\"tabs\">\r\n            {$tabsRadios}\r\n            <nav class=\"tabs__navigation\" aria-label=\"Navigation\">\r\n                {$tabs}\r\n            </nav>\r\n            <div class=\"tabs__content\">\r\n                {$tabsContent}\r\n            </div>\r\n        </section>\r\n    </main>\r\n    <footer aria-label=\"Footer with basic info about report and 
crawler\">\r\n        <br/>\r\n        <hr/>\r\n        The report was generated <strong>{$executedAt}</strong> using the <span style=\"color: red;\">♥</span>\r\n        <a href=\"https://github.com/janreges/siteone-crawler\" aria-label=\"Link to SiteOne Crawler Github repository\"><strong>SiteOne Crawler</strong></a>\r\n        by Ján Regeš from <a\r\n            href=\"https://www.siteone.io/?utm_source=siteone_crawler&utm_medium=email&utm_campaign=crawler_report&utm_content=v{$version}\" aria-label=\"Link to SiteOne.io - main author of this crawler\"><strong>SiteOne</strong></a>\r\n        (Czech Republic).<br/>\r\n        <br/>\r\n    </footer>\r\n    <script>\r\n        function sortTable(tableId, columnKey) {\r\n\r\n            const table = document.querySelector('#' + tableId);\r\n            const tbody = table.querySelector('tbody');\r\n            const rows = Array.from(tbody.querySelectorAll('tr'));\r\n            const headerCells = Array.from(table.querySelectorAll('thead th'));\r\n            const columnIndex = Array.from(table.querySelectorAll('thead th')).findIndex(th => th.getAttribute('data-key') === columnKey);\r\n            const direction = headerCells[columnIndex].getAttribute('data-direction');\r\n            const dataType = headerCells[columnIndex].getAttribute('data-type');\r\n\r\n            rows.sort((a, b) => {\r\n                if (a.children.length !== b.children.length) {\r\n                    return 0;\r\n                }\r\n                let aValue = a.children[columnIndex].getAttribute('data-value');\r\n                let bValue = b.children[columnIndex].getAttribute('data-value');\r\n\r\n                if (dataType === 'number') {\r\n                    aValue = parseFloat(aValue);\r\n                    bValue = parseFloat(bValue);\r\n                }\r\n\r\n                if (direction === 'ASC') {\r\n                    return aValue > bValue ? 1 : aValue < bValue ? 
-1 : 0;\r\n                } else {\r\n                    return aValue < bValue ? 1 : aValue > bValue ? -1 : 0;\r\n                }\r\n            });\r\n\r\n            rows.forEach(row => tbody.appendChild(row));\r\n\r\n            headerCells.forEach(th => {\r\n                let label = th.getAttribute('data-label');\r\n                if (th.getAttribute('data-key') === columnKey) {\r\n                    th.innerHTML = label + (direction === 'ASC' ? '&nbsp;🔼' : '&nbsp;🔽');\r\n                    th.setAttribute('data-direction', direction === 'ASC' ? 'DESC' : 'ASC');\r\n                } else {\r\n                    th.textContent = label;\r\n                }\r\n            });\r\n        }\r\n\r\n        function debounce(callback, timeout) {\r\n            let timerId;\r\n            return function (...args) {\r\n                clearTimeout(timerId);\r\n                timerId = setTimeout(() => {\r\n                    callback.apply(this, args);\r\n                }, timeout);\r\n            };\r\n        }\r\n\r\n        function tableFulltext(tableId, searchTerm) {\r\n            var table = document.getElementById(tableId);\r\n            if (!table) {\r\n                console.warn('Table with id \"' + tableId + '\" was not found.');\r\n                return;\r\n            }\r\n            searchTerm = searchTerm.trim().toLowerCase();\r\n\r\n            /* index table rows for effective fulltext search */\r\n            if (table.getAttribute('data-fulltext-ready') !== '1') {\r\n                var rows = table.getElementsByTagName('tr');\r\n                for (var i = 0; i < rows.length; i++) {\r\n                    var cells = rows[i].getElementsByTagName('td');\r\n                    var fulltextValue = '';\r\n                    for (var j = 0; j < cells.length; j++) {\r\n                        fulltextValue += cells[j].textContent + ' ';\r\n                    }\r\n                    fulltextValue = fulltextValue.trim();\r\n        
            rows[i].setAttribute('data-fulltext', fulltextValue.toLowerCase());\r\n                }\r\n                table.setAttribute('data-fulltext-ready', '1');\r\n            }\r\n\r\n            var terms = searchTerm.split(' ');\r\n\r\n            var tbody = table.getElementsByTagName('tbody')[0];\r\n            var rows = tbody.getElementsByTagName('tr');\r\n            var foundRows = 0;\r\n            for (var i = 0; i < rows.length; i++) {\r\n                var rowFulltext = rows[i].getAttribute('data-fulltext') || '';\r\n                var display = terms.every(function(term) {\r\n                    return rowFulltext.includes(term);\r\n                }) ? '' : 'none';\r\n\r\n                if (display === '') {\r\n                    foundRows++;\r\n                }\r\n\r\n                rows[i].style.display = display;\r\n            }\r\n\r\n            var emptyFulltextRow = table.getElementsByClassName('empty-fulltext')[0];\r\n            emptyFulltextRow.style.display = foundRows > 0 ? 
'none' : 'table-row';\r\n\r\n            document.getElementById('foundRows_' + tableId).textContent = 'Found ' + foundRows + ' row(s).';\r\n        }\r\n\r\n        function debouncedTableFulltext(tableId, searchTerm) {\r\n            debounce(tableFulltext, 250)(tableId, searchTerm);\r\n        }\r\n\r\n        document.addEventListener('DOMContentLoaded', function () {\r\n            /* add event listeners to fulltext inputs above super tables */\r\n            function onFulltextKeyup(event) {\r\n                const dataUqId = event.target.getAttribute('data-uq-id');\r\n                const inputValue = event.target.value;\r\n                debouncedTableFulltext(dataUqId, inputValue);\r\n            }\r\n\r\n            const inputs = document.querySelectorAll('input.fulltext[data-uq-id]');\r\n            inputs.forEach(input => {\r\n                input.addEventListener('keyup', onFulltextKeyup);\r\n            });\r\n\r\n            /* add event listeners to sortable table headers */\r\n            function onTableHeaderClick(event) {\r\n                const dataUqId = event.target.getAttribute('data-uq-id');\r\n                const dataKey = event.target.getAttribute('data-key');\r\n                sortTable(dataUqId, dataKey);\r\n            }\r\n\r\n            const tableHeaders = document.querySelectorAll('th.sortable-th[data-uq-id]');\r\n            tableHeaders.forEach(th => {\r\n                th.addEventListener('click', onTableHeaderClick);\r\n            });\r\n\r\n            /* add event listeners to image gallery filters */\r\n            function onImageFilterClick(event) {\r\n                const dataKey = event.target.getAttribute('data-key');\r\n                updateClassName(dataKey, event.target.value);\r\n            }\r\n\r\n            const imgFilters = document.querySelectorAll('input.idf[data-key]');\r\n            imgFilters.forEach(f => {\r\n                f.addEventListener('change', onImageFilterClick);\r\n            
});\r\n        });\r\n\r\n    </script>\r\n</div>\r\n</body>\r\n\r\n</html>"
  },
  {
    "path": "src/export/mailer_exporter.rs",
    "content": "// SiteOne Crawler - MailerExporter\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Sends crawl report via SMTP email using the lettre crate.\n\nuse std::sync::atomic::{AtomicBool, Ordering};\n\nuse lettre::message::{Attachment, MultiPart, SinglePart, header::ContentType};\nuse lettre::transport::smtp::authentication::Credentials;\nuse lettre::{Message, SmtpTransport, Transport};\n\nuse crate::error::{CrawlerError, CrawlerResult};\nuse crate::export::exporter::Exporter;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::version;\n\n/// Global flag to prevent sending emails when crawler is interrupted (CTRL+C).\nstatic CRAWLER_INTERRUPTED: AtomicBool = AtomicBool::new(false);\n\npub fn set_crawler_interrupted(interrupted: bool) {\n    CRAWLER_INTERRUPTED.store(interrupted, Ordering::SeqCst);\n}\n\npub fn is_crawler_interrupted() -> bool {\n    CRAWLER_INTERRUPTED.load(Ordering::SeqCst)\n}\n\npub struct MailerExporter {\n    /// Recipient email addresses (--mail-to, can be multiple)\n    pub mail_to: Vec<String>,\n    /// Sender email address (--mail-from)\n    pub mail_from: String,\n    /// Sender display name (--mail-from-name)\n    pub mail_from_name: String,\n    /// SMTP host (--mail-smtp-host)\n    pub mail_smtp_host: String,\n    /// SMTP port (--mail-smtp-port)\n    pub mail_smtp_port: u16,\n    /// SMTP username (--mail-smtp-user)\n    pub mail_smtp_user: Option<String>,\n    /// SMTP password (--mail-smtp-pass)\n    pub mail_smtp_pass: Option<String>,\n    /// Email subject template (--mail-subject-template)\n    pub mail_subject_template: String,\n    /// Initial host from crawled URL (for subject/body interpolation)\n    pub initial_host: Option<String>,\n    /// HTML report content to attach (set before export)\n    pub html_report_content: Option<String>,\n}\n\nimpl MailerExporter {\n    #[allow(clippy::too_many_arguments)]\n    pub fn new(\n        mail_to: Vec<String>,\n        mail_from: String,\n    
    mail_from_name: String,\n        mail_smtp_host: String,\n        mail_smtp_port: u16,\n        mail_smtp_user: Option<String>,\n        mail_smtp_pass: Option<String>,\n        mail_subject_template: String,\n        initial_host: Option<String>,\n    ) -> Self {\n        Self {\n            mail_to,\n            mail_from,\n            mail_from_name,\n            mail_smtp_host,\n            mail_smtp_port,\n            mail_smtp_user,\n            mail_smtp_pass,\n            mail_subject_template,\n            initial_host,\n            html_report_content: None,\n        }\n    }\n\n    /// Set the HTML report content to be attached to the email.\n    pub fn set_html_report_content(&mut self, content: String) {\n        self.html_report_content = Some(content);\n    }\n\n    /// Build the email body HTML.\n    fn get_email_body(&self, host: &str) -> String {\n        let version_code = version::CODE;\n\n        format!(\n            r#\"Hello,<br>\n<br>\nWe are pleased to deliver the attached report detailing a thorough crawling and analysis of your website, <b>{host}</b>. Our advanced website crawler has identified key areas that require your attention, including found redirects, 404 error pages, and potential issues in accessibility, best practices, performance, and security.<br>\n<br>\nThe report is in HTML format and for full functionality, it should be opened in a JavaScript-enabled browser. This will allow you to access advanced features such as searching and sorting data within tables. 
Some mobile email clients may not support all interactive elements.<br>\n<br>\nIn case you have any suggestions for improvements and other useful features, feel free to send them as Feature requests to <a href=\"https://github.com/janreges/siteone-crawler/issues/\">our project's GitHub</a>.<br>\n<br>\nBest regards,<br>\n<br>\n<a href=\"https://crawler.siteone.io/?utm_source=siteone_crawler&utm_medium=email-report&utm_campaign=crawler_report&utm_content=v{version_code}\">SiteOne Crawler</a> Team\"#,\n            host = host,\n            version_code = version_code,\n        )\n    }\n\n    /// Add inline styles to the email body for better email client rendering.\n    fn style_html_body_for_email(&self, html: &str) -> String {\n        let styled_body = r#\"<body style=\"font-family: Arial, Helvetica, sans-serif;\">\n<style>\ntable {\n    border-collapse: collapse;\n}\nbody table, body table th, body table td {\n    border: 1px solid #555555;\n    padding: 3px !important;\n    vertical-align: top;\n    text-align: left;\n}\n</style>\n\"#;\n        html.replace(\"<body>\", styled_body)\n    }\n\n    /// Build the email subject from the template.\n    /// Replaces %domain%, %date%, %datetime% placeholders.\n    fn build_subject(&self) -> String {\n        let host = self.initial_host.as_deref().unwrap_or(\"unknown\");\n        let now = chrono::Local::now();\n        let date = now.format(\"%Y-%m-%d\").to_string();\n        let datetime = now.format(\"%Y-%m-%d %H:%M\").to_string();\n\n        self.mail_subject_template\n            .replace(\"%domain%\", host)\n            .replace(\"%date%\", &date)\n            .replace(\"%datetime%\", &datetime)\n    }\n\n    /// Resolve the sender address.\n    /// Replaces @your-hostname.com with @<actual-hostname>.\n    fn resolve_mail_from(&self) -> String {\n        let hostname = gethostname::gethostname().to_string_lossy().to_string();\n        self.mail_from.replace(\"@your-hostname.com\", &format!(\"@{}\", hostname))\n    
}\n\n    /// Send the email via SMTP using the lettre crate.\n    fn send_email(\n        &self,\n        email_body_html: &str,\n        attachment_filename: Option<&str>,\n        attachment_content: Option<&str>,\n    ) -> CrawlerResult<()> {\n        let from_addr = self.resolve_mail_from();\n        let subject = self.build_subject();\n\n        // Build the message for the first recipient, then iterate\n        if self.mail_to.is_empty() {\n            return Err(CrawlerError::Mail(\"No recipients specified for email\".to_string()));\n        }\n\n        // Parse from address\n        let from_mailbox: lettre::message::Mailbox = format!(\"{} <{}>\", self.mail_from_name, from_addr)\n            .parse()\n            .map_err(|e| CrawlerError::Mail(format!(\"Invalid sender address '{}': {}\", from_addr, e)))?;\n\n        // Build email body with optional attachment\n        let styled_body = self.style_html_body_for_email(email_body_html);\n        let email_body = SinglePart::builder().header(ContentType::TEXT_HTML).body(styled_body);\n\n        let multipart = if let (Some(filename), Some(content)) = (attachment_filename, attachment_content) {\n            let attachment = Attachment::new(filename.to_string()).body(\n                content.as_bytes().to_vec(),\n                \"application/octet-stream\"\n                    .parse()\n                    .map_err(|_| CrawlerError::Mail(\"Failed to parse MIME type for attachment\".to_string()))?,\n            );\n            MultiPart::mixed().singlepart(email_body).singlepart(attachment)\n        } else {\n            MultiPart::mixed().singlepart(email_body)\n        };\n\n        // Send to each recipient\n        for recipient in &self.mail_to {\n            let to_mailbox = recipient\n                .parse()\n                .map_err(|e| CrawlerError::Mail(format!(\"Invalid recipient address '{}': {}\", recipient, e)))?;\n\n            let email = Message::builder()\n                
.from(from_mailbox.clone())\n                .to(to_mailbox)\n                .subject(&subject)\n                .multipart(multipart.clone())\n                .map_err(|e| CrawlerError::Mail(format!(\"Failed to build email message: {}\", e)))?;\n\n            // Build SMTP transport\n            let mut smtp_builder = if self.mail_smtp_port == 465 {\n                // Port 465 = implicit TLS\n                SmtpTransport::relay(&self.mail_smtp_host)\n                    .map_err(|e| {\n                        CrawlerError::Mail(format!(\n                            \"Failed to connect to SMTP server '{}:{}': {}\",\n                            self.mail_smtp_host, self.mail_smtp_port, e\n                        ))\n                    })?\n                    .port(self.mail_smtp_port)\n            } else if self.mail_smtp_port == 587 {\n                // Port 587 = STARTTLS\n                SmtpTransport::starttls_relay(&self.mail_smtp_host)\n                    .map_err(|e| {\n                        CrawlerError::Mail(format!(\n                            \"Failed to connect to SMTP server '{}:{}': {}\",\n                            self.mail_smtp_host, self.mail_smtp_port, e\n                        ))\n                    })?\n                    .port(self.mail_smtp_port)\n            } else {\n                // Other ports (25, etc) = no encryption by default\n                SmtpTransport::builder_dangerous(&self.mail_smtp_host).port(self.mail_smtp_port)\n            };\n\n            // Add credentials if provided\n            if let (Some(user), Some(pass)) = (&self.mail_smtp_user, &self.mail_smtp_pass) {\n                smtp_builder = smtp_builder.credentials(Credentials::new(user.clone(), pass.clone()));\n            }\n\n            let mailer = smtp_builder.build();\n\n            mailer\n                .send(&email)\n                .map_err(|e| CrawlerError::Mail(format!(\"Failed to send email to '{}': {}\", recipient, e)))?;\n        }\n\n   
     Ok(())\n    }\n}\n\nimpl Exporter for MailerExporter {\n    fn get_name(&self) -> &str {\n        \"MailerExporter\"\n    }\n\n    fn should_be_activated(&self) -> bool {\n        !self.mail_to.is_empty()\n    }\n\n    fn export(&mut self, status: &Status, _output: &dyn Output) -> CrawlerResult<()> {\n        // Do not send emails if crawler was interrupted\n        if is_crawler_interrupted() {\n            return Ok(());\n        }\n\n        let host = self.initial_host.as_deref().unwrap_or(\"unknown\");\n        let datetime = chrono::Local::now().format(\"%Y%m%d%H%M%S\").to_string();\n        let email_body = self.get_email_body(host);\n        let attachment_filename = format!(\"report-{}-{}.html\", host, datetime);\n\n        let html_report = match &self.html_report_content {\n            Some(c) => c.clone(),\n            None => {\n                return Err(CrawlerError::Export(\n                    \"HTML report content not available. Set it via set_html_report_content() before export.\"\n                        .to_string(),\n                ));\n            }\n        };\n\n        match self.send_email(&email_body, Some(&attachment_filename), Some(&html_report)) {\n            Ok(()) => {\n                let recipients = self.mail_to.join(\", \");\n                status.add_info_to_summary(\n                    \"mail-report-sent\",\n                    &format!(\n                        \"HTML report sent to {} using {}:{}\",\n                        recipients, self.mail_smtp_host, self.mail_smtp_port\n                    ),\n                );\n            }\n            Err(e) => {\n                status.add_critical_to_summary(\"mail-report-failed\", &format!(\"Failed to send email report: {}\", e));\n            }\n        }\n\n        Ok(())\n    }\n}\n"
  },
  {
    "path": "src/export/markdown_exporter.rs",
    "content": "// SiteOne Crawler - MarkdownExporter\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Converts crawled HTML pages to Markdown format.\n\nuse std::collections::HashMap;\nuse std::fs;\nuse std::path::Path;\nuse std::sync::{Arc, Mutex};\nuse std::time::Instant;\n\nuse regex::Regex;\n\nuse crate::content_processor::manager::ContentProcessorManager;\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::error::{CrawlerError, CrawlerResult};\nuse crate::export::exporter::Exporter;\nuse crate::export::utils::html_to_markdown::HtmlToMarkdownConverter;\nuse crate::export::utils::markdown_site_aggregator::MarkdownSiteAggregator;\nuse crate::export::utils::offline_url_converter::OfflineUrlConverter;\nuse crate::export::utils::target_domain_relation::TargetDomainRelation;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::result::visited_url::{SOURCE_A_HREF, SOURCE_IMG_SRC, VisitedUrl};\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\n/// Content types that require URL rewriting\nconst CONTENT_TYPES_REQUIRING_CHANGES: &[ContentTypeId] = &[ContentTypeId::Html, ContentTypeId::Redirect];\n\n/// Exports crawled HTML pages as Markdown files.\n/// Supports single-file combination, selector exclusion, content replacement.\npub struct MarkdownExporter {\n    markdown_export_directory: Option<String>,\n    markdown_export_single_file: Option<String>,\n    markdown_disable_images: bool,\n    markdown_disable_files: bool,\n    markdown_remove_links_and_images_from_single_file: bool,\n    markdown_exclude_selector: Vec<String>,\n    markdown_export_store_only_url_regex: Vec<String>,\n    markdown_ignore_store_file_error: bool,\n    markdown_replace_content: Vec<String>,\n    markdown_replace_query_string: Vec<String>,\n    markdown_move_content_before_h1_to_end: bool,\n    initial_parsed_url: Option<ParsedUrl>,\n    ignore_regexes: Vec<String>,\n    initial_url: String,\n    content_processor_manager: 
Option<Arc<Mutex<ContentProcessorManager>>>,\n    /// Maps URL -> relative file path for successfully exported files\n    exported_file_paths: HashMap<String, String>,\n}\n\nimpl Default for MarkdownExporter {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl MarkdownExporter {\n    pub fn new() -> Self {\n        Self {\n            markdown_export_directory: None,\n            markdown_export_single_file: None,\n            markdown_disable_images: false,\n            markdown_disable_files: false,\n            markdown_remove_links_and_images_from_single_file: false,\n            markdown_exclude_selector: Vec::new(),\n            markdown_export_store_only_url_regex: Vec::new(),\n            markdown_ignore_store_file_error: false,\n            markdown_replace_content: Vec::new(),\n            markdown_replace_query_string: Vec::new(),\n            markdown_move_content_before_h1_to_end: false,\n            initial_parsed_url: None,\n            ignore_regexes: Vec::new(),\n            initial_url: String::new(),\n            content_processor_manager: None,\n            exported_file_paths: HashMap::new(),\n        }\n    }\n\n    pub fn set_markdown_export_directory(&mut self, dir: Option<String>) {\n        self.markdown_export_directory = dir.map(|d| d.trim_end_matches('/').to_string());\n    }\n\n    pub fn set_markdown_export_single_file(&mut self, file: Option<String>) {\n        self.markdown_export_single_file = file;\n    }\n\n    pub fn set_markdown_disable_images(&mut self, disable: bool) {\n        self.markdown_disable_images = disable;\n    }\n\n    pub fn set_markdown_disable_files(&mut self, disable: bool) {\n        self.markdown_disable_files = disable;\n    }\n\n    pub fn set_markdown_remove_links_and_images_from_single_file(&mut self, remove: bool) {\n        self.markdown_remove_links_and_images_from_single_file = remove;\n    }\n\n    pub fn set_markdown_exclude_selector(&mut self, selectors: Vec<String>) {\n        
self.markdown_exclude_selector = selectors;\n    }\n\n    pub fn set_markdown_export_store_only_url_regex(&mut self, regexes: Vec<String>) {\n        self.markdown_export_store_only_url_regex = regexes;\n    }\n\n    pub fn set_markdown_ignore_store_file_error(&mut self, ignore: bool) {\n        self.markdown_ignore_store_file_error = ignore;\n    }\n\n    pub fn set_markdown_replace_content(&mut self, replacements: Vec<String>) {\n        self.markdown_replace_content = replacements;\n    }\n\n    pub fn set_markdown_replace_query_string(&mut self, replacements: Vec<String>) {\n        self.markdown_replace_query_string = replacements;\n    }\n\n    pub fn set_markdown_move_content_before_h1_to_end(&mut self, move_content: bool) {\n        self.markdown_move_content_before_h1_to_end = move_content;\n    }\n\n    pub fn set_initial_parsed_url(&mut self, url: ParsedUrl) {\n        self.initial_parsed_url = Some(url);\n    }\n\n    pub fn set_ignore_regexes(&mut self, regexes: Vec<String>) {\n        self.ignore_regexes = regexes;\n    }\n\n    pub fn set_initial_url(&mut self, url: String) {\n        self.initial_url = url;\n    }\n\n    pub fn set_content_processor_manager(&mut self, cpm: Arc<Mutex<ContentProcessorManager>>) {\n        self.content_processor_manager = Some(cpm);\n    }\n\n    /// Get the mapping of URL -> relative file path for all successfully exported files.\n    pub fn get_exported_file_paths(&self) -> &HashMap<String, String> {\n        &self.exported_file_paths\n    }\n\n    /// Store a file to the markdown export directory.\n    fn store_file(&mut self, visited_url: &VisitedUrl, status: &Status) -> CrawlerResult<()> {\n        let export_dir = self\n            .markdown_export_directory\n            .as_ref()\n            .ok_or_else(|| CrawlerError::Export(\"Markdown export directory not set\".to_string()))?;\n\n        let body_bytes = status.get_url_body(&visited_url.uq_id).unwrap_or_default();\n\n        // For content types requiring 
URL rewriting (HTML, CSS, JS), work with text.\n        // For binary content (images, fonts), keep raw bytes to avoid UTF-8 corruption.\n        let final_bytes =\n            if !body_bytes.is_empty() && CONTENT_TYPES_REQUIRING_CHANGES.contains(&visited_url.content_type) {\n                let mut content = String::from_utf8_lossy(&body_bytes).into_owned();\n\n                // Apply content changes for offline version through content processors\n                if let Some(ref cpm) = self.content_processor_manager {\n                    let parsed_url = ParsedUrl::parse(&visited_url.url, None);\n                    if let Ok(mut manager) = cpm.lock() {\n                        manager.apply_content_changes_for_offline_version(\n                            &mut content,\n                            visited_url.content_type,\n                            &parsed_url,\n                            true,\n                        );\n                    }\n                }\n\n                // Apply custom content replacements\n                if !content.is_empty() && !self.markdown_replace_content.is_empty() {\n                    for replace in &self.markdown_replace_content {\n                        let parts: Vec<&str> = replace.splitn(2, \"->\").collect();\n                        let replace_from = parts[0].trim();\n                        let replace_to = if parts.len() > 1 { parts[1].trim() } else { \"\" };\n\n                        let is_regex = crate::utils::is_regex_pattern(replace_from);\n\n                        if is_regex {\n                            if let Some(pattern) = extract_regex_pattern(replace_from)\n                                && let Ok(re) = Regex::new(&pattern)\n                            {\n                                content = re.replace_all(&content, replace_to).to_string();\n                            }\n                        } else {\n                            content = content.replace(replace_from, replace_to);\n 
                       }\n                    }\n                }\n\n                content.into_bytes()\n            } else {\n                body_bytes\n            };\n\n        // Build store file path\n        let relative_path = self.get_relative_file_path_for_file_by_url(visited_url, status);\n        let sanitized_path = OfflineUrlConverter::sanitize_file_path(&relative_path, false);\n        // Path traversal protection: strip \"../\" sequences from sanitized path\n        let sanitized_path = sanitized_path.replace(\"../\", \"\").replace(\"..\\\\\", \"\");\n        let store_file_path = format!(\"{}/{}\", export_dir, sanitized_path);\n\n        // Create directory structure\n        let dir_path = Path::new(&store_file_path).parent().ok_or_else(|| {\n            CrawlerError::Export(format!(\"Cannot determine parent directory for '{}'\", store_file_path))\n        })?;\n\n        if !dir_path.exists() {\n            fs::create_dir_all(dir_path).map_err(|e| {\n                CrawlerError::Export(format!(\"Cannot create directory '{}': {}\", dir_path.display(), e))\n            })?;\n        }\n\n        // Check if we should overwrite\n        if Path::new(&store_file_path).exists()\n            && let Some(ref initial_url) = self.initial_parsed_url\n            && !visited_url.is_https()\n            && initial_url.is_https()\n        {\n            let message = format!(\n                \"File '{}' already exists and will not be overwritten because initial request was HTTPS and this request is HTTP: {}\",\n                store_file_path, visited_url.url\n            );\n            status.add_notice_to_summary(\"markdown-exporter-store-file-ignored\", &message);\n            return Ok(());\n        }\n\n        // Write the content (raw bytes to preserve binary data for images/fonts)\n        if let Err(e) = fs::write(&store_file_path, &final_bytes) {\n            let has_extension = Regex::new(r\"(?i)\\.[a-z0-9\\-]{1,15}$\")\n                
.map(|re| re.is_match(&store_file_path))\n                .unwrap_or(false);\n\n            if has_extension && !self.markdown_ignore_store_file_error {\n                return Err(CrawlerError::Export(format!(\n                    \"Cannot store file '{}': {}\",\n                    store_file_path, e\n                )));\n            } else {\n                let message = format!(\n                    \"Cannot store file '{}' (undefined extension). Original URL: {}\",\n                    store_file_path, visited_url.url\n                );\n                status.add_notice_to_summary(\"markdown-exporter-store-file-error\", &message);\n                return Ok(());\n            }\n        }\n\n        // Convert HTML to Markdown\n        if store_file_path.ends_with(\".html\") {\n            let md_file_path = format!(\"{}md\", &store_file_path[..store_file_path.len() - 4]);\n\n            let html_content = fs::read_to_string(&store_file_path).unwrap_or_default();\n            let converter = HtmlToMarkdownConverter::new(&html_content, self.markdown_exclude_selector.clone());\n            let markdown = converter.get_markdown();\n\n            if let Err(_e) = fs::write(&md_file_path, &markdown) {\n                let message = format!(\n                    \"Cannot convert HTML file to Markdown file '{}'. Original URL: {}\",\n                    md_file_path, visited_url.url\n                );\n                status.add_notice_to_summary(\"markdown-exporter-store-file-error\", &message);\n                return Ok(());\n            }\n\n            // Remove the HTML file\n            let _ = fs::remove_file(&store_file_path);\n\n            if !Path::new(&md_file_path).exists() {\n                let message = format!(\n                    \"Cannot convert HTML file to Markdown file '{}'. 
Original URL: {}\",\n                    md_file_path, visited_url.url\n                );\n                status.add_notice_to_summary(\"markdown-exporter-store-file-error\", &message);\n                return Ok(());\n            }\n\n            // Normalize the markdown file\n            self.normalize_markdown_file(&md_file_path);\n        }\n\n        // Record the mapping — for HTML files, use the .md path\n        let final_relative_path = if sanitized_path.ends_with(\".html\") {\n            format!(\"{}md\", &sanitized_path[..sanitized_path.len() - 4])\n        } else {\n            sanitized_path.clone()\n        };\n        self.exported_file_paths\n            .insert(visited_url.url.clone(), final_relative_path);\n\n        Ok(())\n    }\n\n    /// Normalize a markdown file after conversion from HTML.\n    fn normalize_markdown_file(&self, md_file_path: &str) {\n        let md_content = match fs::read_to_string(md_file_path) {\n            Ok(content) => content,\n            Err(_) => return,\n        };\n\n        let normalized = self.normalize_markdown_content(&md_content, true);\n        let _ = fs::write(md_file_path, &normalized);\n    }\n\n    /// Normalize markdown content after conversion from HTML.\n    /// When `replace_html_links_with_md` is true, `.html` extensions in links are replaced with `.md`.\n    /// When false (standalone conversion mode), links are left as-is.\n    pub fn normalize_markdown_content(&self, content: &str, replace_html_links_with_md: bool) -> String {\n        let mut md_content = content.to_string();\n\n        // Replace .html with .md in links (only when exporting a full site)\n        if replace_html_links_with_md && let Ok(link_re) = Regex::new(r\"\\[([^\\]]*)\\]\\(([^)]+)\\)\") {\n            let ignore_regexes = &self.ignore_regexes;\n            md_content = link_re\n                .replace_all(&md_content, |caps: &regex::Captures| {\n                    let link_text = caps.get(1).map_or(\"\", |m| 
m.as_str());\n                    let url = caps.get(2).map_or(\"\", |m| m.as_str());\n\n                    // Check if URL matches any ignore pattern\n                    for ignore_regex in ignore_regexes {\n                        if let Ok(re) = Regex::new(ignore_regex)\n                            && re.is_match(url)\n                        {\n                            return format!(\"[{}]({})\", link_text, url);\n                        }\n                    }\n\n                    // Replace .html with .md\n                    let new_url = url.replace(\".html\", \".md\").replace(\".html#\", \".md#\");\n                    format!(\"[{}]({})\", link_text, new_url)\n                })\n                .to_string();\n        }\n\n        // Disable images if configured\n        if self.markdown_disable_images {\n            // Replace image in anchor text\n            if let Ok(re) = Regex::new(r\"\\[!\\[[^\\]]*\\]\\([^\\)]*\\)\\]\\([^\\)]*\\)\") {\n                md_content = re.replace_all(&md_content, \"\").to_string();\n            }\n            // Replace standard images\n            if let Ok(re) = Regex::new(r\"!\\[.*?\\]\\(.*?\\)\") {\n                md_content = re.replace_all(&md_content, \"\").to_string();\n            }\n            // Normalize leading whitespace inside link text: [ text](url) → [text](url)\n            if let Ok(re) = Regex::new(r\"\\[\\s+([^\\]]+)\\]\\(\") {\n                md_content = re.replace_all(&md_content, \"[$1](\").to_string();\n            }\n        }\n\n        // Disable files if configured\n        if self.markdown_disable_files\n            && let Ok(re) = Regex::new(r\"(?i)\\[([^\\]]+)\\]\\(([^)]+)\\)\")\n        {\n            let ignore_regexes = self.ignore_regexes.clone();\n            md_content = re\n                .replace_all(&md_content, |caps: &regex::Captures| {\n                    let url = caps.get(2).map_or(\"\", |m| m.as_str());\n\n                    // Skip http(s), tel:, mailto: 
and other protocol URLs\n                    if url.starts_with(\"http://\")\n                        || url.starts_with(\"https://\")\n                        || url.starts_with(\"tel:\")\n                        || url.starts_with(\"mailto:\")\n                    {\n                        return caps[0].to_string();\n                    }\n\n                    let full_url = url.to_string();\n                    let ext = url.rsplit('.').next().unwrap_or(\"\").to_lowercase();\n\n                    // Check ignore patterns\n                    for ignore_regex in &ignore_regexes {\n                        if let Ok(re) = Regex::new(ignore_regex)\n                            && re.is_match(&full_url)\n                        {\n                            return caps[0].to_string();\n                        }\n                    }\n\n                    // Keep page links and images (disable-files targets downloadable documents)\n                    if [\"md\", \"html\", \"htm\", \"jpg\", \"png\", \"gif\", \"webp\", \"avif\"].contains(&ext.as_str()) {\n                        return caps[0].to_string();\n                    }\n\n                    String::new()\n                })\n                .to_string();\n\n            md_content = md_content.replace(\"  \", \" \");\n        }\n\n        // Remove empty links\n        if let Ok(re) = Regex::new(r\"\\[[^\\]]*\\]\\(\\)\") {\n            md_content = re.replace_all(&md_content, \"\").to_string();\n        }\n\n        // Remove empty list items (e.g. after disabling images and files)\n        if let Ok(re) = Regex::new(r\"(?m)^\\s*[-*+]\\s*$\\n?\") {\n            md_content = re.replace_all(&md_content, \"\").to_string();\n        }\n\n        // Remove links where text is a bare filename (fallback from removed media like <video>)\n        // e.g. 
[some-page.html](some-page.md) — real link text never looks like a raw filename\n        if let Ok(re) = Regex::new(r\"(?m)^\\s*\\[([^\\]\\s]+\\.html?)\\]\\([^\\)]+\\)\\s*$\\n?\") {\n            md_content = re.replace_all(&md_content, \"\").to_string();\n        }\n\n        // Remove table rows where all cells are empty (e.g. after content removal)\n        if let Ok(re) = Regex::new(r\"(?m)^\\|\\s*(\\|\\s*)+$\\n?\") {\n            md_content = re.replace_all(&md_content, \"\").to_string();\n        }\n\n        // Remove empty lines in code blocks\n        md_content = md_content.replace(\"\\\\\\n\\n  -\", \"\\\\\\n  -\");\n\n        // Remove empty lines at beginning of code blocks\n        if let Ok(re) = Regex::new(r\"```\\n{2,}\") {\n            md_content = re.replace_all(&md_content, \"```\\n\").to_string();\n        }\n\n        // Apply additional fixes\n        md_content = self.remove_empty_lines_in_lists(&md_content);\n        md_content = self.move_content_before_main_heading_to_end(&md_content);\n        md_content = self.fix_multiline_images(&md_content);\n        md_content = self.detect_and_set_code_language(&md_content);\n\n        // Add backticks around --param inside tables\n        if let Ok(re) = Regex::new(r\"(?i)\\| -{1,2}([a-z0-9][a-z0-9-]*) \\|\") {\n            md_content = re.replace_all(&md_content, \"| `--$1` |\").to_string();\n        }\n\n        // Remove 3+ empty lines to 2 empty lines\n        if let Ok(re) = Regex::new(r\"\\n{3,}\") {\n            md_content = re.replace_all(&md_content, \"\\n\\n\").to_string();\n        }\n\n        // Trim special chars (only whitespace from start, all special chars from end\n        // to preserve markdown-significant characters like # headings and - lists at the start)\n        md_content = md_content\n            .trim_start_matches(['\\n', '\\t', ' '])\n            .trim_end_matches(['\\n', '\\t', ' ', '-', '#', '*'])\n            .to_string();\n\n        // Fix excessive whitespace\n    
    md_content = self.remove_excessive_whitespace(&md_content);\n\n        // Collapse large link lists into accordions (must run after all list normalization)\n        md_content = HtmlToMarkdownConverter::collapse_large_link_lists(&md_content);\n\n        md_content\n    }\n\n    /// Remove excessive whitespace from markdown content.\n    fn remove_excessive_whitespace(&self, md: &str) -> String {\n        let lines: Vec<&str> = md.split('\\n').collect();\n        let mut result: Vec<String> = Vec::new();\n        let mut in_code_block = false;\n        let mut last_line_was_empty = false;\n\n        let code_block_re = Regex::new(r\"^```\").ok();\n        let list_item_re = Regex::new(r\"^(\\s*)([-*+]|\\d+\\.)\\s\").ok();\n        let table_row_re = Regex::new(r\"^\\s*\\|.*\\|\\s*$\").ok();\n        let heading_re = Regex::new(r\"^#+\\s+\").ok();\n        let whitespace_re = Regex::new(r\"\\s+\").ok();\n\n        for line in &lines {\n            if code_block_re.as_ref().map(|re| re.is_match(line)).unwrap_or(false) {\n                in_code_block = !in_code_block;\n                result.push(line.to_string());\n                last_line_was_empty = false;\n                continue;\n            }\n\n            if in_code_block {\n                result.push(line.to_string());\n                last_line_was_empty = false;\n                continue;\n            }\n\n            let is_list_item = list_item_re.as_ref().map(|re| re.is_match(line)).unwrap_or(false);\n            let is_table_row = table_row_re.as_ref().map(|re| re.is_match(line)).unwrap_or(false);\n            let is_heading = heading_re.as_ref().map(|re| re.is_match(line)).unwrap_or(false);\n\n            if line.trim().is_empty() {\n                if !last_line_was_empty {\n                    result.push(String::new());\n                    last_line_was_empty = true;\n                }\n                continue;\n            }\n\n            if is_list_item || is_table_row || is_heading {\n 
               result.push(line.to_string());\n            } else {\n                let trimmed = whitespace_re\n                    .as_ref()\n                    .map(|re| re.replace_all(line.trim(), \" \").to_string())\n                    .unwrap_or_else(|| line.trim().to_string());\n                if !trimmed.is_empty() {\n                    result.push(trimmed);\n                }\n            }\n            last_line_was_empty = false;\n        }\n\n        let mut content = result.join(\"\\n\");\n\n        // Remove spaces at the end of lines\n        if let Ok(re) = Regex::new(r\"(?m)[ \\t]+$\") {\n            content = re.replace_all(&content, \"\").to_string();\n        }\n\n        content\n    }\n\n    /// Remove empty lines between list items.\n    fn remove_empty_lines_in_lists(&self, md: &str) -> String {\n        let lines: Vec<&str> = md.split('\\n').collect();\n        let mut result: Vec<String> = Vec::new();\n        let mut in_list = false;\n        let mut last_line_empty = false;\n        let mut last_indent_level: i32 = 0;\n\n        let list_re = Regex::new(r\"^[ ]{0,3}[-*+][ ]|^[ ]{0,3}\\d+\\.[ ]|^[ ]{2,}[-*+][ ]\").ok();\n\n        for line in &lines {\n            let trimmed_line = line.trim();\n            let is_empty = trimmed_line.is_empty();\n\n            let is_list_item = list_re.as_ref().map(|re| re.is_match(line)).unwrap_or(false);\n\n            if is_list_item {\n                in_list = true;\n\n                if last_line_empty {\n                    let leading_spaces: i32 = line.len() as i32 - line.trim_start().len() as i32;\n\n                    if (leading_spaces - last_indent_level).abs() > 2 {\n                        // Different nesting level, keep empty line\n                    } else {\n                        // Same nesting level, remove empty line\n                        result.pop();\n                    }\n                }\n\n                result.push(line.to_string());\n                
last_line_empty = false;\n                let leading_spaces = line.len() as i32 - line.trim_start().len() as i32;\n                last_indent_level = leading_spaces;\n            } else if is_empty {\n                result.push(line.to_string());\n                last_line_empty = true;\n            } else {\n                let leading_spaces = line.len() as i32 - line.trim_start().len() as i32;\n                if in_list && leading_spaces < last_indent_level {\n                    in_list = false;\n                }\n                result.push(line.to_string());\n                last_line_empty = false;\n                last_indent_level = leading_spaces;\n            }\n        }\n\n        result.join(\"\\n\")\n    }\n\n    /// Move content before the main heading to the end.\n    fn move_content_before_main_heading_to_end(&self, md: &str) -> String {\n        if !self.markdown_move_content_before_h1_to_end {\n            return md.to_string();\n        }\n\n        let mut headings: Vec<(usize, usize)> = Vec::new(); // (offset, level)\n\n        // ATX headings\n        if let Ok(re) = Regex::new(r\"(?m)^(#{1,6})\\s.*$\") {\n            for mat in re.find_iter(md) {\n                let level = mat.as_str().chars().take_while(|c| *c == '#').count();\n                headings.push((mat.start(), level));\n            }\n        }\n\n        // Setext headings\n        if let Ok(re) = Regex::new(r\"(?m)^(.+?)\\n(=+|-+)\\s*$\") {\n            for caps in re.captures_iter(md) {\n                if let (Some(text_match), Some(underline_match)) = (caps.get(1), caps.get(2)) {\n                    if text_match.as_str().trim().is_empty() {\n                        continue;\n                    }\n                    let underline = underline_match.as_str();\n                    let level = if underline.starts_with('=') { 1 } else { 2 };\n                    headings.push((text_match.start(), level));\n                }\n            }\n        }\n\n        if 
headings.is_empty() {\n            return md.to_string();\n        }\n\n        // Find the highest level (lowest number)\n        let min_level = headings.iter().map(|(_, level)| *level).min().unwrap_or(6);\n\n        // Find first heading with that level\n        let main_heading = headings\n            .iter()\n            .filter(|(_, level)| *level == min_level)\n            .min_by_key(|(offset, _)| *offset);\n\n        if let Some((heading_pos, _)) = main_heading {\n            let content_before = &md[..*heading_pos];\n            let content_after = &md[*heading_pos..];\n\n            if content_before.trim().is_empty() {\n                return md.to_string();\n            }\n\n            format!(\"{}\\n\\n---\\n\\n{}\", content_after.trim(), content_before.trim())\n        } else {\n            md.to_string()\n        }\n    }\n\n    /// Fix multi-line images and links.\n    fn fix_multiline_images(&self, md: &str) -> String {\n        md.replace(\"[\\n![\", \"[![\").replace(\")\\n](\", \")](\")\n    }\n\n    /// Detect and set code language for unlabeled code blocks.\n    fn detect_and_set_code_language(&self, md: &str) -> String {\n        let code_block_re = match Regex::new(r\"(?s)```\\s*\\n((?:[^`]|`[^`]|``[^`])*?)\\n```\") {\n            Ok(re) => re,\n            Err(_) => return md.to_string(),\n        };\n\n        code_block_re\n            .replace_all(md, |caps: &regex::Captures| {\n                let code = caps.get(1).map_or(\"\", |m| m.as_str());\n                let detected = self.detect_language(code);\n                format!(\"```{}\\n{}\\n```\", detected, code)\n            })\n            .to_string()\n    }\n\n    /// Detect programming language from code content.\n    fn detect_language(&self, code: &str) -> String {\n        let patterns: Vec<(&str, Vec<&str>)> = vec![\n            (\n                \"php\",\n                vec![\n                    r\"^<\\?php\",\n                    r\"\\$[a-zA-Z_]\",\n                    
r\"\\b(?:public|private|protected)\\s+function\\b\",\n                    r\"\\bnamespace\\s+[a-zA-Z\\\\]+;\",\n                ],\n            ),\n            (\n                \"javascript\",\n                vec![\n                    r\"\\bconst\\s+[a-zA-Z_][a-zA-Z0-9_]*\\s*=\",\n                    r\"\\bfunction\\s*\\([^)]*\\)\\s*\\{\",\n                    r\"\\blet\\s+[a-zA-Z_][a-zA-Z0-9_]*\\s*=\",\n                    r\"\\bconsole\\.log\\(\",\n                    r\"=>\\s*\\{\",\n                ],\n            ),\n            (\n                \"jsx\",\n                vec![\n                    r\"return\\s+\\(\",\n                    r\"import\\s+[a-zA-Z0-9_,\\{\\} ]+\\s+from\",\n                    r\"export\\s+(default|const)\",\n                ],\n            ),\n            (\n                \"typescript\",\n                vec![\n                    r\":\\s*(?:string|number|boolean|any)\\b\",\n                    r\"interface\\s+[A-Z][a-zA-Z0-9_]*\\s*\\{\",\n                    r\"type\\s+[A-Z][a-zA-Z0-9_]*\\s*=\",\n                ],\n            ),\n            (\n                \"python\",\n                vec![\n                    r\"(?m)def\\s+[a-zA-Z_][a-zA-Z0-9_]*\\s*\\([^)]*\\):\\s*$\",\n                    r\"(?m)^from\\s+[a-zA-Z_.]+\\s+import\\b\",\n                    r\"(?m)^if\\s+__name__\\s*==\\s*['\\x22]__main__['\\x22]:\\s*$\",\n                ],\n            ),\n            (\n                \"java\",\n                vec![\n                    r\"public\\s+class\\s+[A-Z][a-zA-Z0-9_]*\",\n                    r\"System\\.out\\.println\\(\",\n                    r\"private\\s+final\\s+\",\n                ],\n            ),\n            (\n                \"rust\",\n                vec![\n                    r\"fn\\s+[a-z_][a-z0-9_]*\\s*\\([^)]*\\)\\s*(?:->\\s*[a-zA-Z<>]+\\s*)?\\{\",\n                    r\"let\\s+mut\\s+\",\n                    r\"impl\\s+[A-Z][a-zA-Z0-9_]*\",\n                ],\n            ),\n           
 (\n                \"ruby\",\n                vec![\n                    r\"(?m)^require\\s+['\\x22][a-zA-Z0-9_/]+['\\x22]\",\n                    r\"def\\s+[a-z_][a-z0-9_]*\\b\",\n                    r\"\\battr_accessor\\b\",\n                ],\n            ),\n            (\n                \"css\",\n                vec![\n                    r\"(?m)^[.#][a-zA-Z\\-_][^\\{]*\\{\",\n                    r\"\\b(?:margin|padding|border|color|background):\\s*[^;]+;\",\n                    r\"@media\\s+\",\n                ],\n            ),\n            (\n                \"bash\",\n                vec![\n                    r\"^#!/bin/(?:bash|sh)\",\n                    r\"\\$\\([^)]+\\)\",\n                    r\"(?:^|\\s)(?:-{1,2}[a-zA-Z0-9]+)\",\n                    r\"\\becho\\s+\",\n                    r\"\\|\\s*grep\\b\",\n                ],\n            ),\n            (\n                \"go\",\n                vec![\n                    r\"\\bfunc\\s+[a-zA-Z_][a-zA-Z0-9_]*\\s*\\([^)]*\\)\",\n                    r\"\\btype\\s+[A-Z][a-zA-Z0-9_]*\\s+struct\\b\",\n                    r\"\\bpackage\\s+[a-z][a-z0-9_]*\\b\",\n                    r\"\\bif\\s+err\\s*!=\\s*nil\\b\",\n                ],\n            ),\n            (\n                \"csharp\",\n                vec![\n                    r\"\\bnamespace\\s+[A-Za-z.]+\\b\",\n                    r\"\\bpublic\\s+(?:class|interface|enum)\\b\",\n                    r\"\\busing\\s+[A-Za-z.]+;\",\n                    r\"\\basync\\s+Task<\",\n                ],\n            ),\n            (\n                \"kotlin\",\n                vec![\n                    r\"\\bfun\\s+[a-zA-Z_][a-zA-Z0-9_]*\\s*\\(\",\n                    r\"\\bval\\s+[a-zA-Z_][a-zA-Z0-9_]*:\",\n                    r\"\\bvar\\s+[a-zA-Z_][a-zA-Z0-9_]*:\",\n                    r\"\\bdata\\s+class\\b\",\n                ],\n            ),\n            (\n                \"swift\",\n                vec![\n                    
r\"\\bfunc\\s+[a-zA-Z_][a-zA-Z0-9_]*\\s*\\(\",\n                    r\"\\bvar\\s+[a-zA-Z_][a-zA-Z0-9_]*:\\s*[A-Z]\",\n                    r\"\\blet\\s+[a-zA-Z_][a-zA-Z0-9_]*:\",\n                    r\"\\bclass\\s+[A-Z][A-Za-z0-9_]*:\",\n                ],\n            ),\n            (\n                \"cpp\",\n                vec![\n                    r\"\\b(?:class|struct)\\s+[A-Z][a-zA-Z0-9_]*\\b\",\n                    r\"\\bstd::[a-z0-9_]+\",\n                    r\"#include\\s+[<\\x22][a-z0-9_.]+[>\\x22]\",\n                    r\"\\btemplate\\s*<[^>]+>\",\n                ],\n            ),\n            (\n                \"scala\",\n                vec![\n                    r\"\\bdef\\s+[a-z][a-zA-Z0-9_]*\\s*\\(\",\n                    r\"\\bcase\\s+class\\b\",\n                    r\"\\bobject\\s+[A-Z][a-zA-Z0-9_]*\\b\",\n                    r\"\\bval\\s+[a-z][a-zA-Z0-9_]*\\s*=\",\n                ],\n            ),\n            (\n                \"perl\",\n                vec![\n                    r\"\\buse\\s+[A-Z][A-Za-z:]+;\",\n                    r\"\\bsub\\s+[a-z_][a-z0-9_]*\\s*\\{\",\n                    r\"@[a-zA-Z_][a-zA-Z0-9_]*\",\n                ],\n            ),\n            (\n                \"lua\",\n                vec![\n                    r\"\\bfunction\\s+[a-z_][a-z0-9_]*\\s*\\(\",\n                    r\"\\blocal\\s+[a-z_][a-z0-9_]*\\s*=\",\n                    r\"\\brequire\\s*\\(?['\\x22][^'\\x22]+['\\x22]\\)?\",\n                ],\n            ),\n            (\n                \"vb\",\n                vec![\n                    r\"\\bPublic\\s+(?:Class|Interface|Module)\\b\",\n                    r\"\\bPrivate\\s+Sub\\s+[A-Za-z_][A-Za-z0-9_]*\\(\",\n                    r\"\\bDim\\s+[A-Za-z_][A-Za-z0-9_]*\\s+As\\b\",\n                    r\"\\bEnd\\s+(?:Sub|Function|Class|If|While)\\b\",\n                ],\n            ),\n            (\n                \"fsharp\",\n                vec![\n                    
r\"\\blet\\s+[a-z_][a-zA-Z0-9_]*\\s*=\",\n                    r\"\\bmodule\\s+[A-Z][A-Za-z0-9_]*\\s*=\",\n                    r\"\\btype\\s+[A-Z][A-Za-z0-9_]*\\s*=\",\n                    r\"\\bmatch\\s+.*\\bwith\\b\",\n                ],\n            ),\n            (\n                \"powershell\",\n                vec![\n                    r\"\\$[A-Za-z_][A-Za-z0-9_]*\",\n                    r\"\\[Parameter\\(.*?\\)\\]\",\n                    r\"\\bfunction\\s+[A-Z][A-Za-z0-9-]*\",\n                    r\"\\b(?:Get|Set|New|Remove)-[A-Z][A-Za-z]*\",\n                ],\n            ),\n            (\n                \"xaml\",\n                vec![\n                    r\"<Window\\s+[^>]*>\",\n                    r\"<UserControl\\s+[^>]*>\",\n                    r\"xmlns:(?:x|d)=\\x22[^\\x22]+\\x22\",\n                    r\"<(?:Grid|StackPanel|DockPanel)[^>]*>\",\n                ],\n            ),\n            (\n                \"razor\",\n                vec![\n                    r\"@(?:model|using|inject)\",\n                    r\"@Html\\.[A-Za-z]+\\(\",\n                    r\"@\\{.*?\\}\",\n                    r#\"<partial\\s+name=\\x22[^\\x22]+\\x22\\s*/>\"#,\n                ],\n            ),\n            (\n                \"html\",\n                vec![r\"<(html|head|body|h1|a|img|table|tr|td|ul|ol|li|script|style)[^>]*>\"],\n            ),\n        ];\n\n        let mut best_lang = String::new();\n        let mut best_score = 0usize;\n\n        for (lang, lang_patterns) in &patterns {\n            let mut score = 0usize;\n            for pattern in lang_patterns {\n                if let Ok(re) = Regex::new(pattern) {\n                    score += re.find_iter(code).count();\n                }\n            }\n            if score > best_score {\n                best_score = score;\n                best_lang = lang.to_string();\n            }\n        }\n\n        best_lang\n    }\n\n    /// Check if URL should be stored based on filters.\n    fn 
should_be_url_stored(&self, visited_url: &VisitedUrl) -> bool {\n        let mut result = false;\n\n        if !self.markdown_export_store_only_url_regex.is_empty() {\n            for regex_str in &self.markdown_export_store_only_url_regex {\n                let pattern = crate::utils::extract_pcre_regex_pattern(regex_str);\n                if let Ok(re) = Regex::new(&pattern)\n                    && re.is_match(&visited_url.url)\n                {\n                    result = true;\n                    break;\n                }\n            }\n        } else {\n            result = true;\n        }\n\n        // Do not store robots.txt\n        if visited_url.url.ends_with(\"robots.txt\") {\n            result = false;\n        }\n\n        result\n    }\n\n    /// Get relative file path for storing a visited URL.\n    fn get_relative_file_path_for_file_by_url(&self, visited_url: &VisitedUrl, status: &Status) -> String {\n        let initial_url = self\n            .initial_parsed_url\n            .clone()\n            .unwrap_or_else(|| ParsedUrl::parse(&visited_url.url, None));\n\n        let source_url = if !visited_url.source_uq_id.is_empty() {\n            status\n                .get_url_by_uq_id(&visited_url.source_uq_id)\n                .unwrap_or_else(|| visited_url.url.clone())\n        } else {\n            visited_url.url.clone()\n        };\n\n        let base_url = ParsedUrl::parse(&source_url, None);\n        let target_url = ParsedUrl::parse(&visited_url.url, None);\n\n        let attribute = if visited_url.content_type == ContentTypeId::Image {\n            \"src\"\n        } else {\n            \"href\"\n        };\n\n        let mut converter = OfflineUrlConverter::new(initial_url, base_url, target_url, None, None, Some(attribute));\n\n        let relative_url = converter.convert_url_to_relative(false);\n        let relative_target_url = converter.get_relative_target_url();\n        let target_domain_relation = 
converter.get_target_domain_relation();\n\n        match target_domain_relation {\n            TargetDomainRelation::InitialDifferentBaseSame | TargetDomainRelation::InitialDifferentBaseDifferent => {\n                let relative_path = relative_url\n                    .replace(\"../\", \"\")\n                    .trim_start_matches(['/', ' '])\n                    .to_string();\n                let host = relative_target_url.host.as_deref().unwrap_or(\"\");\n                if !relative_path.starts_with(&format!(\"_{}\", host)) {\n                    format!(\"_{}/{}\", host, relative_path)\n                } else {\n                    relative_path\n                }\n            }\n            TargetDomainRelation::InitialSameBaseSame | TargetDomainRelation::InitialSameBaseDifferent => relative_url\n                .replace(\"../\", \"\")\n                .trim_start_matches(['/', ' '])\n                .to_string(),\n        }\n    }\n\n    /// Validate URL.\n    fn is_valid_url(url: &str) -> bool {\n        url::Url::parse(url).is_ok()\n    }\n}\n\nimpl Exporter for MarkdownExporter {\n    fn get_name(&self) -> &str {\n        \"MarkdownExporter\"\n    }\n\n    fn should_be_activated(&self) -> bool {\n        self.markdown_export_directory.is_some() || self.markdown_export_single_file.is_some()\n    }\n\n    fn export(&mut self, status: &Status, _output: &dyn Output) -> CrawlerResult<()> {\n        let start_time = Instant::now();\n\n        // Set replace_query_string configuration\n        OfflineUrlConverter::set_replace_query_string(self.markdown_replace_query_string.clone());\n\n        // Determine valid content types\n        let mut valid_content_types = vec![ContentTypeId::Html, ContentTypeId::Redirect];\n        if !self.markdown_disable_images {\n            valid_content_types.push(ContentTypeId::Image);\n        }\n        if !self.markdown_disable_files {\n            valid_content_types.push(ContentTypeId::Document);\n        }\n\n        let 
visited_urls = status.get_visited_urls();\n\n        // Filter relevant URLs\n        let exported_urls: Vec<&VisitedUrl> = visited_urls\n            .iter()\n            .filter(|u| {\n                // Only store images that came from img-src or a-href attributes\n                if u.is_image() && !matches!(u.source_attr, SOURCE_IMG_SRC | SOURCE_A_HREF) {\n                    return false;\n                }\n\n                u.status_code == 200 && valid_content_types.contains(&u.content_type)\n            })\n            .collect();\n\n        // Store all allowed URLs\n        for exported_url in &exported_urls {\n            if Self::is_valid_url(&exported_url.url) && self.should_be_url_stored(exported_url) {\n                self.store_file(exported_url, status)?;\n            }\n        }\n\n        // Add info to summary\n        let duration = start_time.elapsed().as_secs_f64();\n        if let Some(ref export_dir) = self.markdown_export_directory {\n            let formatted_path = utils::get_output_formatted_path(export_dir);\n            let formatted_duration = utils::get_formatted_duration(duration);\n            status.add_info_to_summary(\n                \"markdown-generated\",\n                &format!(\n                    \"Markdown content generated to '{}' and took {}\",\n                    formatted_path, formatted_duration\n                ),\n            );\n        }\n\n        // Combine markdown files to single file if requested\n        if let (Some(single_file), Some(export_dir)) =\n            (&self.markdown_export_single_file, &self.markdown_export_directory)\n        {\n            let combine_start = Instant::now();\n            let combiner = MarkdownSiteAggregator::new(&self.initial_url);\n\n            match combiner.combine_directory(export_dir, self.markdown_remove_links_and_images_from_single_file) {\n                Ok(combined_markdown) => {\n                    // Ensure directory exists\n                    if let Some(parent) = 
Path::new(single_file).parent()\n                        && !parent.exists()\n                    {\n                        fs::create_dir_all(parent).map_err(|e| {\n                            CrawlerError::Export(format!(\n                                \"Cannot create directory for single markdown file: '{}': {}\",\n                                parent.display(),\n                                e\n                            ))\n                        })?;\n                    }\n\n                    fs::write(single_file, &combined_markdown).map_err(|e| {\n                        CrawlerError::Export(format!(\"Cannot write single markdown file '{}': {}\", single_file, e))\n                    })?;\n\n                    let combine_duration = combine_start.elapsed().as_secs_f64();\n                    let formatted_path = utils::get_output_formatted_path(single_file);\n                    let formatted_duration = utils::get_formatted_duration(combine_duration);\n                    status.add_info_to_summary(\n                        \"markdown-combined\",\n                        &format!(\n                            \"Markdown files combined into single file '{}' and took {}\",\n                            formatted_path, formatted_duration\n                        ),\n                    );\n                }\n                Err(e) => {\n                    status.add_critical_to_summary(\n                        \"markdown-combine-error\",\n                        &format!(\"Error combining markdown files: {}\", e),\n                    );\n                }\n            }\n        }\n\n        Ok(())\n    }\n}\n\n/// Extract regex pattern from a delimited string.\nfn extract_regex_pattern(input: &str) -> Option<String> {\n    if input.len() < 2 {\n        return None;\n    }\n    let delimiter = input.chars().next()?;\n    let rest = &input[1..];\n    if let Some(end_pos) = rest.rfind(delimiter) {\n        let pattern = &rest[..end_pos];\n        
let flags = &rest[end_pos + 1..];\n        let mut regex_pattern = String::new();\n        if flags.contains('i') {\n            regex_pattern.push_str(\"(?i)\");\n        }\n        regex_pattern.push_str(pattern);\n        Some(regex_pattern)\n    } else {\n        None\n    }\n}\n\n/// Convert a local HTML file to Markdown without crawling.\n/// Used by the `--html-to-markdown` CLI mode.\npub fn convert_html_file_to_markdown(\n    html_file_path: &str,\n    exclude_selectors: Vec<String>,\n    disable_images: bool,\n    disable_files: bool,\n    move_content_before_h1_to_end: bool,\n) -> Result<String, CrawlerError> {\n    let html_content = fs::read_to_string(html_file_path)\n        .map_err(|e| CrawlerError::Export(format!(\"Cannot read HTML file '{}': {}\", html_file_path, e)))?;\n\n    let converter = HtmlToMarkdownConverter::new(&html_content, exclude_selectors);\n    let markdown = converter.get_markdown();\n\n    let mut exporter = MarkdownExporter::new();\n    exporter.set_markdown_disable_images(disable_images);\n    exporter.set_markdown_disable_files(disable_files);\n    exporter.set_markdown_move_content_before_h1_to_end(move_content_before_h1_to_end);\n\n    Ok(exporter.normalize_markdown_content(&markdown, false))\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n    use std::fs;\n\n    #[test]\n    fn test_should_be_activated() {\n        let mut exporter = MarkdownExporter::new();\n        assert!(!exporter.should_be_activated());\n\n        exporter.set_markdown_export_directory(Some(\"/tmp/md\".to_string()));\n        assert!(exporter.should_be_activated());\n    }\n\n    #[test]\n    fn test_detect_language_rust() {\n        let exporter = MarkdownExporter::new();\n        assert_eq!(exporter.detect_language(\"fn main() {\\n    let mut x = 5;\\n}\"), \"rust\");\n    }\n\n    #[test]\n    fn test_detect_language_python() {\n        let exporter = MarkdownExporter::new();\n        assert_eq!(\n            exporter.detect_language(\"def 
hello():\\n    print('hello')\\nfrom os import path\"),\n            \"python\"\n        );\n    }\n\n    #[test]\n    fn test_fix_multiline_images() {\n        let exporter = MarkdownExporter::new();\n        let input = \"[\\n![image](src)](link)\";\n        let result = exporter.fix_multiline_images(input);\n        assert_eq!(result, \"[![image](src)](link)\");\n    }\n\n    // --- Helper to test normalize_markdown_content ---\n\n    fn normalize(exporter: &MarkdownExporter, content: &str) -> String {\n        exporter.normalize_markdown_content(content, true)\n    }\n\n    // --- Tests for d2f9e51: preserve heading markers when trimming ---\n\n    #[test]\n    fn test_trim_preserves_heading_at_start() {\n        let exporter = MarkdownExporter::new();\n        let result = normalize(&exporter, \"# My Heading\\n\\nSome text.\");\n        assert!(\n            result.starts_with(\"# My Heading\"),\n            \"Heading should be preserved at start: {:?}\",\n            result\n        );\n    }\n\n    #[test]\n    fn test_trim_removes_special_chars_at_end() {\n        let exporter = MarkdownExporter::new();\n        let result = normalize(&exporter, \"Some text\\n\\n---\\n\\n###\\n\\n\");\n        assert!(!result.ends_with('#'));\n        assert!(!result.ends_with('-'));\n    }\n\n    // --- Tests for 2a07ac4: preserve .html/.htm in disable-files ---\n\n    #[test]\n    fn test_disable_files_preserves_html_links() {\n        let mut exporter = MarkdownExporter::new();\n        exporter.set_markdown_disable_files(true);\n        // Note: .html → .md conversion runs first, so page.html becomes page.md\n        let result = normalize(&exporter, \"[Click here](page.html)\\n[Download](doc.pdf)\");\n        assert!(\n            result.contains(\"[Click here](page.md)\"),\n            \"Page links should be preserved (as .md): {:?}\",\n            result\n        );\n        assert!(!result.contains(\"doc.pdf\"), \"PDF links should be removed: {:?}\", result);\n    
}\n\n    #[test]\n    fn test_disable_files_preserves_htm_links() {\n        let mut exporter = MarkdownExporter::new();\n        exporter.set_markdown_disable_files(true);\n        let result = normalize(&exporter, \"[Page](old.htm)\");\n        assert!(\n            result.contains(\"[Page](old.htm)\"),\n            \"HTM links should be preserved: {:?}\",\n            result\n        );\n    }\n\n    #[test]\n    fn test_disable_files_preserves_md_links() {\n        let mut exporter = MarkdownExporter::new();\n        exporter.set_markdown_disable_files(true);\n        let result = normalize(&exporter, \"[About](about.md)\");\n        assert!(\n            result.contains(\"[About](about.md)\"),\n            \"MD links should be preserved: {:?}\",\n            result\n        );\n    }\n\n    // --- Tests for 9a6df27: preserve tel: and mailto: links ---\n\n    #[test]\n    fn test_disable_files_preserves_tel_links() {\n        let mut exporter = MarkdownExporter::new();\n        exporter.set_markdown_disable_files(true);\n        let result = normalize(&exporter, \"[Call us](tel:+420123456789)\");\n        assert!(\n            result.contains(\"[Call us](tel:+420123456789)\"),\n            \"tel: links should be preserved: {:?}\",\n            result\n        );\n    }\n\n    #[test]\n    fn test_disable_files_preserves_mailto_links() {\n        let mut exporter = MarkdownExporter::new();\n        exporter.set_markdown_disable_files(true);\n        let result = normalize(&exporter, \"[Email](mailto:info@example.com)\");\n        assert!(\n            result.contains(\"[Email](mailto:info@example.com)\"),\n            \"mailto: links should be preserved: {:?}\",\n            result\n        );\n    }\n\n    #[test]\n    fn test_disable_files_preserves_https_links() {\n        let mut exporter = MarkdownExporter::new();\n        exporter.set_markdown_disable_files(true);\n        let result = normalize(&exporter, 
\"Text\\n\\n[External](https://example.com/page)\\n\\nMore\");\n        assert!(\n            result.contains(\"[External](https://example.com/page)\"),\n            \"HTTPS links should be preserved: {:?}\",\n            result\n        );\n    }\n\n    #[test]\n    fn test_disable_files_removes_pdf() {\n        let mut exporter = MarkdownExporter::new();\n        exporter.set_markdown_disable_files(true);\n        let result = normalize(&exporter, \"[Manual](manual.pdf)\");\n        assert!(!result.contains(\"manual.pdf\"), \"PDF should be removed: {:?}\", result);\n    }\n\n    // --- Tests for 8606edc: empty list items + link text whitespace ---\n\n    #[test]\n    fn test_empty_list_items_removed() {\n        let exporter = MarkdownExporter::new();\n        let result = normalize(&exporter, \"Text\\n\\n- Item 1\\n- \\n- Item 2\");\n        assert!(result.contains(\"Item 1\"), \"Item 1 should be present: {:?}\", result);\n        assert!(result.contains(\"Item 2\"), \"Item 2 should be present: {:?}\", result);\n    }\n\n    #[test]\n    fn test_disable_images_normalizes_link_whitespace() {\n        let mut exporter = MarkdownExporter::new();\n        exporter.set_markdown_disable_images(true);\n        let result = normalize(&exporter, \"Text\\n\\n[ Some text](page.md)\");\n        assert!(\n            result.contains(\"[Some text](page.md)\"),\n            \"Leading whitespace in link text should be removed: {:?}\",\n            result\n        );\n    }\n\n    #[test]\n    fn test_disable_images_removes_standard_images() {\n        let mut exporter = MarkdownExporter::new();\n        exporter.set_markdown_disable_images(true);\n        let result = normalize(&exporter, \"Text before\\n\\n![Alt text](image.png)\\n\\nText after\");\n        assert!(\n            !result.contains(\"![Alt text]\"),\n            \"Images should be removed: {:?}\",\n            result\n        );\n        assert!(result.contains(\"Text before\"));\n        
assert!(result.contains(\"Text after\"));\n    }\n\n    // --- Tests for 9e1def9 + 274439e: orphaned filename links ---\n\n    #[test]\n    fn test_orphaned_filename_link_removed() {\n        let exporter = MarkdownExporter::new();\n        let result = normalize(\n            &exporter,\n            \"## Heading\\n\\n[some-page.html](some-page.md)\\n\\nReal content\",\n        );\n        assert!(\n            !result.contains(\"some-page.html\"),\n            \"Orphaned filename link should be removed: {:?}\",\n            result\n        );\n        assert!(result.contains(\"Real content\"));\n    }\n\n    #[test]\n    fn test_orphaned_filename_link_with_leading_whitespace() {\n        let exporter = MarkdownExporter::new();\n        let result = normalize(&exporter, \"## Heading\\n\\n [my-page.html](my-page.md)\\n\\nContent\");\n        assert!(\n            !result.contains(\"my-page.html\"),\n            \"Indented orphaned link should be removed: {:?}\",\n            result\n        );\n    }\n\n    #[test]\n    fn test_real_link_text_not_removed() {\n        let exporter = MarkdownExporter::new();\n        // .html → .md conversion runs first\n        let result = normalize(&exporter, \"[Click here](page.html)\\n\\nSome text\");\n        assert!(\n            result.contains(\"[Click here](page.md)\"),\n            \"Links with real text should be kept: {:?}\",\n            result\n        );\n    }\n\n    // --- Tests for b8511cc: empty table rows ---\n\n    #[test]\n    fn test_empty_table_rows_removed() {\n        let exporter = MarkdownExporter::new();\n        let result = normalize(&exporter, \"| Header |\\n| --- |\\n| | |\\n| Data |\\n\");\n        assert!(\n            !result.contains(\"| | |\"),\n            \"Empty table rows should be removed: {:?}\",\n            result\n        );\n        assert!(result.contains(\"Data\"));\n    }\n\n    #[test]\n    fn test_table_row_with_content_preserved() {\n        let exporter = 
MarkdownExporter::new();\n        let result = normalize(&exporter, \"| Col1 | Col2 |\\n| --- | --- |\\n| A | B |\\n\");\n        assert!(result.contains(\"| A | B |\"));\n    }\n\n    // --- Tests for move_content_before_main_heading_to_end ---\n\n    #[test]\n    fn test_move_content_before_h1() {\n        let mut exporter = MarkdownExporter::new();\n        exporter.set_markdown_move_content_before_h1_to_end(true);\n        let result = exporter.move_content_before_main_heading_to_end(\"Nav content\\n\\n# Main Title\\n\\nPage body\");\n        assert!(\n            result.starts_with(\"# Main Title\"),\n            \"Should start with heading: {:?}\",\n            result\n        );\n        assert!(result.contains(\"Nav content\"));\n        assert!(result.contains(\"---\"));\n    }\n\n    #[test]\n    fn test_move_content_disabled() {\n        let exporter = MarkdownExporter::new();\n        let input = \"Nav content\\n\\n# Main Title\\n\\nPage body\";\n        let result = exporter.move_content_before_main_heading_to_end(input);\n        assert_eq!(result, input, \"Should return unchanged when disabled\");\n    }\n\n    #[test]\n    fn test_move_content_nothing_before_h1() {\n        let mut exporter = MarkdownExporter::new();\n        exporter.set_markdown_move_content_before_h1_to_end(true);\n        let input = \"# Title\\n\\nBody text\";\n        let result = exporter.move_content_before_main_heading_to_end(input);\n        assert_eq!(result, input, \"Should return unchanged when nothing before h1\");\n    }\n\n    // --- Tests for normalize_markdown_content and convert_html_file_to_markdown ---\n\n    #[test]\n    fn test_normalize_content_without_html_link_replacement() {\n        let exporter = MarkdownExporter::new();\n        let result = exporter.normalize_markdown_content(\"# Title\\n\\n[Link](page.html)\\n\\nText\", false);\n        assert!(\n            result.contains(\"[Link](page.html)\"),\n            \"Links should stay as .html: {:?}\",\n   
         result\n        );\n    }\n\n    #[test]\n    fn test_normalize_content_with_html_link_replacement() {\n        let exporter = MarkdownExporter::new();\n        let result = exporter.normalize_markdown_content(\"# Title\\n\\n[Link](page.html)\\n\\nText\", true);\n        assert!(\n            result.contains(\"[Link](page.md)\"),\n            \"Links should be converted to .md: {:?}\",\n            result\n        );\n    }\n\n    #[test]\n    fn test_convert_html_file_basic() {\n        let html = \"<html><body><h1>Hello</h1><p>World</p></body></html>\";\n        let path = \"/tmp/siteone_test_htm_convert.html\";\n        fs::write(path, html).unwrap();\n        let result = convert_html_file_to_markdown(path, vec![], false, false, false).unwrap();\n        let _ = fs::remove_file(path);\n        assert!(result.contains(\"# Hello\"), \"Should contain h1: {:?}\", result);\n        assert!(result.contains(\"World\"), \"Should contain paragraph: {:?}\", result);\n    }\n\n    #[test]\n    fn test_convert_html_file_nonexistent() {\n        let result = convert_html_file_to_markdown(\"/tmp/nonexistent_siteone_test.html\", vec![], false, false, false);\n        assert!(result.is_err(), \"Should error on nonexistent file\");\n    }\n\n    #[test]\n    fn test_convert_html_file_with_disable_images() {\n        let html = \"<html><body><h1>Title</h1><img src=\\\"photo.jpg\\\" alt=\\\"Photo\\\"><p>Text</p></body></html>\";\n        let path = \"/tmp/siteone_test_htm_images.html\";\n        fs::write(path, html).unwrap();\n        let result = convert_html_file_to_markdown(path, vec![], true, false, false).unwrap();\n        let _ = fs::remove_file(path);\n        assert!(!result.contains(\"photo.jpg\"), \"Images should be removed: {:?}\", result);\n        assert!(result.contains(\"Text\"));\n    }\n\n    #[test]\n    fn test_convert_html_file_preserves_html_links() {\n        let html = r#\"<html><body><h1>Title</h1><a 
href=\"other.html\">Link</a></body></html>\"#;\n        let path = \"/tmp/siteone_test_htm_links.html\";\n        fs::write(path, html).unwrap();\n        let result = convert_html_file_to_markdown(path, vec![], false, false, false).unwrap();\n        let _ = fs::remove_file(path);\n        assert!(\n            result.contains(\"other.html\"),\n            \"HTML links should NOT be converted to .md: {:?}\",\n            result\n        );\n    }\n}\n"
  },
  {
    "path": "src/export/mod.rs",
    "content": "pub mod exporter;\npub mod html_report;\npub mod utils;\n\npub mod base_exporter;\npub mod file_exporter;\npub mod mailer_exporter;\npub mod markdown_exporter;\npub mod offline_website_exporter;\npub mod sitemap_exporter;\npub mod upload_exporter;\n"
  },
  {
    "path": "src/export/offline_website_exporter.rs",
    "content": "// SiteOne Crawler - OfflineWebsiteExporter\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Saves all crawled pages to local filesystem for offline browsing.\n\nuse std::collections::HashMap;\nuse std::fs;\nuse std::path::Path;\nuse std::sync::{Arc, Mutex};\nuse std::time::Instant;\n\nuse md5::{Digest, Md5};\nuse regex::Regex;\n\nuse crate::content_processor::manager::ContentProcessorManager;\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::error::{CrawlerError, CrawlerResult};\nuse crate::export::exporter::Exporter;\nuse crate::export::utils::offline_url_converter::OfflineUrlConverter;\nuse crate::export::utils::target_domain_relation::TargetDomainRelation;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::result::visited_url::VisitedUrl;\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\n/// Content types that require URL rewriting for offline browsing\nconst CONTENT_TYPES_REQUIRING_CHANGES: &[ContentTypeId] = &[\n    ContentTypeId::Html,\n    ContentTypeId::Script,\n    ContentTypeId::Stylesheet,\n    ContentTypeId::Redirect,\n];\n\n/// Exports all crawled pages to a local directory for offline browsing.\n/// Rewrites URLs in HTML/CSS/JS for offline navigation.\npub struct OfflineWebsiteExporter {\n    offline_export_directory: Option<String>,\n    offline_export_store_only_url_regex: Vec<String>,\n    offline_export_remove_unwanted_code: bool,\n    offline_export_no_auto_redirect_html: bool,\n    offline_export_preserve_url_structure: bool,\n    offline_export_lowercase: bool,\n    ignore_store_file_error: bool,\n    replace_content: Vec<String>,\n    replace_query_string: Vec<String>,\n    initial_parsed_url: Option<ParsedUrl>,\n    content_processor_manager: Option<Arc<Mutex<ContentProcessorManager>>>,\n    #[allow(clippy::type_complexity)]\n    is_domain_allowed_for_static_files: Option<Box<dyn Fn(&str) -> bool + Send + Sync>>,\n    #[allow(clippy::type_complexity)]\n    
is_external_domain_allowed_for_crawling: Option<Box<dyn Fn(&str) -> bool + Send + Sync>>,\n    /// Maps URL -> relative file path for successfully exported files\n    exported_file_paths: HashMap<String, String>,\n}\n\nimpl Default for OfflineWebsiteExporter {\n    fn default() -> Self {\n        Self::new()\n    }\n}\n\nimpl OfflineWebsiteExporter {\n    pub fn new() -> Self {\n        Self {\n            offline_export_directory: None,\n            offline_export_store_only_url_regex: Vec::new(),\n            offline_export_remove_unwanted_code: false,\n            offline_export_no_auto_redirect_html: false,\n            offline_export_preserve_url_structure: false,\n            offline_export_lowercase: false,\n            ignore_store_file_error: false,\n            replace_content: Vec::new(),\n            replace_query_string: Vec::new(),\n            initial_parsed_url: None,\n            content_processor_manager: None,\n            is_domain_allowed_for_static_files: None,\n            is_external_domain_allowed_for_crawling: None,\n            exported_file_paths: HashMap::new(),\n        }\n    }\n\n    pub fn set_offline_export_directory(&mut self, dir: Option<String>) {\n        self.offline_export_directory = dir.map(|d| d.trim_end_matches('/').to_string());\n    }\n\n    pub fn set_offline_export_store_only_url_regex(&mut self, regexes: Vec<String>) {\n        self.offline_export_store_only_url_regex = regexes;\n    }\n\n    pub fn set_offline_export_remove_unwanted_code(&mut self, remove: bool) {\n        self.offline_export_remove_unwanted_code = remove;\n    }\n\n    pub fn set_offline_export_no_auto_redirect_html(&mut self, disable: bool) {\n        self.offline_export_no_auto_redirect_html = disable;\n    }\n\n    pub fn set_offline_export_preserve_url_structure(&mut self, preserve: bool) {\n        self.offline_export_preserve_url_structure = preserve;\n    }\n\n    pub fn set_offline_export_lowercase(&mut self, lowercase: bool) {\n        
self.offline_export_lowercase = lowercase;\n    }\n\n    pub fn set_ignore_store_file_error(&mut self, ignore: bool) {\n        self.ignore_store_file_error = ignore;\n    }\n\n    pub fn set_replace_content(&mut self, replacements: Vec<String>) {\n        self.replace_content = replacements;\n    }\n\n    pub fn set_replace_query_string(&mut self, replacements: Vec<String>) {\n        self.replace_query_string = replacements;\n    }\n\n    pub fn set_initial_parsed_url(&mut self, url: ParsedUrl) {\n        self.initial_parsed_url = Some(url);\n    }\n\n    pub fn set_content_processor_manager(&mut self, cpm: Arc<Mutex<ContentProcessorManager>>) {\n        self.content_processor_manager = Some(cpm);\n    }\n\n    pub fn set_domain_callbacks(\n        &mut self,\n        static_files: Box<dyn Fn(&str) -> bool + Send + Sync>,\n        crawling: Box<dyn Fn(&str) -> bool + Send + Sync>,\n    ) {\n        self.is_domain_allowed_for_static_files = Some(static_files);\n        self.is_external_domain_allowed_for_crawling = Some(crawling);\n    }\n\n    /// Get the mapping of URL -> relative file path for all successfully exported files.\n    pub fn get_exported_file_paths(&self) -> &HashMap<String, String> {\n        &self.exported_file_paths\n    }\n\n    /// Store a single file to the offline export directory.\n    fn store_file(&mut self, visited_url: &VisitedUrl, status: &Status, _output: &dyn Output) -> CrawlerResult<()> {\n        let export_dir = self\n            .offline_export_directory\n            .as_ref()\n            .ok_or_else(|| CrawlerError::Export(\"Offline export directory not set\".to_string()))?;\n\n        let body_bytes = status.get_url_body(&visited_url.uq_id).unwrap_or_default();\n\n        // For content types requiring URL rewriting (HTML, CSS, JS), work with text\n        // For binary content types (images, fonts), keep raw bytes\n        let final_bytes =\n            if !body_bytes.is_empty() && 
CONTENT_TYPES_REQUIRING_CHANGES.contains(&visited_url.content_type) {\n                let mut content = String::from_utf8_lossy(&body_bytes).into_owned();\n\n                // Apply content changes through all content processors (URL rewriting for offline)\n                if let Some(ref cpm) = self.content_processor_manager {\n                    let parsed_url = ParsedUrl::parse(&visited_url.url, None);\n                    if let Ok(mut manager) = cpm.lock() {\n                        let original_content = content.clone();\n\n                        // Create a content loader that loads module content by URL from storage.\n                        // This enables Astro module inlining (and any future processor that needs it).\n                        let content_loader = |url_str: &str| -> Option<String> {\n                            let parsed = ParsedUrl::parse(url_str, None);\n                            let full_url = parsed.get_full_url(true, false);\n                            let mut hasher = Md5::new();\n                            hasher.update(full_url.as_bytes());\n                            let hash = format!(\"{:x}\", hasher.finalize());\n                            let uq_id = hash[..8].to_string();\n                            status.get_url_body_text(&uq_id)\n                        };\n\n                        manager.apply_content_changes_for_offline_version_with_loader(\n                            &mut content,\n                            visited_url.content_type,\n                            &parsed_url,\n                            self.offline_export_remove_unwanted_code,\n                            &content_loader,\n                        );\n                        // If content was somehow corrupted, use original\n                        if content.is_empty() {\n                            content = original_content;\n                        }\n                    }\n                }\n\n                // Apply custom content 
replacements\n                if !self.replace_content.is_empty() {\n                    for replace in &self.replace_content {\n                        let parts: Vec<&str> = replace.splitn(2, \"->\").collect();\n                        let replace_from = parts[0].trim();\n                        let replace_to = if parts.len() > 1 { parts[1].trim() } else { \"\" };\n\n                        let is_regex = crate::utils::is_regex_pattern(replace_from);\n\n                        if is_regex {\n                            if let Some(pattern) = extract_regex_pattern(replace_from)\n                                && let Ok(re) = Regex::new(&pattern)\n                            {\n                                content = re.replace_all(&content, replace_to).to_string();\n                            }\n                        } else {\n                            content = content.replace(replace_from, replace_to);\n                        }\n                    }\n                }\n\n                content.into_bytes()\n            } else {\n                body_bytes\n            };\n\n        // Build store file path\n        let relative_path = self.get_relative_file_path_for_file_by_url(visited_url, status);\n        let sanitized_path = OfflineUrlConverter::sanitize_file_path(&relative_path, false);\n        // Path traversal protection: strip \"../\" sequences from sanitized path\n        let sanitized_path = sanitized_path.replace(\"../\", \"\").replace(\"..\\\\\", \"\");\n        let store_file_path = format!(\"{}/{}\", export_dir, sanitized_path);\n\n        // Create directory structure\n        let dir_path = Path::new(&store_file_path).parent().ok_or_else(|| {\n            CrawlerError::Export(format!(\"Cannot determine parent directory for '{}'\", store_file_path))\n        })?;\n\n        if !dir_path.exists() {\n            fs::create_dir_all(dir_path).map_err(|e| {\n                CrawlerError::Export(format!(\"Cannot create directory '{}': {}\", 
dir_path.display(), e))\n            })?;\n        }\n\n        // Check if we should save the file\n        let mut save_file = true;\n        if Path::new(&store_file_path).exists()\n            && let Some(ref initial_url) = self.initial_parsed_url\n            && !visited_url.is_https()\n            && initial_url.is_https()\n        {\n            save_file = false;\n            let message = format!(\n                \"File '{}' already exists and will not be overwritten because initial request was HTTPS and this request is HTTP: {}\",\n                store_file_path, visited_url.url\n            );\n            status.add_notice_to_summary(\"offline-exporter-store-file-ignored\", &message);\n        }\n\n        if save_file {\n            match fs::write(&store_file_path, &final_bytes) {\n                Ok(()) => {\n                    self.exported_file_paths\n                        .insert(visited_url.url.clone(), sanitized_path.clone());\n                }\n                Err(e) => {\n                    let has_extension = Regex::new(r\"(?i)\\.[a-z0-9\\-]{1,15}$\")\n                        .map(|re| re.is_match(&store_file_path))\n                        .unwrap_or(false);\n\n                    if has_extension && !self.ignore_store_file_error {\n                        return Err(CrawlerError::Export(format!(\n                            \"Cannot store file '{}': {}\",\n                            store_file_path, e\n                        )));\n                    } else {\n                        let message = format!(\n                            \"Cannot store file '{}' (undefined extension). 
Original URL: {}\",\n                            store_file_path, visited_url.url\n                        );\n                        status.add_notice_to_summary(\"offline-exporter-store-file-error\", &message);\n                    }\n                }\n            }\n        }\n\n        Ok(())\n    }\n\n    /// Check if URL should be stored based on filters.\n    fn should_be_url_stored(&self, visited_url: &VisitedUrl) -> bool {\n        let mut result = false;\n\n        // Check --offline-export-store-only-url-regex\n        if !self.offline_export_store_only_url_regex.is_empty() {\n            for regex_str in &self.offline_export_store_only_url_regex {\n                let pattern = crate::utils::extract_pcre_regex_pattern(regex_str);\n                if let Ok(re) = Regex::new(&pattern)\n                    && re.is_match(&visited_url.url)\n                {\n                    result = true;\n                    break;\n                }\n            }\n        } else {\n            result = true;\n        }\n\n        // Check --allow-domain-* for external domains\n        if result && visited_url.is_external {\n            let parsed_url = ParsedUrl::parse(&visited_url.url, None);\n            if let Some(ref host) = parsed_url.host\n                && let Some(ref cb) = self.is_external_domain_allowed_for_crawling\n            {\n                if cb(host) {\n                    result = true;\n                } else if visited_url.is_static_file() || parsed_url.is_static_file() {\n                    if let Some(ref static_cb) = self.is_domain_allowed_for_static_files {\n                        result = static_cb(host);\n                    } else {\n                        result = false;\n                    }\n                } else {\n                    result = false;\n                }\n            }\n        }\n\n        result\n    }\n\n    /// Get relative file path for storing a visited URL.\n    fn 
get_relative_file_path_for_file_by_url(&self, visited_url: &VisitedUrl, status: &Status) -> String {\n        let initial_url = self\n            .initial_parsed_url\n            .clone()\n            .unwrap_or_else(|| ParsedUrl::parse(&visited_url.url, None));\n\n        let source_url = if !visited_url.source_uq_id.is_empty() {\n            status\n                .get_url_by_uq_id(&visited_url.source_uq_id)\n                .unwrap_or_else(|| visited_url.url.clone())\n        } else {\n            visited_url.url.clone()\n        };\n\n        let base_url = ParsedUrl::parse(&source_url, None);\n        let target_url = ParsedUrl::parse(&visited_url.url, None);\n\n        // Determine source attribute hint\n        let attribute = if visited_url.content_type == ContentTypeId::Image {\n            \"src\"\n        } else {\n            \"href\"\n        };\n\n        let mut converter = OfflineUrlConverter::new(initial_url, base_url, target_url, None, None, Some(attribute));\n        converter.set_preserve_url_structure(self.offline_export_preserve_url_structure);\n\n        let relative_url = converter.convert_url_to_relative(false);\n        let relative_target_url = converter.get_relative_target_url();\n        let target_domain_relation = converter.get_target_domain_relation();\n\n        match target_domain_relation {\n            TargetDomainRelation::InitialDifferentBaseSame | TargetDomainRelation::InitialDifferentBaseDifferent => {\n                let relative_path = relative_url\n                    .replace(\"../\", \"\")\n                    .trim_start_matches(['/', ' '])\n                    .to_string();\n                let host = relative_target_url.host.as_deref().unwrap_or(\"\");\n                if !relative_path.starts_with(&format!(\"_{}\", host)) {\n                    format!(\"_{}/{}\", host, relative_path)\n                } else {\n                    relative_path\n                }\n            }\n            
TargetDomainRelation::InitialSameBaseSame | TargetDomainRelation::InitialSameBaseDifferent => relative_url\n                .replace(\"../\", \"\")\n                .trim_start_matches(['/', ' '])\n                .to_string(),\n        }\n    }\n\n    /// Validate URL for export.\n    fn is_valid_url(url: &str) -> bool {\n        // First try standard URL parsing\n        if url::Url::parse(url).is_ok() {\n            return true;\n        }\n\n        // Try with URL-encoded version for international characters\n        let encoded: String = url\n            .chars()\n            .map(|c| {\n                if c.is_ascii() && c as u32 >= 0x20 && (c as u32) <= 0x7E {\n                    c.to_string()\n                } else {\n                    percent_encoding::utf8_percent_encode(&c.to_string(), percent_encoding::NON_ALPHANUMERIC)\n                        .to_string()\n                }\n            })\n            .collect();\n\n        url::Url::parse(&encoded).is_ok()\n    }\n\n    /// Add redirect HTML files to subfolders that contain index.html.\n    fn add_redirect_html_to_subfolders(dir: &str) -> CrawlerResult<()> {\n        let dir_path = Path::new(dir);\n        if !dir_path.is_dir() {\n            return Ok(());\n        }\n\n        let entries = fs::read_dir(dir_path)\n            .map_err(|e| CrawlerError::Export(format!(\"Cannot read directory '{}': {}\", dir, e)))?;\n\n        for entry in entries.flatten() {\n            let path = entry.path();\n            if path.is_dir() {\n                let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or(\"\");\n                let index_html_path = path.join(\"index.html\");\n\n                if index_html_path.exists() {\n                    // Create redirect HTML file for the folder\n                    let redirect_html_path = format!(\"{}.html\", path.display());\n                    if !Path::new(&redirect_html_path).exists() {\n                        let redirect_content = 
format!(\n                            \"<!DOCTYPE html><meta http-equiv=\\\"refresh\\\" content=\\\"0;url={}/index.html\\\">\",\n                            dir_name\n                        );\n                        let _ = fs::write(&redirect_html_path, redirect_content);\n                    }\n                }\n\n                // Recurse into subdirectories\n                Self::add_redirect_html_to_subfolders(&path.to_string_lossy())?;\n            }\n        }\n\n        Ok(())\n    }\n}\n\nimpl Exporter for OfflineWebsiteExporter {\n    fn get_name(&self) -> &str {\n        \"OfflineWebsiteExporter\"\n    }\n\n    fn should_be_activated(&self) -> bool {\n        self.offline_export_directory.is_some()\n    }\n\n    fn export(&mut self, status: &Status, output: &dyn Output) -> CrawlerResult<()> {\n        let start_time = Instant::now();\n        let export_dir = match self.offline_export_directory {\n            Some(ref dir) => dir.clone(),\n            None => return Ok(()),\n        };\n\n        // Set replace_query_string configuration\n        OfflineUrlConverter::set_replace_query_string(self.replace_query_string.clone());\n\n        // Set lowercase configuration for all URL conversions\n        OfflineUrlConverter::set_lowercase(self.offline_export_lowercase);\n\n        let visited_urls = status.get_visited_urls();\n\n        // Filter relevant URLs with OK status codes\n        let exported_urls: Vec<&VisitedUrl> = visited_urls\n            .iter()\n            .filter(|u| matches!(u.status_code, 200 | 201 | 301 | 302 | 303 | 308))\n            .collect();\n\n        // Store all allowed URLs\n        for exported_url in &exported_urls {\n            if Self::is_valid_url(&exported_url.url) && self.should_be_url_stored(exported_url) {\n                self.store_file(exported_url, status, output)?;\n            }\n        }\n\n        // Add redirect HTML files for subfolders\n        if !self.offline_export_no_auto_redirect_html {\n         
   let _ = Self::add_redirect_html_to_subfolders(&export_dir);\n        }\n\n        // Add info to summary\n        let duration = start_time.elapsed().as_secs_f64();\n        let formatted_path = utils::get_output_formatted_path(&export_dir);\n        let formatted_duration = utils::get_formatted_duration(duration);\n        status.add_info_to_summary(\n            \"offline-website-generated\",\n            &format!(\n                \"Offline website generated to '{}' and took {}\",\n                formatted_path, formatted_duration\n            ),\n        );\n\n        Ok(())\n    }\n}\n\n/// Extract regex pattern from a delimited string (e.g., /pattern/flags)\nfn extract_regex_pattern(input: &str) -> Option<String> {\n    if input.len() < 2 {\n        return None;\n    }\n    let delimiter = input.chars().next()?;\n    let rest = &input[1..];\n    if let Some(end_pos) = rest.rfind(delimiter) {\n        let pattern = &rest[..end_pos];\n        let flags = &rest[end_pos + 1..];\n        let mut regex_pattern = String::new();\n        if flags.contains('i') {\n            regex_pattern.push_str(\"(?i)\");\n        }\n        regex_pattern.push_str(pattern);\n        Some(regex_pattern)\n    } else {\n        None\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_is_valid_url() {\n        assert!(OfflineWebsiteExporter::is_valid_url(\"https://example.com/\"));\n        assert!(OfflineWebsiteExporter::is_valid_url(\"https://example.com/path/page\"));\n        assert!(!OfflineWebsiteExporter::is_valid_url(\"not-a-url\"));\n    }\n\n    #[test]\n    fn test_should_be_activated() {\n        let mut exporter = OfflineWebsiteExporter::new();\n        assert!(!exporter.should_be_activated());\n\n        exporter.set_offline_export_directory(Some(\"/tmp/offline\".to_string()));\n        assert!(exporter.should_be_activated());\n    }\n}\n"
  },
  {
    "path": "src/export/sitemap_exporter.rs",
    "content": "// SiteOne Crawler - SitemapExporter\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Generates sitemap.xml and/or sitemap.txt from crawl results.\n\nuse std::fs;\nuse std::io::Write;\nuse std::path::Path;\n\nuse crate::error::{CrawlerError, CrawlerResult};\nuse crate::export::exporter::Exporter;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\npub struct SitemapExporter {\n    /// Path for XML sitemap output (--sitemap-xml-file)\n    pub output_sitemap_xml: Option<String>,\n    /// Path for TXT sitemap output (--sitemap-txt-file)\n    pub output_sitemap_txt: Option<String>,\n    /// Base priority for XML sitemap entries (--sitemap-base-priority)\n    pub base_priority: f64,\n    /// Priority increase value based on slash count (--sitemap-priority-increase)\n    pub priority_increase: f64,\n}\n\nimpl SitemapExporter {\n    pub fn new(\n        output_sitemap_xml: Option<String>,\n        output_sitemap_txt: Option<String>,\n        base_priority: f64,\n        priority_increase: f64,\n    ) -> Self {\n        Self {\n            output_sitemap_xml,\n            output_sitemap_txt,\n            base_priority,\n            priority_increase,\n        }\n    }\n\n    /// Collect URLs eligible for sitemap: internal, HTML, 200 status code.\n    /// Sort by slash count ascending, then alphabetically.\n    fn collect_sitemap_urls(&self, status: &Status) -> Vec<String> {\n        let visited_urls = status.get_visited_urls();\n        let mut urls: Vec<String> = visited_urls\n            .iter()\n            .filter(|vu| !vu.is_external && vu.content_type == ContentTypeId::Html && vu.status_code == 200)\n            .map(|vu| vu.url.clone())\n            .collect();\n\n        // Sort by slash count ascending, then alphabetically\n        urls.sort_by(|a, b| {\n            let a_trimmed = a.trim_end_matches('/');\n            let b_trimmed = b.trim_end_matches('/');\n            
let a_slashes = a_trimmed.matches('/').count();\n            let b_slashes = b_trimmed.matches('/').count();\n            a_slashes.cmp(&b_slashes).then_with(|| a.cmp(b))\n        });\n\n        urls\n    }\n\n    /// Generate an XML sitemap file.\n    fn generate_xml_sitemap(&self, output_file: &str, urls: &[String]) -> CrawlerResult<String> {\n        // Ensure .xml extension\n        let output_file = if output_file.to_lowercase().ends_with(\".xml\") {\n            output_file.to_string()\n        } else {\n            let stripped = regex::Regex::new(r\"\\.xml$\")\n                .ok()\n                .map(|re| re.replace(output_file, \"\").to_string())\n                .unwrap_or_else(|| output_file.to_string());\n            format!(\"{}.xml\", stripped)\n        };\n\n        // Ensure parent directory exists\n        let path = Path::new(&output_file);\n        if let Some(parent) = path.parent()\n            && !parent.exists()\n        {\n            fs::create_dir_all(parent).map_err(|e| {\n                CrawlerError::Export(format!(\"Cannot create output directory '{}': {}\", parent.display(), e))\n            })?;\n        }\n\n        // Build XML content manually for proper formatting\n        let mut xml = String::new();\n        xml.push_str(\"<?xml version=\\\"1.0\\\" encoding=\\\"UTF-8\\\"?>\\n\");\n        xml.push_str(\"<urlset xmlns=\\\"https://www.sitemaps.org/schemas/sitemap/0.9\\\">\\n\");\n        xml.push_str(\"<!-- Sitemap generated using SiteOne Crawler - https://crawler.siteone.io/features/sitemap-generator/ -->\\n\");\n\n        for url in urls {\n            // Calculate priority based on slash count in path\n            let slashes_count = url::Url::parse(url)\n                .ok()\n                .map(|u| u.path().matches('/').count())\n                .unwrap_or(1) as f64;\n\n            let priority = (self.base_priority + (self.priority_increase * (1.0 - slashes_count))).clamp(0.1, 1.0);\n\n            // Escape special 
XML characters in URL\n            let escaped_url = escape_xml(url);\n\n            xml.push_str(\"  <url>\\n\");\n            xml.push_str(&format!(\"    <loc>{}</loc>\\n\", escaped_url));\n            xml.push_str(&format!(\"    <priority>{:.1}</priority>\\n\", priority));\n            xml.push_str(\"  </url>\\n\");\n        }\n\n        xml.push_str(\"</urlset>\\n\");\n\n        // Write to file\n        let mut file = fs::File::create(&output_file)\n            .map_err(|e| CrawlerError::Export(format!(\"Failed to create XML sitemap file '{}': {}\", output_file, e)))?;\n        file.write_all(xml.as_bytes())\n            .map_err(|e| CrawlerError::Export(format!(\"Failed to write XML sitemap to '{}': {}\", output_file, e)))?;\n\n        Ok(output_file)\n    }\n\n    /// Generate a TXT sitemap file (plain list of URLs).\n    fn generate_txt_sitemap(&self, output_file: &str, urls: &[String]) -> CrawlerResult<String> {\n        // Ensure .txt extension\n        let output_file = if output_file.to_lowercase().ends_with(\".txt\") {\n            output_file.to_string()\n        } else {\n            let stripped = regex::Regex::new(r\"\\.txt$\")\n                .ok()\n                .map(|re| re.replace(output_file, \"\").to_string())\n                .unwrap_or_else(|| output_file.to_string());\n            format!(\"{}.txt\", stripped)\n        };\n\n        // Ensure parent directory exists\n        let path = Path::new(&output_file);\n        if let Some(parent) = path.parent()\n            && !parent.exists()\n        {\n            fs::create_dir_all(parent).map_err(|e| {\n                CrawlerError::Export(format!(\"Cannot create output directory '{}': {}\", parent.display(), e))\n            })?;\n        }\n\n        let content = urls.join(\"\\n\");\n        fs::write(&output_file, &content)\n            .map_err(|e| CrawlerError::Export(format!(\"Failed to write TXT sitemap to '{}': {}\", output_file, e)))?;\n\n        Ok(output_file)\n    
}\n}\n\nimpl Exporter for SitemapExporter {\n    fn get_name(&self) -> &str {\n        \"SitemapExporter\"\n    }\n\n    fn should_be_activated(&self) -> bool {\n        self.output_sitemap_xml.is_some() || self.output_sitemap_txt.is_some()\n    }\n\n    fn export(&mut self, status: &Status, _output: &dyn Output) -> CrawlerResult<()> {\n        let urls = self.collect_sitemap_urls(status);\n\n        // Generate XML sitemap\n        if let Some(ref output_file) = self.output_sitemap_xml.clone() {\n            match self.generate_xml_sitemap(output_file, &urls) {\n                Ok(sitemap_file) => {\n                    let display_path = utils::get_output_formatted_path(&sitemap_file);\n                    status.add_info_to_summary(\"sitemap-xml\", &format!(\"XML sitemap generated to '{}'\", display_path));\n                }\n                Err(e) => {\n                    status.add_critical_to_summary(\"sitemap-xml\", &format!(\"Sitemap XML ERROR: {}\", e));\n                }\n            }\n        }\n\n        // Generate TXT sitemap\n        if let Some(ref output_file) = self.output_sitemap_txt.clone() {\n            match self.generate_txt_sitemap(output_file, &urls) {\n                Ok(sitemap_file) => {\n                    let display_path = utils::get_output_formatted_path(&sitemap_file);\n                    status.add_info_to_summary(\"sitemap-txt\", &format!(\"TXT sitemap generated to '{}'\", display_path));\n                }\n                Err(e) => {\n                    status.add_critical_to_summary(\"sitemap-txt\", &format!(\"Sitemap TXT ERROR: {}\", e));\n                }\n            }\n        }\n\n        Ok(())\n    }\n}\n\n/// Escape special XML characters in a string.\nfn escape_xml(s: &str) -> String {\n    s.replace('&', \"&amp;\")\n        .replace('<', \"&lt;\")\n        .replace('>', \"&gt;\")\n        .replace('\"', \"&quot;\")\n        .replace('\\'', \"&apos;\")\n}\n"
  },
  {
    "path": "src/export/upload_exporter.rs",
    "content": "// SiteOne Crawler - UploadExporter\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Uploads HTML report to crawler.siteone.io via HTTP POST.\n\nuse std::time::Instant;\n\nuse flate2::Compression;\nuse flate2::write::GzEncoder;\nuse std::io::Write;\n\nuse crate::error::{CrawlerError, CrawlerResult};\nuse crate::export::exporter::Exporter;\nuse crate::output::output::Output;\nuse crate::result::status::Status;\nuse crate::utils;\nuse crate::version;\n\npub struct UploadExporter {\n    /// Whether upload is enabled (--upload)\n    pub upload_enabled: bool,\n    /// Upload endpoint URL (--upload-to)\n    pub endpoint: String,\n    /// Retention period (--upload-retention)\n    pub retention: Option<String>,\n    /// Optional password for the online report (--upload-password)\n    pub password: Option<String>,\n    /// Upload timeout in seconds (--upload-timeout)\n    pub upload_timeout: u64,\n    /// HTML report content to upload (set before export)\n    pub html_report_content: Option<String>,\n}\n\nimpl UploadExporter {\n    pub fn new(\n        upload_enabled: bool,\n        endpoint: String,\n        retention: Option<String>,\n        password: Option<String>,\n        upload_timeout: u64,\n    ) -> Self {\n        Self {\n            upload_enabled,\n            endpoint,\n            retention,\n            password,\n            upload_timeout,\n            html_report_content: None,\n        }\n    }\n\n    /// Set HTML report content to be uploaded.\n    pub fn set_html_report_content(&mut self, content: String) {\n        self.html_report_content = Some(content);\n    }\n\n    /// Upload the HTML report to the configured endpoint.\n    /// Returns the URL where the report is available.\n    fn upload(&self, html: &str) -> CrawlerResult<String> {\n        // Gzip compress the HTML body\n        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());\n        encoder\n            .write_all(html.as_bytes())\n            
.map_err(|e| CrawlerError::Export(format!(\"Failed to compress HTML for upload: {}\", e)))?;\n        let compressed_html = encoder\n            .finish()\n            .map_err(|e| CrawlerError::Export(format!(\"Failed to finish compression for upload: {}\", e)))?;\n\n        // Build form data\n        let mut form = vec![\n            (\"version\".to_string(), version::CODE.to_string()),\n            (\"platform\".to_string(), std::env::consts::OS.to_string()),\n            (\"arch\".to_string(), get_arch()),\n        ];\n\n        if let Some(ref retention) = self.retention {\n            form.push((\"retention\".to_string(), retention.clone()));\n        }\n        if let Some(ref password) = self.password {\n            let trimmed = password.trim();\n            if !trimmed.is_empty() {\n                form.push((\"password\".to_string(), trimmed.to_string()));\n            }\n        }\n\n        // Send as application/x-www-form-urlencoded.\n        // The gzipped binary htmlBody is URL-encoded via percent-encoding.\n        let client = reqwest::blocking::Client::builder()\n            .timeout(std::time::Duration::from_secs(self.upload_timeout))\n            .build()\n            .map_err(|e| CrawlerError::Export(format!(\"Failed to create HTTP client for upload: {}\", e)))?;\n\n        // Build URL-encoded body manually — reqwest's .form() doesn't support binary values\n        use percent_encoding::{NON_ALPHANUMERIC, percent_encode};\n        let mut parts: Vec<String> = Vec::new();\n        let encoded_html = percent_encode(&compressed_html, NON_ALPHANUMERIC).to_string();\n        parts.push(format!(\"htmlBody={}\", encoded_html));\n        for (key, value) in &form {\n            parts.push(format!(\n                \"{}={}\",\n                percent_encode(key.as_bytes(), NON_ALPHANUMERIC),\n                percent_encode(value.as_bytes(), NON_ALPHANUMERIC)\n            ));\n        }\n        let body = parts.join(\"&\");\n\n        let response = 
client\n            .post(&self.endpoint)\n            .header(\"Content-Type\", \"application/x-www-form-urlencoded\")\n            .body(body)\n            .send()\n            .map_err(|e| CrawlerError::Export(format!(\"Upload request failed: {}\", e)))?;\n\n        let status_code = response.status();\n        let body = response.text().unwrap_or_default();\n\n        // Try to parse JSON response\n        if let Ok(json) = serde_json::from_str::<serde_json::Value>(&body) {\n            if let Some(url) = json.get(\"url\").and_then(|v| v.as_str()) {\n                return Ok(url.to_string());\n            }\n            if let Some(error) = json.get(\"error\").and_then(|v| v.as_str()) {\n                return Err(CrawlerError::Export(format!(\n                    \"Upload failed: {} ({})\",\n                    error, status_code\n                )));\n            }\n        }\n\n        Err(CrawlerError::Export(format!(\n            \"Upload failed: unknown error ({})\",\n            status_code\n        )))\n    }\n}\n\nimpl Exporter for UploadExporter {\n    fn get_name(&self) -> &str {\n        \"UploadExporter\"\n    }\n\n    fn should_be_activated(&self) -> bool {\n        self.upload_enabled\n    }\n\n    fn export(&mut self, status: &Status, _output: &dyn Output) -> CrawlerResult<()> {\n        let html = match &self.html_report_content {\n            Some(c) => c.clone(),\n            None => {\n                return Err(CrawlerError::Export(\n                    \"HTML report content not available. 
Set it via set_html_report_content() before export.\"\n                        .to_string(),\n                ));\n            }\n        };\n\n        let start = Instant::now();\n        match self.upload(&html) {\n            Ok(online_url) => {\n                let elapsed = start.elapsed().as_secs_f64();\n                status.add_info_to_summary(\n                    \"upload-done\",\n                    &format!(\n                        \"HTML report uploaded to '{}' and took {}\",\n                        online_url,\n                        utils::get_formatted_duration(elapsed)\n                    ),\n                );\n            }\n            Err(e) => {\n                let elapsed = start.elapsed().as_secs_f64();\n                status.add_critical_to_summary(\n                    \"upload-failed\",\n                    &format!(\n                        \"HTML report upload failed: {} and took {}\",\n                        e,\n                        utils::get_formatted_duration(elapsed)\n                    ),\n                );\n            }\n        }\n\n        Ok(())\n    }\n}\n\n/// Detect system architecture.\nfn get_arch() -> String {\n    match std::env::consts::ARCH {\n        \"x86_64\" => \"x64\".to_string(),\n        \"aarch64\" => \"arm64\".to_string(),\n        other => other.to_string(),\n    }\n}\n"
  },
  {
    "path": "src/export/utils/html_to_markdown.rs",
    "content": "// SiteOne Crawler - HtmlToMarkdownConverter\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Converts HTML to Markdown format using the scraper crate for HTML parsing.\n\nuse std::collections::HashMap;\n\nuse ego_tree::NodeRef;\nuse once_cell::sync::Lazy;\nuse regex::Regex;\nuse scraper::{Html, Node, Selector};\n\nstatic RE_NON_ALNUM: Lazy<Regex> = Lazy::new(|| Regex::new(r\"[^a-z0-9]+\").unwrap());\n\n/// Converts HTML content to Markdown format.\n/// Handles all HTML elements: headings, paragraphs, bold/italic, links, images,\n/// lists, tables, blockquotes, code blocks, horizontal rules, etc.\npub struct HtmlToMarkdownConverter {\n    html: String,\n    excluded_selectors: Vec<String>,\n    implicit_excluded_selectors: Vec<String>,\n    strong_delimiter: String,\n    em_delimiter: String,\n    bullet_list_marker: String,\n    code_block_fence: String,\n    horizontal_rule: String,\n    heading_style: HeadingStyle,\n    escape_mode: bool,\n    include_images: bool,\n    convert_tables: bool,\n    convert_strikethrough: bool,\n    strikethrough_delimiter: String,\n}\n\n#[derive(Debug, Clone, Copy, PartialEq)]\npub enum HeadingStyle {\n    Atx,\n    Setext,\n}\n\nimpl HtmlToMarkdownConverter {\n    pub fn new(html: &str, excluded_selectors: Vec<String>) -> Self {\n        Self {\n            html: html.to_string(),\n            excluded_selectors,\n            implicit_excluded_selectors: vec![\n                // Hidden elements\n                \".hidden\".to_string(),\n                \".hide\".to_string(),\n                \".invisible\".to_string(),\n                \".lg\\\\:sl-hidden\".to_string(),\n                \".md\\\\:sl-hidden\".to_string(),\n                \".lg\\\\:hidden\".to_string(),\n                \".md\\\\:hidden\".to_string(),\n                // ARIA hidden and menu elements\n                \"[aria-hidden='true']\".to_string(),\n                \"[role='menu']\".to_string(),\n                // Cookie consent banners\n   
             \".cookie-panel\".to_string(),\n                \".cookie-banner\".to_string(),\n                \".cookie-consent\".to_string(),\n                \".cookie-notice\".to_string(),\n                \".cookie-bar\".to_string(),\n                \"#cookie-banner\".to_string(),\n                \"#cookie-consent\".to_string(),\n                \"#cookie-notice\".to_string(),\n                \"#cookiebanner\".to_string(),\n                \"#CybotCookiebotDialog\".to_string(),\n                \".cc-window\".to_string(),\n                \"#onetrust-banner-sdk\".to_string(),\n            ],\n            strong_delimiter: \"**\".to_string(),\n            em_delimiter: \"*\".to_string(),\n            bullet_list_marker: \"-\".to_string(),\n            code_block_fence: \"```\".to_string(),\n            horizontal_rule: \"* * *\".to_string(),\n            heading_style: HeadingStyle::Atx,\n            escape_mode: true,\n            include_images: true,\n            convert_tables: true,\n            convert_strikethrough: true,\n            strikethrough_delimiter: \"~~\".to_string(),\n        }\n    }\n\n    pub fn set_strong_delimiter(&mut self, delimiter: &str) -> &mut Self {\n        self.strong_delimiter = delimiter.to_string();\n        self\n    }\n\n    pub fn set_em_delimiter(&mut self, delimiter: &str) -> &mut Self {\n        self.em_delimiter = delimiter.to_string();\n        self\n    }\n\n    pub fn set_bullet_list_marker(&mut self, marker: &str) -> &mut Self {\n        if [\"-\", \"*\", \"+\"].contains(&marker) {\n            self.bullet_list_marker = marker.to_string();\n        }\n        self\n    }\n\n    pub fn set_code_block_fence(&mut self, fence: &str) -> &mut Self {\n        if fence.len() >= 3 && fence.starts_with('`') {\n            self.code_block_fence = fence.to_string();\n        }\n        self\n    }\n\n    pub fn set_horizontal_rule(&mut self, rule: &str) -> &mut Self {\n        self.horizontal_rule = rule.to_string();\n       
 self\n    }\n\n    pub fn set_heading_style(&mut self, style: HeadingStyle) -> &mut Self {\n        self.heading_style = style;\n        self\n    }\n\n    pub fn set_escape_mode(&mut self, enable: bool) -> &mut Self {\n        self.escape_mode = enable;\n        self\n    }\n\n    pub fn set_include_images(&mut self, include: bool) -> &mut Self {\n        self.include_images = include;\n        self\n    }\n\n    pub fn set_convert_tables(&mut self, convert: bool) -> &mut Self {\n        self.convert_tables = convert;\n        self\n    }\n\n    pub fn set_convert_strikethrough(&mut self, convert: bool) -> &mut Self {\n        self.convert_strikethrough = convert;\n        self\n    }\n\n    pub fn set_strikethrough_delimiter(&mut self, delimiter: &str) -> &mut Self {\n        self.strikethrough_delimiter = delimiter.to_string();\n        self\n    }\n\n    /// Convert the HTML to Markdown.\n    pub fn get_markdown(&self) -> String {\n        let document = Html::parse_document(&self.html);\n\n        // Remove excluded selectors from the document - we'll skip these during conversion\n        let excluded_ids = self.collect_excluded_node_ids(&document);\n\n        // Try to get the body element first, fallback to documentElement\n        let body_selector = Selector::parse(\"body\").unwrap_or_else(|_| Selector::parse(\"*\").unwrap());\n        let start_node = document\n            .select(&body_selector)\n            .next()\n            .map(|el| el.id())\n            .unwrap_or_else(|| document.root_element().id());\n\n        let node_ref = document.tree.get(start_node);\n        let raw_markdown = match node_ref {\n            Some(node) => self.convert_node(&node, &document, &excluded_ids),\n            None => return String::new(),\n        };\n\n        let normalized = self.normalize_whitespace(&raw_markdown);\n\n        // Deduplication logic\n        let blocks: Vec<&str> = normalized.split(\"\\n\\n\").collect();\n        if blocks.len() <= 1 {\n       
     let result = normalized.trim().to_string();\n            return self.post_process(&result);\n        }\n\n        let mut fingerprints: HashMap<String, (String, usize)> = HashMap::new();\n        let mut unique_blocks: Vec<(usize, String)> = Vec::new();\n\n        for (index, original_block) in blocks.iter().enumerate() {\n            let trimmed = original_block.trim();\n\n            if trimmed.is_empty() {\n                unique_blocks.push((index, original_block.to_string()));\n                continue;\n            }\n\n            // Create fingerprint: lowercase alphanumeric only\n            let fingerprint = RE_NON_ALNUM.replace_all(&trimmed.to_lowercase(), \"\").to_string();\n\n            if fingerprint.is_empty() {\n                unique_blocks.push((index, original_block.to_string()));\n                continue;\n            }\n\n            if let Some((existing_block, existing_index)) = fingerprints.get(&fingerprint) {\n                // Duplicate found - keep the longer one\n                if trimmed.len() > existing_block.trim().len() {\n                    // Remove the shorter one\n                    unique_blocks.retain(|(idx, _)| *idx != *existing_index);\n                    unique_blocks.push((index, original_block.to_string()));\n                    fingerprints.insert(fingerprint, (original_block.to_string(), index));\n                }\n                // else: existing is longer or equal, discard current\n            } else {\n                fingerprints.insert(fingerprint, (original_block.to_string(), index));\n                unique_blocks.push((index, original_block.to_string()));\n            }\n        }\n\n        // Sort by original index to preserve order\n        unique_blocks.sort_by_key(|(idx, _)| *idx);\n\n        let final_markdown: String = unique_blocks\n            .into_iter()\n            .map(|(_, block)| block)\n            .collect::<Vec<_>>()\n            .join(\"\\n\\n\");\n\n        
self.post_process(&final_markdown)\n    }\n\n    fn post_process(&self, markdown: &str) -> String {\n        // Replace backslashes with actual characters\n        let result = Regex::new(r\"\\\\([.\\-])\")\n            .map(|re| re.replace_all(markdown, \"$1\").to_string())\n            .unwrap_or_else(|_| markdown.to_string());\n\n        result.trim().to_string()\n    }\n\n    /// Minimum number of links in a list block to trigger collapsing into accordion.\n    pub const MIN_LINKS_FOR_COLLAPSE: usize = 8;\n\n    /// Collapse large link lists into `<details>` accordions.\n    /// First collapsed list on the page gets \"Menu\" label, subsequent get \"Links\".\n    pub fn collapse_large_link_lists(markdown: &str) -> String {\n        let lines: Vec<&str> = markdown.lines().collect();\n        let len = lines.len();\n        let mut result_lines: Vec<String> = Vec::with_capacity(len);\n        let mut is_first_collapse = true;\n        let mut i = 0;\n\n        while i < len {\n            // Check if this line starts a list block\n            if Self::is_list_item(lines[i]) {\n                let block_start = i;\n\n                // Consume the entire list block\n                while i < len {\n                    if Self::is_list_item(lines[i]) || Self::is_list_continuation(lines[i]) {\n                        i += 1;\n                    } else if lines[i].trim().is_empty() {\n                        // Blank line — check if the list continues after it\n                        let mut next_non_blank = i + 1;\n                        while next_non_blank < len && lines[next_non_blank].trim().is_empty() {\n                            next_non_blank += 1;\n                        }\n                        if next_non_blank < len && Self::is_list_item(lines[next_non_blank]) {\n                            // List continues after blank line(s)\n                            i = next_non_blank;\n                        } else {\n                            break;\n   
                     }\n                    } else {\n                        break;\n                    }\n                }\n\n                let block_end = i;\n                let block_lines = &lines[block_start..block_end];\n\n                // Count lines containing markdown links\n                let link_count = block_lines.iter().filter(|line| line.contains(\"](\")).count();\n\n                if link_count > Self::MIN_LINKS_FOR_COLLAPSE {\n                    let label = if is_first_collapse { \"Menu\" } else { \"Links\" };\n                    is_first_collapse = false;\n                    result_lines.push(\"<details>\".to_string());\n                    result_lines.push(format!(\"<summary>{}</summary>\", label));\n                    result_lines.push(String::new());\n                    for line in block_lines {\n                        result_lines.push(line.to_string());\n                    }\n                    result_lines.push(String::new());\n                    result_lines.push(\"</details>\".to_string());\n                    result_lines.push(String::new()); // blank line required so next Markdown (e.g. heading) isn't swallowed into the HTML block\n                } else {\n                    for line in block_lines {\n                        result_lines.push(line.to_string());\n                    }\n                }\n            } else {\n                result_lines.push(lines[i].to_string());\n                i += 1;\n            }\n        }\n\n        result_lines.join(\"\\n\")\n    }\n\n    /// Check if a line is a list item (starts with `- `, `* `, `+ `, or numbered `1. `).\n    fn is_list_item(line: &str) -> bool {\n        let trimmed = line.trim_start();\n        trimmed.starts_with(\"- \")\n            || trimmed.starts_with(\"* \")\n            || trimmed.starts_with(\"+ \")\n            || trimmed.bytes().next().is_some_and(|b| b.is_ascii_digit()) && trimmed.contains(\". 
\")\n    }\n\n    /// Check if a line is a continuation of a list item (indented text that's not a new item).\n    fn is_list_continuation(line: &str) -> bool {\n        let trimmed = line.trim_start();\n        // Indented non-empty line that's not a list item itself\n        line.len() > trimmed.len() && !trimmed.is_empty()\n    }\n\n    /// Collect node IDs of elements matching excluded selectors\n    fn collect_excluded_node_ids(&self, document: &Html) -> Vec<ego_tree::NodeId> {\n        let mut excluded = Vec::new();\n        let all_selectors: Vec<&str> = self\n            .excluded_selectors\n            .iter()\n            .chain(self.implicit_excluded_selectors.iter())\n            .map(|s| s.as_str())\n            .collect();\n\n        for selector_str in all_selectors {\n            if let Ok(selector) = Selector::parse(selector_str) {\n                for element in document.select(&selector) {\n                    excluded.push(element.id());\n                    // Also exclude all descendants\n                    for descendant in element.descendants() {\n                        excluded.push(descendant.id());\n                    }\n                }\n            }\n        }\n\n        // Also collect unwanted tags: script, style, noscript, head, meta, link, iframe, frame\n        for tag in &[\"script\", \"style\", \"noscript\", \"head\", \"meta\", \"link\", \"iframe\", \"frame\"] {\n            if let Ok(selector) = Selector::parse(tag) {\n                for element in document.select(&selector) {\n                    excluded.push(element.id());\n                    for descendant in element.descendants() {\n                        excluded.push(descendant.id());\n                    }\n                }\n            }\n        }\n\n        excluded\n    }\n\n    /// Convert a DOM node to Markdown.\n    fn convert_node(&self, node: &NodeRef<Node>, document: &Html, excluded: &[ego_tree::NodeId]) -> String {\n        if 
excluded.contains(&node.id()) {\n            return String::new();\n        }\n\n        match node.value() {\n            Node::Text(text) => {\n                let text_content = text.text.to_string();\n                // Check parent context\n                if let Some(parent) = node.parent()\n                    && let Node::Element(el) = parent.value()\n                {\n                    let tag = el.name.local.as_ref();\n                    if tag == \"code\" || tag == \"pre\" {\n                        return text_content;\n                    }\n                }\n                self.escape_markdown_chars(&text_content)\n            }\n            Node::Element(el) => {\n                let tag = el.name.local.as_ref().to_lowercase();\n                match tag.as_str() {\n                    \"strong\" | \"b\" => {\n                        let inner = self.collapse_inline_whitespace(&self.get_inner_markdown(node, document, excluded));\n                        self.wrap_with_delimiter(&inner, &self.strong_delimiter)\n                    }\n                    \"em\" | \"i\" => {\n                        let inner = self.collapse_inline_whitespace(&self.get_inner_markdown(node, document, excluded));\n                        self.wrap_with_delimiter(&inner, &self.em_delimiter)\n                    }\n                    \"h1\" | \"h2\" | \"h3\" | \"h4\" | \"h5\" | \"h6\" => self.convert_heading(node, document, excluded),\n                    \"p\" => {\n                        let inner = self.get_inner_markdown(node, document, excluded).trim().to_string();\n                        if inner.is_empty() {\n                            String::new()\n                        } else {\n                            format!(\"\\n\\n{}\\n\\n\", inner)\n                        }\n                    }\n                    \"br\" => \"  \\n\".to_string(),\n                    \"hr\" => format!(\"\\n\\n{}\\n\\n\", self.horizontal_rule),\n                    \"a\" => 
self.convert_link(node, document, excluded),\n                    \"img\" => self.convert_image(node),\n                    \"code\" => self.convert_inline_code(node),\n                    \"pre\" => self.convert_code_block(node, document),\n                    \"ul\" | \"ol\" => self.convert_list_to_markdown(node, document, excluded),\n                    \"blockquote\" => self.convert_blockquote(node, document, excluded),\n                    \"table\" => self.convert_table(node, document, excluded),\n                    \"s\" | \"del\" | \"strike\" => {\n                        if !self.convert_strikethrough {\n                            return self.get_inner_markdown(node, document, excluded);\n                        }\n                        let inner = self.collapse_inline_whitespace(&self.get_inner_markdown(node, document, excluded));\n                        self.wrap_with_delimiter(&inner, &self.strikethrough_delimiter)\n                    }\n                    \"dl\" => self.convert_definition_list(node, document, excluded),\n                    \"dt\" | \"dd\" => self.get_inner_markdown(node, document, excluded),\n                    \"sup\" => {\n                        let inner = self.collapse_inline_whitespace(&self.get_inner_markdown(node, document, excluded));\n                        format!(\"^{}^\", inner)\n                    }\n                    \"sub\" => {\n                        let inner = self.collapse_inline_whitespace(&self.get_inner_markdown(node, document, excluded));\n                        format!(\"~{}~\", inner)\n                    }\n                    // Ignored form/non-content elements\n                    \"form\" | \"fieldset\" | \"legend\" | \"label\" | \"dialog\" | \"button\" | \"input\" | \"select\"\n                    | \"textarea\" | \"script\" | \"style\" | \"noscript\" | \"head\" | \"meta\" | \"link\" | \"iframe\" | \"frame\" => {\n                        String::new()\n                    }\n              
      // Block container elements - wrap with newlines to prevent text concatenation\n                    \"nav\" | \"header\" | \"footer\" | \"aside\" | \"article\" | \"section\" | \"main\" | \"figure\"\n                    | \"figcaption\" | \"div\" => {\n                        let inner = self.get_inner_markdown(node, document, excluded);\n                        let trimmed = inner.trim();\n                        if trimmed.is_empty() {\n                            String::new()\n                        } else {\n                            format!(\"\\n\\n{}\\n\\n\", trimmed)\n                        }\n                    }\n                    // Inline container elements\n                    \"span\" => self.get_inner_markdown(node, document, excluded),\n                    _ => self.get_inner_markdown(node, document, excluded),\n                }\n            }\n            Node::Comment(_) => String::new(),\n            _ => String::new(),\n        }\n    }\n\n    /// Get inner markdown by processing all children of a node.\n    fn get_inner_markdown(&self, node: &NodeRef<Node>, document: &Html, excluded: &[ego_tree::NodeId]) -> String {\n        let mut markdown = String::new();\n        let mut consecutive_links: Vec<ego_tree::NodeId> = Vec::new();\n\n        for child in node.children() {\n            if excluded.contains(&child.id()) {\n                continue;\n            }\n\n            let is_valid_link = self.is_valid_link_node(&child);\n\n            if is_valid_link {\n                consecutive_links.push(child.id());\n            } else if matches!(child.value(), Node::Text(t) if t.text.trim().is_empty())\n                && !consecutive_links.is_empty()\n            {\n                // Ignore whitespace between links\n                continue;\n            } else {\n                // Process collected links\n                if consecutive_links.len() >= 2 {\n                    
markdown.push_str(&self.convert_consecutive_links_to_table(&consecutive_links, document, excluded));\n                } else if consecutive_links.len() == 1\n                    && let Some(link_node) = document.tree.get(consecutive_links[0])\n                {\n                    markdown.push_str(&self.convert_link(&link_node, document, excluded));\n                }\n                consecutive_links.clear();\n\n                markdown.push_str(&self.convert_node(&child, document, excluded));\n            }\n        }\n\n        // Process remaining links\n        if consecutive_links.len() >= 2 {\n            markdown.push_str(&self.convert_consecutive_links_to_table(&consecutive_links, document, excluded));\n        } else if consecutive_links.len() == 1\n            && let Some(link_node) = document.tree.get(consecutive_links[0])\n        {\n            markdown.push_str(&self.convert_link(&link_node, document, excluded));\n        }\n\n        markdown\n    }\n\n    /// Check if a node is a valid link for consecutive link detection.\n    fn is_valid_link_node(&self, node: &NodeRef<Node>) -> bool {\n        if let Node::Element(el) = node.value() {\n            if el.name.local.as_ref() != \"a\" {\n                return false;\n            }\n            let href = el.attr(\"href\");\n            if href.map(|v| v.is_empty()).unwrap_or(true) {\n                return false;\n            }\n            // Must have text content or image child\n            let text_content = self.extract_text_content(node).trim().to_string();\n            let has_image = node\n                .descendants()\n                .any(|d| matches!(d.value(), Node::Element(e) if e.name.local.as_ref() == \"img\"));\n            !text_content.is_empty() || has_image\n        } else {\n            false\n        }\n    }\n\n    /// Extract plain text content from a node recursively.\n    fn extract_text_content(&self, node: &NodeRef<Node>) -> String {\n        let mut text = 
String::new();\n        for child in node.descendants() {\n            if let Node::Text(t) = child.value() {\n                text.push_str(&t.text);\n            }\n        }\n        text\n    }\n\n    /// Collapse multiple whitespace characters into a single space.\n    fn collapse_inline_whitespace(&self, text: &str) -> String {\n        let text = text.replace(\"&nbsp;\", \" \").replace('\\u{00A0}', \" \");\n        Regex::new(r\"\\s+\")\n            .map(|re| re.replace_all(&text, \" \").trim().to_string())\n            .unwrap_or_else(|_| text.trim().to_string())\n    }\n\n    /// Convert heading element to Markdown.\n    fn convert_heading(&self, node: &NodeRef<Node>, document: &Html, excluded: &[ego_tree::NodeId]) -> String {\n        if let Node::Element(el) = node.value() {\n            let tag = el.name.local.as_ref();\n            let level: usize = tag[1..].parse().unwrap_or(1);\n            let content = self.collapse_inline_whitespace(&self.get_inner_markdown(node, document, excluded));\n            // Remove markdown characters that might interfere inside headings\n            let content = content.replace(['#', '*', '_', '`', '[', ']'], \"\");\n            let content = content.trim().to_string();\n\n            if content.is_empty() {\n                return String::new();\n            }\n\n            if self.heading_style == HeadingStyle::Setext && level <= 2 {\n                let underline_char = if level == 1 { '=' } else { '-' };\n                let underline = underline_char.to_string().repeat(content.chars().count());\n                format!(\"\\n\\n{}\\n{}\\n\\n\", content, underline)\n            } else {\n                let prefix = \"#\".repeat(level);\n                format!(\"\\n\\n{} {}\\n\\n\", prefix, content)\n            }\n        } else {\n            String::new()\n        }\n    }\n\n    /// Convert link element to Markdown.\n    fn convert_link(&self, node: &NodeRef<Node>, document: &Html, excluded: 
&[ego_tree::NodeId]) -> String {\n        if let Node::Element(el) = node.value() {\n            let href = el.attr(\"href\").unwrap_or(\"\").to_string();\n\n            if href.is_empty() {\n                return self.get_inner_markdown(node, document, excluded);\n            }\n\n            let text = self.collapse_inline_whitespace(&self.get_inner_markdown(node, document, excluded));\n\n            let text = if !text.is_empty() {\n                text\n            } else if let Some(aria_label) = el.attr(\"aria-label\") {\n                let label = aria_label.trim().to_string();\n                if label.is_empty() { href.clone() } else { label }\n            } else {\n                href.clone()\n            };\n\n            let title = el.attr(\"title\").unwrap_or(\"\").to_string();\n\n            let mut markdown = format!(\"[{}]({}\", text, href);\n            if !title.is_empty() {\n                markdown.push_str(&format!(\" \\\"{}\\\"\", self.escape_markdown_chars(&title)));\n            }\n            markdown.push(')');\n\n            markdown\n        } else {\n            String::new()\n        }\n    }\n\n    /// Convert image element to Markdown.\n    fn convert_image(&self, node: &NodeRef<Node>) -> String {\n        if let Node::Element(el) = node.value() {\n            if !self.include_images {\n                let alt = el.attr(\"alt\").unwrap_or(\"\").to_string();\n                return if alt.is_empty() {\n                    String::new()\n                } else {\n                    self.escape_markdown_chars(&alt)\n                };\n            }\n\n            let alt = self.collapse_inline_whitespace(el.attr(\"alt\").unwrap_or(\"\"));\n            let src = el.attr(\"src\").unwrap_or(\"\").to_string();\n            let title = el.attr(\"title\").unwrap_or(\"\").to_string();\n\n            if src.is_empty() {\n                return String::new();\n            }\n\n            let title = self.escape_markdown_chars(&title);\n\n 
           let mut markdown = format!(\"![{}]({}\", alt, src);\n            if !title.is_empty() {\n                markdown.push_str(&format!(\" \\\"{}\\\"\", title));\n            }\n            markdown.push(')');\n\n            format!(\"\\n\\n{}\\n\\n\", markdown)\n        } else {\n            String::new()\n        }\n    }\n\n    /// Convert inline code element to Markdown.\n    fn convert_inline_code(&self, node: &NodeRef<Node>) -> String {\n        let code = self.extract_text_content(node);\n        let trimmed_code = code.trim();\n\n        // Determine required backticks\n        let mut max_backticks = 0usize;\n        let mut current_count = 0usize;\n        for ch in code.chars() {\n            if ch == '`' {\n                current_count += 1;\n                max_backticks = max_backticks.max(current_count);\n            } else {\n                current_count = 0;\n            }\n        }\n        let fence = \"`\".repeat(max_backticks + 1);\n\n        let prefix_space = if trimmed_code.starts_with('`') { \" \" } else { \"\" };\n        let suffix_space = if trimmed_code.ends_with('`') { \" \" } else { \"\" };\n\n        format!(\"{}{}{}{}{}\", fence, prefix_space, trimmed_code, suffix_space, fence)\n    }\n\n    /// Convert pre/code block to Markdown.\n    fn convert_code_block(&self, node: &NodeRef<Node>, _document: &Html) -> String {\n        // Find inner <code> element if present\n        let code_text = node\n            .descendants()\n            .find(|d| matches!(d.value(), Node::Element(e) if e.name.local.as_ref() == \"code\"))\n            .map(|code_node| self.extract_text_content(&code_node))\n            .unwrap_or_else(|| self.extract_text_content(node));\n\n        let code = code_text.trim_matches(|c: char| c == '\\n' || c == '\\r');\n\n        // Replace '\\' followed by multiple spaces with '\\' + newline + spaces\n        let code = Regex::new(r\"(\\\\)(\\s{2,})\")\n            .map(|re| re.replace_all(code, 
\"$1\\n$2\").to_string())\n            .unwrap_or_else(|_| code.to_string());\n\n        // Detect language from class attribute\n        let mut language = String::new();\n\n        // Check class on <pre> or inner <code>\n        let class_attr = if let Node::Element(el) = node.value() {\n            el.attr(\"class\").map(|v| v.to_string())\n        } else {\n            None\n        };\n\n        let class_to_check = class_attr.or_else(|| {\n            node.descendants()\n                .find(|d| matches!(d.value(), Node::Element(e) if e.name.local.as_ref() == \"code\"))\n                .and_then(|code_node| {\n                    if let Node::Element(el) = code_node.value() {\n                        el.attr(\"class\").map(|v| v.to_string())\n                    } else {\n                        None\n                    }\n                })\n        });\n\n        if let Some(class_val) = class_to_check {\n            for class in class_val.split_whitespace() {\n                if let Some(lang) = class.strip_prefix(\"language-\") {\n                    language = lang.to_string();\n                    break;\n                } else if let Some(lang) = class.strip_prefix(\"lang-\") {\n                    language = lang.to_string();\n                    break;\n                }\n            }\n        }\n\n        // Clean language identifier\n        language = language.replace(|c: char| c.is_whitespace() || c == '`', \"\");\n\n        format!(\n            \"\\n\\n{}{}\\n{}\\n{}\\n\\n\",\n            self.code_block_fence, language, code, self.code_block_fence\n        )\n    }\n\n    /// Convert blockquote element to Markdown.\n    fn convert_blockquote(&self, node: &NodeRef<Node>, document: &Html, excluded: &[ego_tree::NodeId]) -> String {\n        let content = self.get_inner_markdown(node, document, excluded);\n        let content = content.trim();\n        if content.is_empty() {\n            return String::new();\n        }\n\n        let mut 
markdown = String::new();\n        for line in content.lines() {\n            markdown.push_str(&format!(\"> {}\\n\", line));\n        }\n\n        format!(\"\\n\\n{}\\n\\n\", markdown.trim_end())\n    }\n\n    /// Convert table element to Markdown.\n    fn convert_table(&self, node: &NodeRef<Node>, document: &Html, excluded: &[ego_tree::NodeId]) -> String {\n        if !self.convert_tables {\n            // Return clean HTML table\n            return format!(\"\\n\\n{}\\n\\n\", self.extract_text_content(node).trim());\n        }\n\n        let mut rows: Vec<Vec<String>> = Vec::new();\n        let mut header_cells: Vec<String> = Vec::new();\n        let mut max_col_lengths: Vec<usize> = Vec::new();\n        let mut has_header = false;\n\n        // Process thead\n        for child in node.children() {\n            if let Node::Element(el) = child.value() {\n                let tag = el.name.local.as_ref();\n                if tag == \"thead\" {\n                    has_header = true;\n                    // Find tr in thead\n                    for thead_child in child.children() {\n                        if let Node::Element(tr_el) = thead_child.value()\n                            && tr_el.name.local.as_ref() == \"tr\"\n                        {\n                            let mut col_index = 0;\n                            for cell_node in thead_child.children() {\n                                if let Node::Element(cell_el) = cell_node.value() {\n                                    let cell_tag = cell_el.name.local.as_ref();\n                                    if cell_tag == \"th\" || cell_tag == \"td\" {\n                                        let content = self.extract_header_content(&cell_node, document, excluded);\n                                        while max_col_lengths.len() <= col_index {\n                                            max_col_lengths.push(0);\n                                        }\n                                        
max_col_lengths[col_index] =\n                                            max_col_lengths[col_index].max(content.chars().count());\n                                        header_cells.push(content);\n                                        col_index += 1;\n                                    }\n                                }\n                            }\n                            break; // Only first tr in thead\n                        }\n                    }\n                }\n            }\n        }\n\n        // Process tbody and direct tr children\n        let mut direct_trs: Vec<ego_tree::NodeId> = Vec::new();\n        for child in node.children() {\n            if let Node::Element(el) = child.value() {\n                let tag = el.name.local.as_ref();\n                if tag == \"tbody\" {\n                    for tbody_child in child.children() {\n                        if let Node::Element(tr_el) = tbody_child.value()\n                            && tr_el.name.local.as_ref() == \"tr\"\n                        {\n                            direct_trs.push(tbody_child.id());\n                        }\n                    }\n                } else if tag == \"tr\" && !has_header && direct_trs.is_empty() {\n                    direct_trs.push(child.id());\n                }\n            }\n        }\n\n        // If no tbody, look for direct TR children\n        if direct_trs.is_empty() && !has_header {\n            for child in node.children() {\n                if let Node::Element(el) = child.value()\n                    && el.name.local.as_ref() == \"tr\"\n                {\n                    direct_trs.push(child.id());\n                }\n            }\n        }\n\n        for tr_id in &direct_trs {\n            if let Some(tr_node) = document.tree.get(*tr_id) {\n                // If no header found yet, check if first row has <th>\n                if !has_header && rows.is_empty() {\n                    let mut potential_header: 
Vec<String> = Vec::new();\n                    let mut is_potential_header = false;\n\n                    for cell_node in tr_node.children() {\n                        if let Node::Element(cell_el) = cell_node.value() {\n                            let cell_tag = cell_el.name.local.as_ref();\n                            if cell_tag == \"th\" || cell_tag == \"td\" {\n                                if cell_tag == \"th\" {\n                                    is_potential_header = true;\n                                }\n                                let content = self.extract_header_content(&cell_node, document, excluded);\n                                let col_index = potential_header.len();\n                                while max_col_lengths.len() <= col_index {\n                                    max_col_lengths.push(0);\n                                }\n                                max_col_lengths[col_index] = max_col_lengths[col_index].max(content.chars().count());\n                                potential_header.push(content);\n                            }\n                        }\n                    }\n\n                    if is_potential_header {\n                        header_cells = potential_header;\n                        has_header = true;\n                        continue;\n                    }\n                }\n\n                // Process as data row\n                let mut row_cells: Vec<String> = Vec::new();\n                for cell_node in tr_node.children() {\n                    if let Node::Element(cell_el) = cell_node.value() {\n                        let cell_tag = cell_el.name.local.as_ref();\n                        if cell_tag == \"th\" || cell_tag == \"td\" {\n                            let content = self\n                                .collapse_inline_whitespace(&self.get_inner_markdown(&cell_node, document, excluded));\n                            let col_index = row_cells.len();\n                        
    while max_col_lengths.len() <= col_index {\n                                max_col_lengths.push(0);\n                            }\n                            max_col_lengths[col_index] = max_col_lengths[col_index].max(content.chars().count());\n                            row_cells.push(content);\n                        }\n                    }\n                }\n\n                // Pad row if fewer cells than max columns\n                let num_cols = max_col_lengths.len();\n                while row_cells.len() < num_cols {\n                    row_cells.push(String::new());\n                }\n\n                rows.push(row_cells);\n            }\n        }\n\n        if header_cells.is_empty() && rows.is_empty() {\n            return String::new();\n        }\n\n        // Determine number of columns\n        let mut num_cols = header_cells.len();\n        for row in &rows {\n            num_cols = num_cols.max(row.len());\n        }\n\n        // Ensure min length 3 for separator\n        while max_col_lengths.len() < num_cols {\n            max_col_lengths.push(0);\n        }\n        for length in &mut max_col_lengths {\n            *length = (*length).max(3);\n        }\n\n        let mut markdown = \"\\n\\n\".to_string();\n        if !header_cells.is_empty() {\n            while header_cells.len() < num_cols {\n                header_cells.push(String::new());\n            }\n            markdown.push_str(&self.format_table_row(&header_cells, &max_col_lengths));\n            markdown.push_str(&self.format_table_separator(&max_col_lengths));\n        } else {\n            markdown.push_str(&self.format_table_separator(&max_col_lengths));\n        }\n\n        for row in &rows {\n            let mut padded_row = row.clone();\n            while padded_row.len() < num_cols {\n                padded_row.push(String::new());\n            }\n            markdown.push_str(&self.format_table_row(&padded_row, &max_col_lengths));\n        }\n\n        
format!(\"{}\\n\\n\", markdown.trim_end())\n    }\n\n    /// Extract header content from a table header cell.\n    fn extract_header_content(&self, cell: &NodeRef<Node>, document: &Html, excluded: &[ego_tree::NodeId]) -> String {\n        let content = self.collapse_inline_whitespace(&self.get_inner_markdown(cell, document, excluded));\n\n        if content.trim().is_empty() {\n            // Fallback: extract text content directly\n            self.collapse_inline_whitespace(&self.extract_text_content(cell))\n        } else {\n            content\n        }\n    }\n\n    /// Convert consecutive links to a table.\n    fn convert_consecutive_links_to_table(\n        &self,\n        link_ids: &[ego_tree::NodeId],\n        document: &Html,\n        excluded: &[ego_tree::NodeId],\n    ) -> String {\n        let mut cells: Vec<String> = Vec::new();\n        let mut max_col_lengths: Vec<usize> = Vec::new();\n\n        for link_id in link_ids {\n            if let Some(link_node) = document.tree.get(*link_id) {\n                let cell_content = self.convert_link(&link_node, document, excluded);\n                if cell_content.is_empty() {\n                    continue;\n                }\n                max_col_lengths.push(cell_content.chars().count().max(3));\n                cells.push(cell_content);\n            }\n        }\n\n        if cells.is_empty() {\n            return String::new();\n        }\n\n        let mut markdown = \"\\n\\n\".to_string();\n        markdown.push_str(&self.format_table_row(&cells, &max_col_lengths));\n\n        format!(\"{}\\n\", markdown)\n    }\n\n    /// Format a table row.\n    fn format_table_row(&self, cells: &[String], max_lengths: &[usize]) -> String {\n        let mut row = \"|\".to_string();\n        for (i, cell) in cells.iter().enumerate() {\n            let max_length = max_lengths.get(i).copied().unwrap_or(cell.chars().count());\n            let padding_len = max_length.saturating_sub(cell.chars().count());\n           
 let padding = \" \".repeat(padding_len);\n            let escaped = self.escape_markdown_table_cell_content(cell);\n            row.push_str(&format!(\" {}{} |\", escaped, padding));\n        }\n        row.push('\\n');\n        row\n    }\n\n    /// Format a table separator row.\n    fn format_table_separator(&self, max_lengths: &[usize]) -> String {\n        let mut separator = \"|\".to_string();\n        for length in max_lengths {\n            let dash_count = (*length).max(3);\n            separator.push_str(&format!(\" {} |\", \"-\".repeat(dash_count)));\n        }\n        separator.push('\\n');\n        separator\n    }\n\n    /// Wrap text with a delimiter.\n    fn wrap_with_delimiter(&self, text: &str, delimiter: &str) -> String {\n        if text.trim().is_empty() {\n            return text.to_string();\n        }\n        format!(\"{}{}{}\", delimiter, text.trim(), delimiter)\n    }\n\n    /// Escape Markdown special characters.\n    fn escape_markdown_chars(&self, text: &str) -> String {\n        if !self.escape_mode {\n            return text.to_string();\n        }\n        let mut result = text.replace('\\\\', \"\\\\\\\\\");\n        for ch in &[\n            '`', '*', '_', '{', '}', '[', ']', '(', ')', '#', '+', '-', '.', '!', '|',\n        ] {\n            result = result.replace(*ch, &format!(\"\\\\{}\", ch));\n        }\n        result\n    }\n\n    /// Escape pipe character in table cells.\n    fn escape_markdown_table_cell_content(&self, text: &str) -> String {\n        text.replace('|', \"\\\\|\")\n    }\n\n    /// Convert definition list.\n    fn convert_definition_list(&self, node: &NodeRef<Node>, document: &Html, excluded: &[ego_tree::NodeId]) -> String {\n        let mut markdown = String::new();\n        let mut dt_content: Option<String> = None;\n\n        for child in node.children() {\n            if let Node::Element(el) = child.value() {\n                let tag = el.name.local.as_ref();\n                if tag == \"dt\" {\n        
            if let Some(ref content) = dt_content {\n                        markdown.push_str(&format!(\"{}\\n\", content));\n                    }\n                    dt_content = Some(self.get_inner_markdown(&child, document, excluded));\n                } else if tag == \"dd\" {\n                    let dd_content = self.get_inner_markdown(&child, document, excluded);\n                    if let Some(ref dt) = dt_content {\n                        markdown.push_str(&format!(\"\\n{}\\n:   {}\\n\", dt, dd_content));\n                        dt_content = None;\n                    } else {\n                        markdown.push_str(&format!(\"\\n:   {}\\n\", dd_content));\n                    }\n                }\n            }\n        }\n\n        if let Some(ref content) = dt_content {\n            markdown.push_str(&format!(\"\\n{}\\n\", content));\n        }\n\n        if markdown.is_empty() {\n            String::new()\n        } else {\n            format!(\"\\n{}\\n\\n\", markdown.trim())\n        }\n    }\n\n    /// Convert list (ul/ol) to Markdown.\n    fn convert_list_to_markdown(&self, node: &NodeRef<Node>, document: &Html, excluded: &[ego_tree::NodeId]) -> String {\n        let list_markdown = self.process_list(node, 0, document, excluded);\n        let trimmed = list_markdown.trim();\n        if trimmed.is_empty() {\n            String::new()\n        } else {\n            format!(\"\\n\\n{}\\n\\n\", trimmed)\n        }\n    }\n\n    /// Recursively process a list element.\n    fn process_list(\n        &self,\n        list_element: &NodeRef<Node>,\n        level: usize,\n        document: &Html,\n        excluded: &[ego_tree::NodeId],\n    ) -> String {\n        let mut markdown = String::new();\n        let is_ordered = matches!(list_element.value(), Node::Element(el)\n            if el.name.local.as_ref() == \"ol\");\n\n        let mut item_counter: usize = 1;\n        if is_ordered\n            && let Node::Element(el) = list_element.value()\n   
         && let Some(start_val) = el.attr(\"start\")\n            && let Ok(start) = start_val.parse::<usize>()\n            && start > 1\n        {\n            item_counter = start;\n        }\n\n        let indent = \"    \".repeat(level);\n\n        for child in list_element.children() {\n            if excluded.contains(&child.id()) {\n                continue;\n            }\n            if let Node::Element(el) = child.value()\n                && el.name.local.as_ref() == \"li\"\n            {\n                let marker = if is_ordered {\n                    let m = format!(\"{}.\", item_counter);\n                    item_counter += 1;\n                    m\n                } else {\n                    self.bullet_list_marker.clone()\n                };\n\n                let (item_content, nested_list) = self.extract_li_data(&child, level, document, excluded);\n\n                let trimmed_content = item_content.trim();\n                let lines: Vec<&str> = trimmed_content.split('\\n').filter(|s| !s.is_empty()).collect();\n\n                let first_line = lines.first().copied().unwrap_or(\"\");\n                markdown.push_str(&format!(\"{}{} {}\\n\", indent, marker, first_line));\n\n                // Add subsequent lines with proper indentation\n                let subsequent_indent = format!(\"{}{}\", indent, \" \".repeat(marker.len() + 1));\n                for line in lines.iter().skip(1) {\n                    markdown.push_str(&format!(\"{}{}\\n\", subsequent_indent, line));\n                }\n\n                if !nested_list.is_empty() {\n                    markdown.push_str(&nested_list);\n                    markdown.push('\\n');\n                }\n            }\n        }\n\n        markdown\n    }\n\n    /// Extract content and nested list markdown from a <li> element.\n    fn extract_li_data(\n        &self,\n        li_element: &NodeRef<Node>,\n        level: usize,\n        document: &Html,\n        excluded: 
&[ego_tree::NodeId],\n    ) -> (String, String) {\n        let mut item_content = String::new();\n        let mut nested_list = String::new();\n\n        for child in li_element.children() {\n            if excluded.contains(&child.id()) {\n                continue;\n            }\n            if let Node::Element(el) = child.value() {\n                let tag = el.name.local.as_ref();\n                if tag == \"ul\" || tag == \"ol\" {\n                    nested_list.push('\\n');\n                    nested_list.push_str(&self.process_list(&child, level + 1, document, excluded));\n                } else if tag == \"p\" {\n                    item_content.push_str(self.get_inner_markdown(&child, document, excluded).trim());\n                    item_content.push('\\n');\n                } else {\n                    item_content.push_str(&self.convert_node(&child, document, excluded));\n                }\n            } else {\n                item_content.push_str(&self.convert_node(&child, document, excluded));\n            }\n        }\n\n        let cleaned_item = item_content.trim().to_string();\n        let cleaned_nested = nested_list.trim().to_string();\n\n        let final_nested = if !cleaned_nested.is_empty() && !cleaned_item.is_empty() {\n            format!(\"\\n{}\", cleaned_nested)\n        } else {\n            cleaned_nested\n        };\n\n        (cleaned_item, final_nested)\n    }\n\n    /// Normalize whitespace in converted Markdown.\n    fn normalize_whitespace(&self, text: &str) -> String {\n        // Replace CRLF with LF\n        let text = text.replace(\"\\r\\n\", \"\\n\");\n        // Replace multiple consecutive newlines with max two\n        let text = Regex::new(r\"\\n{3,}\")\n            .map(|re| re.replace_all(&text, \"\\n\\n\").to_string())\n            .unwrap_or(text);\n        // Trim trailing spaces/tabs from each line\n        let text = Regex::new(r\"[ \\t]+$\")\n            .map(|re| {\n                text.lines()\n         
           .map(|line| re.replace_all(line, \"\").to_string())\n                    .collect::<Vec<_>>()\n                    .join(\"\\n\")\n            })\n            .unwrap_or(text);\n\n        text.trim().to_string()\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_simple_paragraph() {\n        let converter = HtmlToMarkdownConverter::new(\"<p>Hello world</p>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"Hello world\"));\n    }\n\n    #[test]\n    fn test_heading_atx() {\n        let mut converter = HtmlToMarkdownConverter::new(\"<h1>Title</h1>\", vec![]);\n        converter.set_heading_style(HeadingStyle::Atx);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"# Title\"));\n    }\n\n    #[test]\n    fn test_heading_setext() {\n        let mut converter = HtmlToMarkdownConverter::new(\"<h1>Title</h1>\", vec![]);\n        converter.set_heading_style(HeadingStyle::Setext);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"Title\\n=====\"));\n    }\n\n    #[test]\n    fn test_bold() {\n        let converter = HtmlToMarkdownConverter::new(\"<strong>bold text</strong>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"**bold text**\"));\n    }\n\n    #[test]\n    fn test_italic() {\n        let converter = HtmlToMarkdownConverter::new(\"<em>italic text</em>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"*italic text*\"));\n    }\n\n    #[test]\n    fn test_link() {\n        let converter = HtmlToMarkdownConverter::new(\"<a href=\\\"https://example.com\\\">Example</a>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"[Example](https://example.com)\"));\n    }\n\n    #[test]\n    fn test_image() {\n        let converter = HtmlToMarkdownConverter::new(\"<img src=\\\"image.jpg\\\" alt=\\\"An image\\\">\", vec![]);\n        let md = 
converter.get_markdown();\n        assert!(md.contains(\"![An image](image.jpg)\"));\n    }\n\n    #[test]\n    fn test_unordered_list() {\n        let converter = HtmlToMarkdownConverter::new(\"<ul><li>Item 1</li><li>Item 2</li></ul>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"- Item 1\"));\n        assert!(md.contains(\"- Item 2\"));\n    }\n\n    #[test]\n    fn test_ordered_list() {\n        let converter = HtmlToMarkdownConverter::new(\"<ol><li>First</li><li>Second</li></ol>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"1. First\"));\n        assert!(md.contains(\"2. Second\"));\n    }\n\n    #[test]\n    fn test_code_block() {\n        let converter =\n            HtmlToMarkdownConverter::new(\"<pre><code class=\\\"language-rust\\\">fn main() {}</code></pre>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"```rust\"));\n        assert!(md.contains(\"fn main() {}\"));\n        assert!(md.contains(\"```\"));\n    }\n\n    #[test]\n    fn test_inline_code() {\n        let converter = HtmlToMarkdownConverter::new(\"<code>foo</code>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"`foo`\"));\n    }\n\n    #[test]\n    fn test_blockquote() {\n        let converter = HtmlToMarkdownConverter::new(\"<blockquote>Quoted text</blockquote>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"> Quoted text\"));\n    }\n\n    #[test]\n    fn test_horizontal_rule() {\n        let converter = HtmlToMarkdownConverter::new(\"<hr>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"* * *\"));\n    }\n\n    #[test]\n    fn test_table() {\n        let converter = HtmlToMarkdownConverter::new(\n            \"<table><thead><tr><th>Name</th><th>Value</th></tr></thead>\\\n             <tbody><tr><td>A</td><td>1</td></tr></tbody></table>\",\n            
vec![],\n        );\n        let md = converter.get_markdown();\n        assert!(md.contains(\"| Name\"));\n        assert!(md.contains(\"| A\"));\n        assert!(md.contains(\"---\"));\n    }\n\n    #[test]\n    fn test_strikethrough() {\n        let converter = HtmlToMarkdownConverter::new(\"<del>deleted text</del>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"~~deleted text~~\"));\n    }\n\n    #[test]\n    fn test_excluded_selector() {\n        let converter = HtmlToMarkdownConverter::new(\n            \"<div><p>Keep this</p><div class=\\\"hidden\\\">Remove this</div></div>\",\n            vec![],\n        );\n        let md = converter.get_markdown();\n        assert!(md.contains(\"Keep this\"));\n        assert!(!md.contains(\"Remove this\"));\n    }\n\n    #[test]\n    fn test_script_removed() {\n        let converter = HtmlToMarkdownConverter::new(\"<div><p>Content</p><script>alert('test')</script></div>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"Content\"));\n        assert!(!md.contains(\"alert\"));\n    }\n\n    // --- Tests for aria-hidden and role=menu exclusion ---\n\n    #[test]\n    fn test_aria_hidden_excluded() {\n        let converter = HtmlToMarkdownConverter::new(\n            \"<div><p>Visible</p><div aria-hidden=\\\"true\\\"><p>Hidden mega-menu</p></div></div>\",\n            vec![],\n        );\n        let md = converter.get_markdown();\n        assert!(md.contains(\"Visible\"));\n        assert!(!md.contains(\"Hidden mega-menu\"));\n    }\n\n    #[test]\n    fn test_aria_hidden_children_excluded() {\n        let converter = HtmlToMarkdownConverter::new(\n            \"<div><p>Content</p><nav aria-hidden=\\\"true\\\"><ul><li><a href=\\\"/\\\">Home</a></li><li><a href=\\\"/about\\\">About</a></li></ul></nav></div>\",\n            vec![],\n        );\n        let md = converter.get_markdown();\n        assert!(md.contains(\"Content\"));\n        
assert!(!md.contains(\"Home\"));\n        assert!(!md.contains(\"About\"));\n    }\n\n    #[test]\n    fn test_role_menu_excluded() {\n        let converter = HtmlToMarkdownConverter::new(\n            \"<div><p>Page content</p><ul role=\\\"menu\\\"><li>Menu Item 1</li><li>Menu Item 2</li></ul></div>\",\n            vec![],\n        );\n        let md = converter.get_markdown();\n        assert!(md.contains(\"Page content\"));\n        assert!(!md.contains(\"Menu Item\"));\n    }\n\n    // --- Tests for block element spacing ---\n\n    #[test]\n    fn test_adjacent_divs_have_spacing() {\n        let converter = HtmlToMarkdownConverter::new(\"<div>text one</div><div>text two</div>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(\n            !md.contains(\"text onetext two\"),\n            \"Adjacent divs should not concatenate: {}\",\n            md\n        );\n        assert!(md.contains(\"text one\"));\n        assert!(md.contains(\"text two\"));\n    }\n\n    #[test]\n    fn test_adjacent_sections_have_spacing() {\n        let converter = HtmlToMarkdownConverter::new(\n            \"<section><p>First</p></section><section><p>Second</p></section>\",\n            vec![],\n        );\n        let md = converter.get_markdown();\n        assert!(\n            !md.contains(\"FirstSecond\"),\n            \"Adjacent sections should not concatenate: {}\",\n            md\n        );\n    }\n\n    #[test]\n    fn test_span_remains_inline() {\n        let converter = HtmlToMarkdownConverter::new(\"<p>Hello <span>world</span> test</p>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"Hello world test\"));\n    }\n\n    #[test]\n    fn test_nested_divs_no_excessive_whitespace() {\n        let converter = HtmlToMarkdownConverter::new(\"<div><div><div>deep text</div></div></div>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"deep text\"));\n        // Should not have more 
than two consecutive newlines after normalization\n        assert!(\n            !md.contains(\"\\n\\n\\n\"),\n            \"Nested divs should not produce excessive newlines\"\n        );\n    }\n\n    #[test]\n    fn test_empty_div_produces_no_output() {\n        let converter = HtmlToMarkdownConverter::new(\"<p>Before</p><div></div><p>After</p>\", vec![]);\n        let md = converter.get_markdown();\n        assert!(md.contains(\"Before\"));\n        assert!(md.contains(\"After\"));\n    }\n\n    // --- Tests for aria-label fallback on links ---\n\n    #[test]\n    fn test_link_aria_label_fallback() {\n        let converter = HtmlToMarkdownConverter::new(\n            r#\"<a href=\"https://facebook.com/page\" aria-label=\"Facebook\"><svg><path d=\"M0 0\"/></svg></a>\"#,\n            vec![],\n        );\n        let md = converter.get_markdown();\n        assert!(\n            md.contains(\"[Facebook](https://facebook.com/page)\"),\n            \"Should use aria-label: {}\",\n            md\n        );\n    }\n\n    #[test]\n    fn test_link_visible_text_preferred_over_aria_label() {\n        let converter = HtmlToMarkdownConverter::new(\n            r#\"<a href=\"https://example.com\" aria-label=\"Aria Label\">Visible Text</a>\"#,\n            vec![],\n        );\n        let md = converter.get_markdown();\n        assert!(md.contains(\"[Visible Text](https://example.com)\"));\n        assert!(!md.contains(\"Aria Label\"));\n    }\n\n    #[test]\n    fn test_link_url_fallback_without_aria_label() {\n        let converter = HtmlToMarkdownConverter::new(r#\"<a href=\"https://example.com\"><svg></svg></a>\"#, vec![]);\n        let md = converter.get_markdown();\n        assert!(\n            md.contains(\"[https://example.com](https://example.com)\"),\n            \"Should fall back to URL: {}\",\n            md\n        );\n    }\n\n    #[test]\n    fn test_link_empty_aria_label_falls_back_to_url() {\n        let converter = HtmlToMarkdownConverter::new(\n         
   r#\"<a href=\"https://example.com\" aria-label=\"  \"><svg></svg></a>\"#,\n            vec![],\n        );\n        let md = converter.get_markdown();\n        assert!(\n            md.contains(\"[https://example.com](https://example.com)\"),\n            \"Empty aria-label should fall back to URL: {}\",\n            md\n        );\n    }\n\n    // --- Tests for cookie banner exclusion ---\n\n    #[test]\n    fn test_cookie_banner_excluded() {\n        let converter = HtmlToMarkdownConverter::new(\n            \"<div><p>Content</p><div class=\\\"cookie-banner\\\"><p>We use cookies</p><button>Accept</button></div></div>\",\n            vec![],\n        );\n        let md = converter.get_markdown();\n        assert!(md.contains(\"Content\"));\n        assert!(!md.contains(\"cookies\"));\n    }\n\n    #[test]\n    fn test_onetrust_banner_excluded() {\n        let converter = HtmlToMarkdownConverter::new(\n            \"<div><p>Content</p><div id=\\\"onetrust-banner-sdk\\\"><p>Cookie preferences</p></div></div>\",\n            vec![],\n        );\n        let md = converter.get_markdown();\n        assert!(md.contains(\"Content\"));\n        assert!(!md.contains(\"Cookie preferences\"));\n    }\n}\n"
  },
  {
    "path": "src/export/utils/markdown_site_aggregator.rs",
    "content": "// SiteOne Crawler - MarkdownSiteAggregator\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Combines multiple markdown files into a single file.\n\nuse std::fs;\nuse std::path::Path;\n\nuse regex::Regex;\n\nuse crate::error::{CrawlerError, CrawlerResult};\n\n/// Similarity threshold for common header/footer detection (percentage).\nconst SIMILARITY_THRESHOLD: f64 = 80.0;\n\n/// Combines multiple Markdown files from a directory into a single document.\n/// Detects and extracts common headers/footers, adds page separators and URLs.\npub struct MarkdownSiteAggregator {\n    base_url: String,\n}\n\nimpl MarkdownSiteAggregator {\n    pub fn new(base_url: &str) -> Self {\n        Self {\n            base_url: base_url.trim_end_matches('/').to_string(),\n        }\n    }\n\n    /// Combine all markdown files in a directory into a single document.\n    pub fn combine_directory(&self, directory_path: &str, remove_links_and_images: bool) -> CrawlerResult<String> {\n        let files = self.get_markdown_files(directory_path)?;\n\n        // Load content of all files into a map [url => lines]\n        let mut pages: Vec<(String, Vec<String>)> = Vec::new();\n        for file_path in &files {\n            let url = self.make_url_from_path(file_path, directory_path);\n            let content = fs::read_to_string(file_path)\n                .map_err(|e| CrawlerError::Export(format!(\"Cannot read file '{}': {}\", file_path, e)))?;\n            let lines: Vec<String> = content.trim_end().split('\\n').map(|s| s.to_string()).collect();\n            pages.push((url, lines));\n        }\n\n        // Sort URLs to ensure index pages come first\n        let base_url = self.base_url.clone();\n        pages.sort_by(|(url_a, _), (url_b, _)| {\n            // Root URL should always be first\n            if url_a == &base_url || url_a.is_empty() {\n                return std::cmp::Ordering::Less;\n            }\n            if url_b == &base_url || url_b.is_empty() {\n         
       return std::cmp::Ordering::Greater;\n            }\n\n            let parts_a: Vec<&str> = url_a.trim_end_matches('/').split('/').collect();\n            let parts_b: Vec<&str> = url_b.trim_end_matches('/').split('/').collect();\n\n            let min_len = parts_a.len().min(parts_b.len());\n            for i in 0..min_len {\n                if parts_a[i] != parts_b[i] {\n                    return parts_a[i].cmp(parts_b[i]);\n                }\n            }\n\n            parts_a.len().cmp(&parts_b.len())\n        });\n\n        // Detect common header and footer\n        let page_lines: Vec<&Vec<String>> = pages.iter().map(|(_, lines)| lines).collect();\n        let header_lines = self.detect_common_header(&page_lines);\n        let footer_lines = self.detect_common_footer(&page_lines);\n\n        // Remove header and footer from individual pages\n        for (_, lines) in &mut pages {\n            if !header_lines.is_empty() {\n                *lines = self.remove_prefix(lines, &header_lines);\n            }\n            if !footer_lines.is_empty() {\n                *lines = self.remove_suffix(lines, &footer_lines);\n            }\n        }\n\n        // Build resulting markdown\n        let mut result_lines: Vec<String> = Vec::new();\n        if !header_lines.is_empty() {\n            result_lines.extend(header_lines.iter().cloned());\n            result_lines.push(String::new());\n        }\n\n        // Add content of all pages with their URLs\n        for (url, lines) in &pages {\n            // Use emoji + URL marker\n            result_lines.push(format!(\"\\u{2B07}\\u{FE0F} `URL: {}`\\n\\n---\\n\\n\", url));\n            for line in lines {\n                result_lines.push(line.clone());\n            }\n            result_lines.push(\"\\n\\n---\\n\".to_string());\n        }\n\n        if !footer_lines.is_empty() {\n            // Remove the last empty line before footer if present\n            if result_lines.last().map(|s| 
s.is_empty()).unwrap_or(false) {\n                result_lines.pop();\n            }\n            result_lines.push(String::new());\n            result_lines.extend(footer_lines.iter().cloned());\n        }\n\n        let mut final_markdown = result_lines.join(\"\\n\");\n\n        if remove_links_and_images {\n            final_markdown = self.remove_links_and_images(&final_markdown);\n        }\n\n        Ok(final_markdown)\n    }\n\n    /// Get all markdown files in a directory recursively.\n    fn get_markdown_files(&self, dir: &str) -> CrawlerResult<Vec<String>> {\n        let mut paths = Vec::new();\n        self.collect_markdown_files(dir, &mut paths)?;\n        Ok(paths)\n    }\n\n    #[allow(clippy::only_used_in_recursion)]\n    fn collect_markdown_files(&self, dir: &str, paths: &mut Vec<String>) -> CrawlerResult<()> {\n        let dir_path = Path::new(dir);\n        if !dir_path.is_dir() {\n            return Ok(());\n        }\n\n        let entries = fs::read_dir(dir_path)\n            .map_err(|e| CrawlerError::Export(format!(\"Cannot read directory '{}': {}\", dir, e)))?;\n\n        for entry in entries.flatten() {\n            let path = entry.path();\n            if path.is_dir() {\n                self.collect_markdown_files(&path.to_string_lossy(), paths)?;\n            } else if path.is_file()\n                && let Some(ext) = path.extension()\n                && ext.to_str().map(|e| e.to_lowercase()) == Some(\"md\".to_string())\n            {\n                paths.push(path.to_string_lossy().to_string());\n            }\n        }\n\n        Ok(())\n    }\n\n    /// Make URL from file path.\n    fn make_url_from_path(&self, file_path: &str, root_dir: &str) -> String {\n        let root = root_dir.trim_end_matches('/');\n        let rel_path = file_path[root.len()..].trim_start_matches('/').replace('\\\\', \"/\");\n\n        // Remove .md extension\n        let rel_path = if rel_path.ends_with(\".md\") {\n            &rel_path[..rel_path.len() 
- 3]\n        } else {\n            &rel_path\n        };\n\n        // Replace index at end with /\n        let rel_path = Regex::new(r\"/index$\")\n            .map(|re| re.replace(rel_path, \"/\").to_string())\n            .unwrap_or_else(|_| rel_path.to_string());\n\n        // Handle root index.md\n        if rel_path == \"index\" || rel_path.is_empty() {\n            return if !self.base_url.is_empty() {\n                self.base_url.clone()\n            } else {\n                String::new()\n            };\n        }\n\n        if !self.base_url.is_empty() {\n            format!(\"{}/{}\", self.base_url, rel_path.trim_start_matches('/'))\n        } else {\n            rel_path.to_string()\n        }\n    }\n\n    /// Detect common header across pages.\n    fn detect_common_header(&self, pages: &[&Vec<String>]) -> Vec<String> {\n        if pages.is_empty() {\n            return Vec::new();\n        }\n\n        // Use pages starting from index 2 (skip first 2), take up to 3\n        let sample_start = 2.min(pages.len());\n        let sample_end = (sample_start + 3).min(pages.len());\n        if sample_start >= sample_end {\n            return Vec::new();\n        }\n\n        let sample_pages = &pages[sample_start..sample_end];\n\n        let mut common_header = sample_pages[0].clone();\n        for page in sample_pages.iter().skip(1) {\n            common_header = self.align_common_prefix(&common_header, page);\n            if common_header.is_empty() {\n                break;\n            }\n        }\n\n        common_header\n    }\n\n    /// Detect common footer across pages.\n    fn detect_common_footer(&self, pages: &[&Vec<String>]) -> Vec<String> {\n        if pages.is_empty() {\n            return Vec::new();\n        }\n\n        let sample_start = 2.min(pages.len());\n        let sample_end = (sample_start + 3).min(pages.len());\n        if sample_start >= sample_end {\n            return Vec::new();\n        }\n\n        let sample_pages = 
&pages[sample_start..sample_end];\n\n        // Reverse the first page\n        let mut common_footer: Vec<String> = sample_pages[0].iter().rev().cloned().collect();\n        for page in sample_pages.iter().skip(1) {\n            let other_rev: Vec<String> = page.iter().rev().cloned().collect();\n            common_footer = self.align_common_prefix(&common_footer, &other_rev);\n            if common_footer.is_empty() {\n                break;\n            }\n        }\n\n        // Reverse back to correct order\n        common_footer.reverse();\n        common_footer\n    }\n\n    /// Align two line arrays and find their common prefix with fuzzy tolerance.\n    fn align_common_prefix(&self, lines_a: &[String], lines_b: &[String]) -> Vec<String> {\n        let mut result = Vec::new();\n        let mut i = 0;\n        let mut j = 0;\n\n        while i < lines_a.len() && j < lines_b.len() {\n            if self.lines_similar(&lines_a[i], &lines_b[j]) {\n                result.push(lines_a[i].clone());\n                i += 1;\n                j += 1;\n            } else {\n                // Try skipping a line in A or B\n                let skip_a = i + 1 < lines_a.len() && self.lines_similar(&lines_a[i + 1], &lines_b[j]);\n                let skip_b = !skip_a && j + 1 < lines_b.len() && self.lines_similar(&lines_a[i], &lines_b[j + 1]);\n\n                if skip_a {\n                    i += 1;\n                } else if skip_b {\n                    j += 1;\n                } else {\n                    break;\n                }\n            }\n        }\n\n        result\n    }\n\n    /// Evaluate similarity of two lines (ignoring markdown formatting).\n    fn lines_similar(&self, a: &str, b: &str) -> bool {\n        let normalize = |s: &str| -> String {\n            let result = Regex::new(r\"[*_]+\")\n                .map(|re| re.replace_all(s, \"\").to_string())\n                .unwrap_or_else(|_| s.to_string());\n            result.trim().to_string()\n        
};\n\n        let na = normalize(a);\n        let nb = normalize(b);\n\n        if na == nb {\n            return true;\n        }\n\n        // Calculate similarity percentage\n        let percent = self.similar_text_percent(&na, &nb);\n        percent >= SIMILARITY_THRESHOLD\n    }\n\n    /// Calculate similarity percentage between two strings.\n    fn similar_text_percent(&self, a: &str, b: &str) -> f64 {\n        if a.is_empty() && b.is_empty() {\n            return 100.0;\n        }\n        if a.is_empty() || b.is_empty() {\n            return 0.0;\n        }\n\n        let matching = self.longest_common_substring_len(a, b);\n        let total = (a.len() + b.len()) as f64;\n        (2.0 * matching as f64 / total) * 100.0\n    }\n\n    /// Find the length of the longest common substring.\n    fn longest_common_substring_len(&self, a: &str, b: &str) -> usize {\n        let a_bytes = a.as_bytes();\n        let b_bytes = b.as_bytes();\n        let m = a_bytes.len();\n        let n = b_bytes.len();\n\n        if m == 0 || n == 0 {\n            return 0;\n        }\n\n        let mut max_len = 0;\n        let mut prev = vec![0usize; n + 1];\n        let mut curr = vec![0usize; n + 1];\n\n        for i in 1..=m {\n            for j in 1..=n {\n                if a_bytes[i - 1] == b_bytes[j - 1] {\n                    curr[j] = prev[j - 1] + 1;\n                    max_len = max_len.max(curr[j]);\n                } else {\n                    curr[j] = 0;\n                }\n            }\n            std::mem::swap(&mut prev, &mut curr);\n            curr.fill(0);\n        }\n\n        max_len\n    }\n\n    /// Remove common prefix (header) from a page's lines.\n    fn remove_prefix(&self, lines: &[String], prefix_lines: &[String]) -> Vec<String> {\n        if prefix_lines.is_empty() {\n            return lines.to_vec();\n        }\n        let len = prefix_lines.len();\n        if lines.len() >= len {\n            lines[len..].to_vec()\n        } else {\n           
 lines.to_vec()\n        }\n    }\n\n    /// Remove common suffix (footer) from a page's lines.\n    fn remove_suffix(&self, lines: &[String], suffix_lines: &[String]) -> Vec<String> {\n        if suffix_lines.is_empty() {\n            return lines.to_vec();\n        }\n        let len = suffix_lines.len();\n        if lines.len() >= len {\n            lines[..lines.len() - len].to_vec()\n        } else {\n            lines.to_vec()\n        }\n    }\n\n    /// Remove links and images from markdown text.\n    fn remove_links_and_images(&self, markdown: &str) -> String {\n        let mut result = markdown.to_string();\n\n        // Remove image in anchor text\n        if let Ok(re) = Regex::new(r\"\\[!\\[[^\\]]*\\]\\([^\\)]*\\)\\]\\([^\\)]*\\)\") {\n            result = re.replace_all(&result, \"\").to_string();\n        }\n\n        // Remove standalone images\n        if let Ok(re) = Regex::new(r#\"!\\[.*?\\]\\([^)]*\\)(\\s*\"[^\"]*\")?\"#) {\n            result = re.replace_all(&result, \"\").to_string();\n        }\n\n        // Replace links in list items\n        if let Ok(re) = Regex::new(r\"(?m)^\\s*(\\*|-|[0-9]+\\.)\\s*\\[([^\\]]+)\\]\\([^)]+\\)\") {\n            result = re.replace_all(&result, \"\").to_string();\n        }\n\n        // Replace empty links\n        if let Ok(re) = Regex::new(r\"\\[\\]\\([^)]+\\)\") {\n            result = re.replace_all(&result, \"\").to_string();\n        }\n\n        // Clean up tables - remove rows with only whitespace and pipes\n        if let Ok(re) = Regex::new(r\"(?m)^\\s*(\\|\\s*)+\\|\\s*$\") {\n            result = re.replace_all(&result, \"\").to_string();\n        }\n\n        // Clean empty list items\n        if let Ok(re) = Regex::new(r\"(?m)^\\s*(\\*|-|[0-9]+\\.)\\s*$\") {\n            result = re.replace_all(&result, \"\").to_string();\n        }\n\n        // Remove multiple consecutive empty lines\n        if let Ok(re) = Regex::new(r\"\\n{3,}\") {\n            result = re.replace_all(&result, 
\"\\n\\n\").to_string();\n        }\n\n        result\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn test_make_url_from_path() {\n        let aggregator = MarkdownSiteAggregator::new(\"https://example.com\");\n        assert_eq!(\n            aggregator.make_url_from_path(\"/tmp/export/index.md\", \"/tmp/export\"),\n            \"https://example.com\"\n        );\n        assert_eq!(\n            aggregator.make_url_from_path(\"/tmp/export/about.md\", \"/tmp/export\"),\n            \"https://example.com/about\"\n        );\n        assert_eq!(\n            aggregator.make_url_from_path(\"/tmp/export/docs/intro.md\", \"/tmp/export\"),\n            \"https://example.com/docs/intro\"\n        );\n    }\n\n    #[test]\n    fn test_lines_similar() {\n        let aggregator = MarkdownSiteAggregator::new(\"\");\n        assert!(aggregator.lines_similar(\"Hello world\", \"Hello world\"));\n        assert!(aggregator.lines_similar(\"**Hello** world\", \"Hello world\"));\n        assert!(!aggregator.lines_similar(\"Hello\", \"Completely different text\"));\n    }\n\n    #[test]\n    fn test_remove_links_and_images() {\n        let aggregator = MarkdownSiteAggregator::new(\"\");\n        let input = \"Some text ![image](img.jpg) and [link](url)\";\n        let result = aggregator.remove_links_and_images(input);\n        assert!(!result.contains(\"![image]\"));\n    }\n}\n"
  },
  {
    "path": "src/export/utils/mod.rs",
    "content": "// SiteOne Crawler - Export utilities module\n// (c) Jan Reges <jan.reges@siteone.cz>\n\npub mod html_to_markdown;\npub mod markdown_site_aggregator;\npub mod offline_url_converter;\npub mod target_domain_relation;\n"
  },
  {
    "path": "src/export/utils/offline_url_converter.rs",
    "content": "// SiteOne Crawler - OfflineUrlConverter\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Converts absolute URLs to relative paths for offline browsing.\n\nuse std::sync::Mutex;\n\nuse once_cell::sync::Lazy;\nuse regex::Regex;\n\nuse crate::engine::parsed_url::ParsedUrl;\nuse crate::utils;\n\nuse super::target_domain_relation::TargetDomainRelation;\n\n/// Static replace_query_string configuration\nstatic REPLACE_QUERY_STRING: Lazy<Mutex<Vec<String>>> = Lazy::new(|| Mutex::new(Vec::new()));\n\n/// Static lowercase configuration for offline export\nstatic LOWERCASE: Lazy<Mutex<bool>> = Lazy::new(|| Mutex::new(false));\n\n/// Regex for removing file extension from path\nstatic STRIP_EXT_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)\\.[a-z0-9]{1,10}$\").unwrap());\n\n/// Regex for removing domain from path\nstatic DOMAIN_IN_PATH_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)^(//|https?://)([^/]+)(:[0-9]+)?\").unwrap());\n\n/// Static files extensions regex pattern\nstatic STATIC_FILES_EXTENSIONS: &str = \"jpg|jpeg|png|gif|webp|svg|ico|js|css|txt|woff2|woff|ttf|eot|mp4|webm|ogg|mp3|wav|flac|pdf|doc\\\n     |docx|xls|xlsx|ppt|pptx|zip|rar|gz|bz2|7z|tar|xml|json|action|asp|aspx|cfm|cfml|cgi|do|gsp|jsp|jspx|lasso|phtml\\\n     |php|php3|php4|php5|php7|php8|php9|pl|py|rb|rbw|rhtml|shtml|srv|vm|vmdk\";\n\n/// Dynamic page extensions that get .html appended\nstatic DYNAMIC_PAGE_EXTENSIONS: &str = \"action|asp|aspx|cfm|cfml|cgi|do|gsp|jsp|jspx|lasso|phtml|php3|php4|php5|php7|php8|php9|php|pl|py|rb|rbw|rhtml|shtml|srv|vm\";\n\n// Pre-compiled regexes for sanitize_file_path hot path\nstatic RE_PATH_EXTENSION: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)^(.+)\\.([a-z0-9]{1,10})\").unwrap());\nstatic RE_CONTROL_CHARS: Lazy<Regex> = Lazy::new(|| Regex::new(r\"[\\x00-\\x1F\\x7F]\").unwrap());\nstatic RE_WHITESPACE: Lazy<Regex> = Lazy::new(|| Regex::new(r\"\\s+\").unwrap());\nstatic RE_MULTI_UNDERSCORE: Lazy<Regex> = Lazy::new(|| 
Regex::new(r\"_{2,}\").unwrap());\nstatic RE_FRAGMENT_SUFFIX: Lazy<Regex> = Lazy::new(|| Regex::new(r\"#.+$\").unwrap());\nstatic RE_DOTTED_FOLDER: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)([^/]+)\\.([a-z0-9]+)/\").unwrap());\nstatic RE_DOMAIN_TLD: Lazy<Regex> =\n    Lazy::new(|| Regex::new(r\"(?i)\\.(com|org|net|dev|io|test|local|localhost)$\").unwrap());\nstatic RE_STATIC_EXT_FOLDER: Lazy<Regex> = Lazy::new(|| {\n    let pattern = format!(r\"(?i)([^.]+)\\.({})\\/\", STATIC_FILES_EXTENSIONS);\n    Regex::new(&pattern).unwrap()\n});\nstatic RE_STATIC_EXT_MATCH: Lazy<Regex> = Lazy::new(|| {\n    let pattern = format!(r\"(?i)^({})$\", STATIC_FILES_EXTENSIONS);\n    Regex::new(&pattern).unwrap()\n});\nstatic RE_DYNAMIC_EXT: Lazy<Regex> = Lazy::new(|| {\n    let pattern = format!(r\"(?i)\\.({})$\", DYNAMIC_PAGE_EXTENSIONS);\n    Regex::new(&pattern).unwrap()\n});\n\n/// Converts absolute URLs to relative paths for offline browsing.\npub struct OfflineUrlConverter {\n    initial_url: ParsedUrl,\n    base_url: ParsedUrl,\n    target_url: ParsedUrl,\n    relative_target_url: ParsedUrl,\n    target_url_source_attribute: Option<String>,\n    #[allow(clippy::type_complexity)]\n    callback_is_domain_allowed_for_static_files: Option<Box<dyn Fn(&str) -> bool + Send + Sync>>,\n    #[allow(clippy::type_complexity)]\n    callback_is_external_domain_allowed_for_crawling: Option<Box<dyn Fn(&str) -> bool + Send + Sync>>,\n    target_domain_relation: TargetDomainRelation,\n    preserve_url_structure: bool,\n}\n\nimpl OfflineUrlConverter {\n    #[allow(clippy::type_complexity)]\n    pub fn new(\n        initial_url: ParsedUrl,\n        base_url: ParsedUrl,\n        target_url: ParsedUrl,\n        callback_is_domain_allowed_for_static_files: Option<Box<dyn Fn(&str) -> bool + Send + Sync>>,\n        callback_is_external_domain_allowed_for_crawling: Option<Box<dyn Fn(&str) -> bool + Send + Sync>>,\n        attribute: Option<&str>,\n    ) -> Self {\n        let relative_target_url = 
target_url.clone();\n        let target_domain_relation = TargetDomainRelation::get_by_urls(&initial_url, &base_url, &target_url);\n\n        Self {\n            initial_url,\n            base_url,\n            target_url,\n            relative_target_url,\n            target_url_source_attribute: attribute.map(|s| s.to_string()),\n            callback_is_domain_allowed_for_static_files,\n            callback_is_external_domain_allowed_for_crawling,\n            target_domain_relation,\n            preserve_url_structure: false,\n        }\n    }\n\n    pub fn set_preserve_url_structure(&mut self, preserve: bool) {\n        self.preserve_url_structure = preserve;\n    }\n\n    /// Convert URL to relative path for offline browsing.\n    pub fn convert_url_to_relative(&mut self, keep_fragment: bool) -> String {\n        if let Some(forced_url) = self.get_forced_url_if_needed() {\n            return forced_url;\n        }\n\n        self.detect_and_set_file_name_with_extension();\n        self.calculate_and_apply_depth();\n\n        let pre_final_url = self.relative_target_url.get_full_url(false, keep_fragment);\n        Self::sanitize_file_path(&pre_final_url, keep_fragment)\n    }\n\n    pub fn get_relative_target_url(&self) -> &ParsedUrl {\n        &self.relative_target_url\n    }\n\n    pub fn get_target_domain_relation(&self) -> TargetDomainRelation {\n        self.target_domain_relation\n    }\n\n    /// Set global replace_query_string configuration\n    pub fn set_replace_query_string(replace: Vec<String>) {\n        if let Ok(mut rqs) = REPLACE_QUERY_STRING.lock() {\n            *rqs = replace;\n        }\n    }\n\n    /// Set global lowercase configuration for offline export\n    pub fn set_lowercase(lowercase: bool) {\n        if let Ok(mut lc) = LOWERCASE.lock() {\n            *lc = lowercase;\n        }\n    }\n\n    /// Get depth of base URL path in target offline version.\n    pub fn get_offline_base_url_depth(url: &ParsedUrl) -> usize {\n        let 
trimmed = url.path.trim_start_matches('/').trim();\n        if trimmed.is_empty() {\n            return 0;\n        }\n        trimmed.matches('/').count()\n    }\n\n    /// Check if URL needs to be forced (not converted to relative).\n    fn get_forced_url_if_needed(&self) -> Option<String> {\n        if self.relative_target_url.is_only_fragment()\n            && let Some(ref f) = self.relative_target_url.fragment\n        {\n            return Some(format!(\"#{}\", f));\n        }\n\n        // when URL is not requestable resource, it is not possible to convert it to relative URL\n        if !utils::is_href_for_requestable_resource(&self.target_url.get_full_url(true, true)) {\n            return Some(self.target_url.get_full_url(false, true));\n        }\n\n        // when target host is external and not allowed\n        let is_external_host = matches!(\n            self.target_domain_relation,\n            TargetDomainRelation::InitialDifferentBaseDifferent | TargetDomainRelation::InitialDifferentBaseSame\n        );\n\n        if is_external_host && let Some(ref host) = self.target_url.host {\n            if self.is_external_domain_allowed_for_crawling(host)\n                || (self.target_url.is_static_file() && self.is_domain_allowed_for_static_files(host))\n                || (!self.target_url.is_static_file()\n                    && self.target_url_source_attribute.as_deref() == Some(\"src\")\n                    && self.is_domain_allowed_for_static_files(host))\n            {\n                return None;\n            } else {\n                return Some(self.target_url.get_full_url(true, true));\n            }\n        }\n\n        None\n    }\n\n    /// Add '*.html' or '/index.html' to path when needed.\n    fn detect_and_set_file_name_with_extension(&mut self) {\n        let query_hash = self\n            .relative_target_url\n            .query\n            .as_ref()\n            .map(|q| Self::get_query_hash_from_query_string(q))\n            
.filter(|h| !h.trim().is_empty());\n\n        // when the path is empty or '/'\n        let trimmed_path = self\n            .relative_target_url\n            .path\n            .trim_matches(|c: char| c == '/' || c == ' ');\n        if trimmed_path.is_empty() {\n            if let Some(ref hash) = query_hash {\n                self.relative_target_url.set_path(format!(\"/index.{}.html\", hash));\n                self.relative_target_url.set_query(None);\n            } else if self.relative_target_url.path.is_empty() && self.relative_target_url.fragment.is_some() {\n                // only #fragment\n                return;\n            } else {\n                self.relative_target_url.set_path(\"/index.html\".to_string());\n            }\n            return;\n        }\n\n        let is_image_attribute = matches!(\n            self.target_url_source_attribute.as_deref(),\n            Some(\"src\") | Some(\"srcset\")\n        );\n\n        // if the URL is probably icon, we use SVG extension, otherwise we use JPG\n        let full_url_lower = self.relative_target_url.get_full_url(true, true).to_lowercase();\n        let img_extension = if full_url_lower.contains(\"icon\") { \"svg\" } else { \"jpg\" };\n\n        // when the URL is probably font from Google Fonts, we use CSS extension\n        let other_file_extension = if self.target_url_source_attribute.as_deref() == Some(\"href\")\n            && self\n                .relative_target_url\n                .url\n                .to_lowercase()\n                .contains(\"fonts.googleapis.com/css\")\n        {\n            \"css\"\n        } else {\n            \"html\"\n        };\n\n        let extension = self.relative_target_url.estimate_extension().unwrap_or_else(|| {\n            if is_image_attribute {\n                img_extension.to_string()\n            } else {\n                other_file_extension.to_string()\n            }\n        });\n\n        if self.relative_target_url.path.ends_with('/') {\n   
         let base_name = \"index\";\n            if let Some(ref hash) = query_hash {\n                self.relative_target_url.set_path(format!(\n                    \"{}{}.{}.{}\",\n                    self.relative_target_url.path, base_name, hash, extension\n                ));\n                self.relative_target_url.set_query(None);\n            } else {\n                self.relative_target_url\n                    .set_path(format!(\"{}{}.{}\", self.relative_target_url.path, base_name, extension));\n            }\n        } else if self.preserve_url_structure && self.target_url.estimate_extension().is_none() {\n            // Preserve URL structure: /about → /about/index.html (instead of /about.html)\n            // Only for page-like URLs without a real file extension\n            if let Some(ref hash) = query_hash {\n                self.relative_target_url\n                    .set_path(format!(\"{}/index.{}.html\", self.relative_target_url.path, hash));\n                self.relative_target_url.set_query(None);\n            } else {\n                self.relative_target_url\n                    .set_path(format!(\"{}/index.html\", self.relative_target_url.path));\n            }\n        } else {\n            // Remove existing extension from path\n            let path_without_ext = STRIP_EXT_RE.replace(&self.relative_target_url.path, \"\").to_string();\n            if let Some(ref hash) = query_hash {\n                self.relative_target_url\n                    .set_path(format!(\"{}.{}.{}\", path_without_ext, hash, extension));\n                self.relative_target_url.set_query(None);\n            } else {\n                self.relative_target_url\n                    .set_path(format!(\"{}.{}\", path_without_ext, extension));\n            }\n        }\n    }\n\n    /// Calculate and apply depth for relative path conversion.\n    fn calculate_and_apply_depth(&mut self) {\n        let base_path_trimmed = self.base_url.path.trim_start_matches(['/', ' 
']);\n        let base_depth = if base_path_trimmed.is_empty() {\n            0usize\n        } else {\n            base_path_trimmed.matches('/').count()\n        };\n\n        match self.target_domain_relation {\n            TargetDomainRelation::InitialSameBaseSame | TargetDomainRelation::InitialDifferentBaseSame => {\n                if self.relative_target_url.path.starts_with('/') {\n                    if base_depth > 0 {\n                        self.relative_target_url.change_depth(base_depth as i32);\n                    } else {\n                        let new_path = self.relative_target_url.path.trim_start_matches('/').to_string();\n                        self.relative_target_url.set_path(new_path);\n                    }\n                }\n            }\n            TargetDomainRelation::InitialSameBaseDifferent => {\n                // backlink from the other domain back to initial domain\n                let cleaned_path = DOMAIN_IN_PATH_RE\n                    .replace(&self.relative_target_url.path, \"\")\n                    .to_string();\n                let cleaned_path = cleaned_path.trim_start_matches(['/', ' ']);\n                let prefix = \"../\".repeat(base_depth + 1);\n                self.relative_target_url.set_path(format!(\"{}{}\", prefix, cleaned_path));\n            }\n            TargetDomainRelation::InitialDifferentBaseDifferent => {\n                let extra_depth = if self.base_url.host != self.initial_url.host {\n                    1\n                } else {\n                    0\n                };\n                let host = self.relative_target_url.host.clone().unwrap_or_default();\n                let path = self.relative_target_url.path.clone();\n                let prefix = \"../\".repeat(base_depth + extra_depth);\n                self.relative_target_url\n                    .set_path(format!(\"{}_{}{}\", prefix, host, path));\n            }\n        }\n    }\n\n    fn is_domain_allowed_for_static_files(&self, 
domain: &str) -> bool {\n        self.callback_is_domain_allowed_for_static_files\n            .as_ref()\n            .map(|cb| cb(domain))\n            .unwrap_or(false)\n    }\n\n    fn is_external_domain_allowed_for_crawling(&self, domain: &str) -> bool {\n        self.callback_is_external_domain_allowed_for_crawling\n            .as_ref()\n            .map(|cb| cb(domain))\n            .unwrap_or(false)\n    }\n\n    /// Sanitize file path and replace special chars.\n    pub fn sanitize_file_path(file_path: &str, keep_fragment: bool) -> String {\n        // First decode URL-encoded characters\n        let file_path = percent_encoding::percent_decode_str(file_path)\n            .decode_utf8_lossy()\n            .to_string();\n\n        // Parse the file path to extract components\n        let (parsed_path, parsed_query, parsed_fragment) = parse_file_path_components(&file_path);\n\n        // Check if path has an extension\n        let path_with_extension = RE_PATH_EXTENSION.captures(&parsed_path);\n\n        let mut result = file_path.clone();\n        let mut extension: Option<String> = None;\n\n        if let Some(caps) = path_with_extension {\n            let start = caps.get(1).map(|m| m.as_str()).unwrap_or(\"\");\n            let ext = caps.get(2).map(|m| m.as_str()).unwrap_or(\"\");\n            extension = Some(ext.to_string());\n\n            if let Some(ref query_string) = parsed_query {\n                let trimmed_query = query_string.trim();\n                if !trimmed_query.is_empty() {\n                    let query_hash = Self::get_query_hash_from_query_string(trimmed_query);\n                    // Only add query hash if it's not empty after processing\n                    if !query_hash.trim().is_empty() {\n                        result = format!(\"{}.{}.{}\", start, query_hash, ext);\n                    } else {\n                        result = format!(\"{}.{}\", start, ext);\n                    }\n\n                    // add fragment to 
the end of the file path\n                    if keep_fragment && let Some(ref frag) = parsed_fragment {\n                        result = format!(\"{}#{}\", result, frag);\n                    }\n                }\n            }\n        }\n\n        // Remove characters that are dangerous for filesystems\n        let dangerous_characters = ['\\\\', ':', '*', '?', '\"', '<', '>', '|'];\n        for ch in &dangerous_characters {\n            result = result.replace(*ch, \"_\");\n        }\n\n        // Replace control characters\n        result = RE_CONTROL_CHARS.replace_all(&result, \"_\").to_string();\n\n        // Handle filesystem-specific limitations\n        result = result\n            .trim_matches(|c: char| c == ' ' || c == '\\t' || c == '\\n' || c == '\\r' || c == '\\0' || c == '\\x0B')\n            .to_string();\n\n        // Replace multiple spaces with single underscore\n        result = RE_WHITESPACE.replace_all(&result, \"_\").to_string();\n\n        // Remove multiple underscores\n        result = RE_MULTI_UNDERSCORE.replace_all(&result, \"_\").to_string();\n\n        // When filepath is too long and there is a long filename, replace filename with shorter md5\n        let file_path_for_length = RE_FRAGMENT_SUFFIX.replace(&result, \"\").to_string();\n        if file_path_for_length.len() > 200 {\n            let basename = std::path::Path::new(&result)\n                .file_name()\n                .and_then(|n| n.to_str())\n                .unwrap_or(\"\")\n                .to_string();\n            if basename.len() > 40 {\n                let ext = extension\n                    .as_deref()\n                    .or_else(|| std::path::Path::new(&basename).extension().and_then(|e| e.to_str()))\n                    .unwrap_or(\"html\");\n                let hash = {\n                    use md5::{Digest, Md5};\n                    let mut hasher = Md5::new();\n                    hasher.update(basename.as_bytes());\n                    
format!(\"{:x}\", hasher.finalize())\n                };\n                let short_hash = &hash[..10.min(hash.len())];\n                result = result.replace(&basename, &format!(\"{}.{}\", short_hash, ext));\n            }\n        }\n\n        // Adding \"_\" to the end of the folder that contains the potential file extension\n        result = RE_STATIC_EXT_FOLDER.replace_all(&result, \"${1}.${2}_/\").to_string();\n\n        // Handle any other dotted folder names that might conflict\n        {\n            let re = &*RE_DOTTED_FOLDER;\n\n            let result_clone = result.clone();\n            let mut new_result = String::new();\n            let mut last_end = 0;\n\n            for caps in re.captures_iter(&result_clone) {\n                let Some(full_match) = caps.get(0) else {\n                    continue;\n                };\n                let name = caps.get(1).map(|m| m.as_str()).unwrap_or(\"\");\n                let ext = caps.get(2).map(|m| m.as_str()).unwrap_or(\"\");\n\n                new_result.push_str(&result_clone[last_end..full_match.start()]);\n\n                // Skip if starts with underscore (domain name)\n                if name.starts_with('_') {\n                    new_result.push_str(full_match.as_str());\n                } else if RE_DOMAIN_TLD.is_match(&format!(\"{}.{}\", name, ext)) {\n                    // Skip domain-like names\n                    new_result.push_str(full_match.as_str());\n                } else if RE_STATIC_EXT_MATCH.is_match(ext) {\n                    // Already handled by the previous regex\n                    new_result.push_str(full_match.as_str());\n                } else {\n                    new_result.push_str(&format!(\"{}.{}_/\", name, ext));\n                }\n\n                last_end = full_match.end();\n            }\n            new_result.push_str(&result_clone[last_end..]);\n            result = new_result;\n        }\n\n        // Replace extensions of typical dynamic pages\n      
  result = RE_DYNAMIC_EXT.replace(&result, \".$1.html\").to_string();\n\n        if !keep_fragment && result.contains('#') {\n            result = RE_FRAGMENT_SUFFIX.replace(&result, \"\").to_string();\n        }\n\n        // Convert to lowercase if configured\n        if let Ok(lc) = LOWERCASE.lock()\n            && *lc\n        {\n            result = result.to_lowercase();\n        }\n\n        result\n    }\n\n    /// Get query hash from query string.\n    fn get_query_hash_from_query_string(query_string: &str) -> String {\n        let replace_qs = REPLACE_QUERY_STRING.lock().unwrap_or_else(|e| e.into_inner());\n        let has_replacements = !replace_qs.is_empty();\n\n        if has_replacements {\n            let replacements = &replace_qs;\n            let mut qs = query_string.to_string();\n\n            for replace in replacements.iter() {\n                let parts: Vec<&str> = replace.splitn(2, \"->\").collect();\n                let replace_from = parts[0].trim();\n                let replace_to = if parts.len() > 1 { parts[1].trim() } else { \"\" };\n\n                // Check if it's a regex\n                let is_regex = crate::utils::is_regex_pattern(replace_from);\n\n                if is_regex {\n                    // Extract the pattern from delimiters\n                    if let Some(pattern) = extract_regex_pattern(replace_from)\n                        && let Ok(re) = Regex::new(&pattern)\n                    {\n                        qs = re.replace_all(&qs, replace_to).to_string();\n                    }\n                } else {\n                    qs = qs.replace(replace_from, replace_to);\n                }\n            }\n\n            // replace slashes with '~'\n            qs.replace('/', \"~\")\n        } else {\n            // Use MD5 hash (first 10 chars)\n            let decoded = html_entities_decode(&percent_encoding::percent_decode_str(query_string).decode_utf8_lossy());\n            let hash = {\n                use 
md5::{Digest, Md5};\n                let mut hasher = Md5::new();\n                hasher.update(decoded.as_bytes());\n                format!(\"{:x}\", hasher.finalize())\n            };\n            hash[..10.min(hash.len())].to_string()\n        }\n    }\n}\n\n/// Extract regex pattern from a delimited string (e.g., /pattern/flags)\nfn extract_regex_pattern(input: &str) -> Option<String> {\n    if input.len() < 2 {\n        return None;\n    }\n    let delimiter = input.chars().next()?;\n    let rest = &input[1..];\n\n    // Find the last occurrence of the delimiter\n    if let Some(end_pos) = rest.rfind(delimiter) {\n        let pattern = &rest[..end_pos];\n        let flags = &rest[end_pos + 1..];\n\n        let mut regex_pattern = String::new();\n        if flags.contains('i') {\n            regex_pattern.push_str(\"(?i)\");\n        }\n        regex_pattern.push_str(pattern);\n        Some(regex_pattern)\n    } else {\n        None\n    }\n}\n\n/// Parse file path into path, query, and fragment components\nfn parse_file_path_components(file_path: &str) -> (String, Option<String>, Option<String>) {\n    let mut remaining = file_path;\n\n    // Extract fragment\n    let fragment = if let Some(hash_pos) = remaining.find('#') {\n        let f = &remaining[hash_pos + 1..];\n        remaining = &remaining[..hash_pos];\n        if f.is_empty() { None } else { Some(f.to_string()) }\n    } else {\n        None\n    };\n\n    // Extract query\n    let query = if let Some(q_pos) = remaining.find('?') {\n        let q = &remaining[q_pos + 1..];\n        remaining = &remaining[..q_pos];\n        if q.is_empty() { None } else { Some(q.to_string()) }\n    } else {\n        None\n    };\n\n    (remaining.to_string(), query, fragment)\n}\n\n/// Decode HTML entities\nfn html_entities_decode(input: &str) -> String {\n    input\n        .replace(\"&amp;\", \"&\")\n        .replace(\"&lt;\", \"<\")\n        .replace(\"&gt;\", \">\")\n        .replace(\"&quot;\", \"\\\"\")\n      
  .replace(\"&#039;\", \"'\")\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    // Helper: create converter with siteone.io as initial URL, with domain allow callbacks\n    fn make_converter(initial: &str, base: &str, target: &str, attribute: Option<&str>) -> OfflineUrlConverter {\n        let initial_url = ParsedUrl::parse(initial, None);\n        let base_url = ParsedUrl::parse(base, None);\n        let base_url_ref = if target.starts_with(\"//\")\n            || target.starts_with(\"http\")\n            || target.starts_with('#')\n            || target.starts_with('?')\n        {\n            None\n        } else {\n            Some(&base_url)\n        };\n        let target_url = ParsedUrl::parse(target, base_url_ref);\n\n        let allowed_static: Box<dyn Fn(&str) -> bool + Send + Sync> =\n            Box::new(|domain: &str| matches!(domain, \"cdn.siteone.io\" | \"cdn.webflow.com\" | \"nextjs.org\"));\n        let allowed_crawling: Box<dyn Fn(&str) -> bool + Send + Sync> =\n            Box::new(|domain: &str| matches!(domain, \"svelte.dev\" | \"nextjs.org\"));\n\n        OfflineUrlConverter::new(\n            initial_url,\n            base_url,\n            target_url,\n            Some(allowed_static),\n            Some(allowed_crawling),\n            attribute,\n        )\n    }\n\n    fn convert(initial: &str, base: &str, target: &str, attribute: Option<&str>) -> String {\n        let mut converter = make_converter(initial, base, target, attribute);\n        converter.convert_url_to_relative(true)\n    }\n\n    // =========================================================================\n    // getOfflineBaseUrlDepth tests\n    // =========================================================================\n\n    #[test]\n    fn depth_root() {\n        assert_eq!(\n            OfflineUrlConverter::get_offline_base_url_depth(&ParsedUrl::parse(\"/\", None)),\n            0\n        );\n    }\n\n    #[test]\n    fn depth_file() {\n        assert_eq!(\n   
         OfflineUrlConverter::get_offline_base_url_depth(&ParsedUrl::parse(\"/foo\", None)),\n            0\n        );\n    }\n\n    #[test]\n    fn depth_dir() {\n        assert_eq!(\n            OfflineUrlConverter::get_offline_base_url_depth(&ParsedUrl::parse(\"/foo/\", None)),\n            1\n        );\n    }\n\n    #[test]\n    fn depth_file_in_dir() {\n        assert_eq!(\n            OfflineUrlConverter::get_offline_base_url_depth(&ParsedUrl::parse(\"/foo/bar\", None)),\n            1\n        );\n    }\n\n    #[test]\n    fn depth_nested_dir() {\n        assert_eq!(\n            OfflineUrlConverter::get_offline_base_url_depth(&ParsedUrl::parse(\"/foo/bar/\", None)),\n            2\n        );\n    }\n\n    #[test]\n    fn depth_root_with_query() {\n        // /?param=1 → /index.queryMd5Hash.html → depth 0\n        assert_eq!(\n            OfflineUrlConverter::get_offline_base_url_depth(&ParsedUrl::parse(\"/?param=1\", None)),\n            0\n        );\n    }\n\n    #[test]\n    fn depth_file_with_query() {\n        // /foo?param=1 → /foo.queryMd5Hash.html → depth 0\n        assert_eq!(\n            OfflineUrlConverter::get_offline_base_url_depth(&ParsedUrl::parse(\"/foo?param=1\", None)),\n            0\n        );\n    }\n\n    #[test]\n    fn depth_dir_with_query() {\n        // /foo/?param=1 → /foo/index.queryMd5Hash.html → depth 1\n        assert_eq!(\n            OfflineUrlConverter::get_offline_base_url_depth(&ParsedUrl::parse(\"/foo/?param=1\", None)),\n            1\n        );\n    }\n\n    #[test]\n    fn depth_file_in_dir_with_query() {\n        // /foo/bar?param=1 → /foo/bar.queryMd5Hash.html → depth 1\n        assert_eq!(\n            OfflineUrlConverter::get_offline_base_url_depth(&ParsedUrl::parse(\"/foo/bar?param=1\", None)),\n            1\n        );\n    }\n\n    #[test]\n    fn depth_nested_dir_with_query() {\n        // /foo/bar/?param=1 → /foo/bar/index.queryMd5Hash.html → depth 2\n        assert_eq!(\n            
OfflineUrlConverter::get_offline_base_url_depth(&ParsedUrl::parse(\"/foo/bar/?param=1\", None)),\n            2\n        );\n    }\n\n    // =========================================================================\n    // Core URL-to-file conversion tests (the most critical ones)\n    // =========================================================================\n\n    #[test]\n    fn convert_root_to_root() {\n        assert_eq!(\n            convert(\n                \"https://siteone.io/\",\n                \"https://siteone.io/\",\n                \"https://siteone.io/\",\n                None\n            ),\n            \"index.html\"\n        );\n    }\n\n    #[test]\n    fn convert_root_page() {\n        assert_eq!(\n            convert(\n                \"https://siteone.io/\",\n                \"https://siteone.io/\",\n                \"https://siteone.io/page\",\n                None\n            ),\n            \"page.html\"\n        );\n    }\n\n    #[test]\n    fn convert_root_page_trailing_slash() {\n        assert_eq!(\n            convert(\n                \"https://siteone.io/\",\n                \"https://siteone.io\",\n                \"https://siteone.io/page/\",\n                None\n            ),\n            \"page/index.html\"\n        );\n    }\n\n    #[test]\n    fn convert_from_subdir_with_fragment() {\n        let result = convert(\n            \"https://siteone.io/\",\n            \"https://siteone.io/t/\",\n            \"https://siteone.io/page#fragment\",\n            None,\n        );\n        assert_eq!(result, \"../page.html#fragment\");\n    }\n\n    #[test]\n    fn convert_relative_page() {\n        assert_eq!(\n            convert(\"https://siteone.io/\", \"https://siteone.io/\", \"/page\", None),\n            \"page.html\"\n        );\n    }\n\n    #[test]\n    fn convert_relative_page_dir() {\n        assert_eq!(\n            convert(\"https://siteone.io/\", \"https://siteone.io/\", \"/page/\", None),\n            
\"page/index.html\"\n        );\n    }\n\n    #[test]\n    fn convert_relative_plain() {\n        assert_eq!(\n            convert(\"https://siteone.io/\", \"https://siteone.io/\", \"page\", None),\n            \"page.html\"\n        );\n    }\n\n    #[test]\n    fn convert_relative_parent() {\n        assert_eq!(\n            convert(\"https://siteone.io/\", \"https://siteone.io/path/\", \"../page\", None),\n            \"../page.html\"\n        );\n    }\n\n    #[test]\n    fn convert_relative_parent_dir() {\n        assert_eq!(\n            convert(\"https://siteone.io/\", \"https://siteone.io/path/\", \"../page/\", None),\n            \"../page/index.html\"\n        );\n    }\n\n    #[test]\n    fn convert_from_subpath_same_dir() {\n        assert_eq!(\n            convert(\n                \"https://siteone.io/\",\n                \"https://siteone.io/path/\",\n                \"https://siteone.io/path/page\",\n                None\n            ),\n            \"../path/page.html\"\n        );\n    }\n\n    // ---- External domains ----\n\n    #[test]\n    fn convert_external_allowed_domain_root() {\n        assert_eq!(\n            convert(\n                \"https://siteone.io/\",\n                \"https://siteone.io/\",\n                \"https://nextjs.org/\",\n                None\n            ),\n            \"_nextjs.org/index.html\"\n        );\n    }\n\n    #[test]\n    fn convert_external_allowed_domain_from_subdir() {\n        assert_eq!(\n            convert(\n                \"https://siteone.io/\",\n                \"https://siteone.io/t/\",\n                \"https://svelte.dev/x\",\n                None\n            ),\n            \"../_svelte.dev/x.html\"\n        );\n    }\n\n    #[test]\n    fn convert_external_css_file() {\n        assert_eq!(\n            convert(\n                \"https://siteone.io/\",\n                \"https://siteone.io/t/\",\n                \"https://svelte.dev/x/file.css\",\n                None\n            
),\n            \"../_svelte.dev/x/file.css\"\n        );\n    }\n\n    // ---- Backlinks ----\n\n    #[test]\n    fn convert_backlink_to_initial_domain() {\n        assert_eq!(\n            convert(\n                \"https://siteone.io/\",\n                \"https://nextjs.org/\",\n                \"https://siteone.io/\",\n                None\n            ),\n            \"../index.html\"\n        );\n    }\n\n    #[test]\n    fn convert_backlink_subpage_to_initial() {\n        assert_eq!(\n            convert(\n                \"https://siteone.io/\",\n                \"https://nextjs.org/subpage\",\n                \"https://siteone.io/\",\n                None\n            ),\n            \"../index.html\"\n        );\n    }\n\n    #[test]\n    fn convert_backlink_subdir_to_initial() {\n        assert_eq!(\n            convert(\n                \"https://siteone.io/\",\n                \"https://nextjs.org/subpage/\",\n                \"https://siteone.io/a\",\n                None\n            ),\n            \"../../a.html\"\n        );\n    }\n\n    #[test]\n    fn convert_backlink_to_third_domain() {\n        assert_eq!(\n            convert(\n                \"https://siteone.io/\",\n                \"https://nextjs.org/\",\n                \"https://svelte.dev/page\",\n                None\n            ),\n            \"../_svelte.dev/page.html\"\n        );\n    }\n\n    // ---- Protocol-relative ----\n\n    #[test]\n    fn convert_protocol_relative_external() {\n        assert_eq!(\n            convert(\"https://siteone.io/\", \"https://siteone.io/\", \"//nextjs.org/\", None),\n            \"_nextjs.org/index.html\"\n        );\n    }\n\n    #[test]\n    fn convert_protocol_relative_backlink() {\n        assert_eq!(\n            convert(\"https://siteone.io/\", \"https://nextjs.org/\", \"//siteone.io/page\", None),\n            \"../page.html\"\n        );\n    }\n\n    // ---- Fragment only ----\n\n    #[test]\n    fn convert_fragment_only() {\n      
  assert_eq!(\n            convert(\"https://siteone.io/\", \"https://siteone.io/\", \"#fragment2\", None),\n            \"#fragment2\"\n        );\n    }\n\n    #[test]\n    fn convert_fragment_only_external() {\n        assert_eq!(\n            convert(\"https://siteone.io/\", \"https://nextjs.org/\", \"#fragment3\", None),\n            \"#fragment3\"\n        );\n    }\n\n    // ---- Query string handling (md5 hash) ----\n\n    #[test]\n    fn convert_page_with_query() {\n        let result = convert(\n            \"https://siteone.io/\",\n            \"https://siteone.io/\",\n            \"https://siteone.io/page?p=1\",\n            None,\n        );\n        // Should have query hash between basename and extension: page.HASH.html\n        assert!(\n            result.starts_with(\"page.\"),\n            \"expected 'page.HASH.html', got '{}'\",\n            result\n        );\n        assert!(result.ends_with(\".html\"), \"expected '*.html', got '{}'\", result);\n        assert!(!result.contains('?'));\n    }\n\n    #[test]\n    fn convert_query_only() {\n        let result = convert(\"https://siteone.io/\", \"https://siteone.io/\", \"?p=1\", None);\n        // Should be: index.HASH.html\n        assert!(\n            result.starts_with(\"index.\"),\n            \"expected 'index.HASH.html', got '{}'\",\n            result\n        );\n        assert!(result.ends_with(\".html\"), \"expected '*.html', got '{}'\", result);\n    }\n\n    #[test]\n    fn convert_css_with_query() {\n        let result = convert(\n            \"https://siteone.io/\",\n            \"https://siteone.io/\",\n            \"https://siteone.io/file.css?p=1\",\n            None,\n        );\n        // Should be: file.HASH.css\n        assert!(result.ends_with(\".css\"), \"expected '*.css', got '{}'\", result);\n        assert!(!result.contains('?'));\n    }\n\n    // ---- Complex relative paths ----\n\n    #[test]\n    fn convert_double_parent_relative() {\n        assert_eq!(\n            
convert(\n                \"https://siteone.io/\",\n                \"https://siteone.io/path/more/\",\n                \"../../page\",\n                None\n            ),\n            \"../../page.html\"\n        );\n    }\n\n    #[test]\n    fn convert_double_parent_relative_dir() {\n        assert_eq!(\n            convert(\n                \"https://siteone.io/\",\n                \"https://siteone.io/path/more/\",\n                \"../../page/\",\n                None\n            ),\n            \"../../page/index.html\"\n        );\n    }\n\n    // ---- External CSS references ----\n\n    #[test]\n    fn convert_from_external_css_to_external_image() {\n        let result = convert(\n            \"https://siteone.io/\",\n            \"https://cdn.siteone.io/siteone.io/css/styles.css\",\n            \"https://cdn.webflow.com/a/b1.jpg\",\n            None,\n        );\n        assert_eq!(result, \"../../../_cdn.webflow.com/a/b1.jpg\");\n    }\n\n    #[test]\n    fn convert_from_deep_external_css_to_image() {\n        let result = convert(\n            \"https://siteone.io/\",\n            \"https://cdn.siteone.io/siteone.io/css/hello/hi/styles.css\",\n            \"https://cdn.webflow.com/b2.jpg\",\n            None,\n        );\n        assert_eq!(result, \"../../../../../_cdn.webflow.com/b2.jpg\");\n    }\n\n    #[test]\n    fn convert_from_external_css_to_initial_domain() {\n        let result = convert(\n            \"https://siteone.io/\",\n            \"https://cdn.siteone.io/siteone.io/css/hello/hi/styles.css\",\n            \"https://siteone.io/test/image.jpg\",\n            None,\n        );\n        assert_eq!(result, \"../../../../../test/image.jpg\");\n    }\n\n    #[test]\n    fn convert_from_external_css_relative_root() {\n        let result = convert(\n            \"https://siteone.io/\",\n            \"https://cdn.siteone.io/siteone.io/css/styles.css\",\n            \"/abt.jpg\",\n            None,\n        );\n        assert_eq!(result, 
\"../../abt.jpg\");\n    }\n\n    #[test]\n    fn convert_from_external_css_relative_parent() {\n        let result = convert(\n            \"https://siteone.io/\",\n            \"https://cdn.siteone.io/siteone.io/css/styles.css\",\n            \"../abz.jpg\",\n            None,\n        );\n        assert_eq!(result, \"../abz.jpg\");\n    }\n\n    // ---- Unknown/not-allowed domains → keep absolute ----\n\n    #[test]\n    fn convert_unknown_domain_stays_absolute() {\n        let result = convert(\n            \"https://siteone.io/\",\n            \"https://siteone.io/\",\n            \"https://unknown.com/\",\n            None,\n        );\n        assert_eq!(result, \"https://unknown.com/\");\n    }\n\n    #[test]\n    fn convert_unknown_domain_http_stays_absolute() {\n        let result = convert(\n            \"https://siteone.io/\",\n            \"https://siteone.io/\",\n            \"http://unknown.com/page\",\n            None,\n        );\n        assert_eq!(result, \"http://unknown.com/page\");\n    }\n\n    // =========================================================================\n    // sanitizeFilePath (UTF-8 subset)\n    // =========================================================================\n\n    #[test]\n    fn sanitize_utf8_czech() {\n        assert_eq!(\n            OfflineUrlConverter::sanitize_file_path(\"české-výrobky\", false),\n            \"české-výrobky\"\n        );\n    }\n\n    #[test]\n    fn sanitize_utf8_german() {\n        assert_eq!(OfflineUrlConverter::sanitize_file_path(\"über-uns\", false), \"über-uns\");\n    }\n\n    #[test]\n    fn sanitize_utf8_chinese() {\n        assert_eq!(OfflineUrlConverter::sanitize_file_path(\"电子产品\", false), \"电子产品\");\n    }\n\n    #[test]\n    fn sanitize_url_encoded_czech() {\n        assert_eq!(\n            OfflineUrlConverter::sanitize_file_path(\"%C4%8Desk%C3%A9-v%C3%BDrobky\", false),\n            \"české-výrobky\"\n        );\n    }\n\n    #[test]\n    fn 
sanitize_url_encoded_german() {\n        assert_eq!(\n            OfflineUrlConverter::sanitize_file_path(\"%C3%BCber-uns\", false),\n            \"über-uns\"\n        );\n    }\n\n    #[test]\n    fn sanitize_url_encoded_chinese() {\n        assert_eq!(\n            OfflineUrlConverter::sanitize_file_path(\"%E7%94%B5%E5%AD%90%E4%BA%A7%E5%93%81\", false),\n            \"电子产品\"\n        );\n    }\n\n    #[test]\n    fn sanitize_dangerous_chars_colon() {\n        assert_eq!(\n            OfflineUrlConverter::sanitize_file_path(\"file:with:colons\", false),\n            \"file_with_colons\"\n        );\n    }\n\n    #[test]\n    fn sanitize_dangerous_chars_asterisk() {\n        assert_eq!(\n            OfflineUrlConverter::sanitize_file_path(\"file*with*asterisks\", false),\n            \"file_with_asterisks\"\n        );\n    }\n\n    #[test]\n    fn sanitize_dangerous_chars_question() {\n        assert_eq!(\n            OfflineUrlConverter::sanitize_file_path(\"file?with?questions\", false),\n            \"file_with_questions\"\n        );\n    }\n\n    #[test]\n    fn sanitize_dangerous_chars_quotes() {\n        assert_eq!(\n            OfflineUrlConverter::sanitize_file_path(\"file\\\"with\\\"quotes\", false),\n            \"file_with_quotes\"\n        );\n    }\n\n    #[test]\n    fn sanitize_dangerous_chars_brackets() {\n        assert_eq!(\n            OfflineUrlConverter::sanitize_file_path(\"file<with>brackets\", false),\n            \"file_with_brackets\"\n        );\n    }\n\n    #[test]\n    fn sanitize_dangerous_chars_pipes() {\n        assert_eq!(\n            OfflineUrlConverter::sanitize_file_path(\"file|with|pipes\", false),\n            \"file_with_pipes\"\n        );\n    }\n\n    #[test]\n    fn sanitize_dangerous_chars_backslash() {\n        assert_eq!(\n            OfflineUrlConverter::sanitize_file_path(\"file\\\\with\\\\backslashes\", false),\n            \"file_with_backslashes\"\n        );\n    }\n\n    #[test]\n    fn 
sanitize_mixed_utf8_and_dangerous() {\n        assert_eq!(\n            OfflineUrlConverter::sanitize_file_path(\"české:výrobky\", false),\n            \"české_výrobky\"\n        );\n    }\n\n    #[test]\n    fn sanitize_empty() {\n        assert_eq!(OfflineUrlConverter::sanitize_file_path(\"\", false), \"\");\n    }\n\n    #[test]\n    fn sanitize_dots() {\n        assert_eq!(OfflineUrlConverter::sanitize_file_path(\".\", false), \".\");\n        assert_eq!(OfflineUrlConverter::sanitize_file_path(\"..\", false), \"..\");\n    }\n\n    // =========================================================================\n    // Direct OfflineUrlConverter URL conversion tests\n    // =========================================================================\n\n    fn convert_simple(base: &str, target: &str) -> String {\n        let initial_url = ParsedUrl::parse(\"https://example.com/\", None);\n        let base_url = ParsedUrl::parse(base, None);\n        let base_url_for_ref = ParsedUrl::parse(base, None);\n        let target_url = ParsedUrl::parse(target, Some(&base_url_for_ref));\n\n        let false_cb1: Box<dyn Fn(&str) -> bool + Send + Sync> = Box::new(|_| false);\n        let false_cb2: Box<dyn Fn(&str) -> bool + Send + Sync> = Box::new(|_| false);\n\n        let mut converter = OfflineUrlConverter::new(\n            initial_url,\n            base_url,\n            target_url,\n            Some(false_cb1),\n            Some(false_cb2),\n            None,\n        );\n        converter.convert_url_to_relative(false)\n    }\n\n    #[test]\n    fn simple_from_subdir_to_root_asset() {\n        assert_eq!(\n            convert_simple(\"https://example.com/page/\", \"/style.css\"),\n            \"../style.css\"\n        );\n    }\n\n    #[test]\n    fn simple_from_subdir_to_root_image() {\n        assert_eq!(\n            convert_simple(\"https://example.com/page/\", \"/images/logo.png\"),\n            \"../images/logo.png\"\n        );\n    }\n\n    #[test]\n    fn 
simple_from_deep_subdir_to_root_asset() {\n        assert_eq!(\n            convert_simple(\"https://example.com/dir/page/\", \"/style.css\"),\n            \"../../style.css\"\n        );\n    }\n\n    #[test]\n    fn simple_from_root_to_root_asset() {\n        assert_eq!(convert_simple(\"https://example.com/\", \"/style.css\"), \"style.css\");\n    }\n\n    #[test]\n    fn simple_from_root_to_subdir_image() {\n        assert_eq!(\n            convert_simple(\"https://example.com/\", \"/images/logo.png\"),\n            \"images/logo.png\"\n        );\n    }\n\n    // ---- UTF-8 URL conversion ----\n\n    fn convert_utf8(base: &str, target: &str) -> String {\n        let initial_url = ParsedUrl::parse(\"https://example.com/\", None);\n        let base_url = ParsedUrl::parse(base, None);\n        let target_url = ParsedUrl::parse(target, None);\n\n        let false_cb1: Box<dyn Fn(&str) -> bool + Send + Sync> = Box::new(|_| false);\n        let false_cb2: Box<dyn Fn(&str) -> bool + Send + Sync> = Box::new(|_| false);\n\n        let mut converter = OfflineUrlConverter::new(\n            initial_url,\n            base_url,\n            target_url,\n            Some(false_cb1),\n            Some(false_cb2),\n            None,\n        );\n        converter.convert_url_to_relative(true)\n    }\n\n    #[test]\n    fn utf8_czech_from_root() {\n        assert_eq!(\n            convert_utf8(\"https://example.com/\", \"https://example.com/české-výrobky\"),\n            \"české-výrobky.html\"\n        );\n    }\n\n    #[test]\n    fn utf8_czech_in_subdir() {\n        assert_eq!(\n            convert_utf8(\"https://example.com/\", \"https://example.com/products/české-výrobky\"),\n            \"products/české-výrobky.html\"\n        );\n    }\n\n    #[test]\n    fn utf8_german_from_root() {\n        assert_eq!(\n            convert_utf8(\"https://example.com/\", \"https://example.com/über-uns\"),\n            \"über-uns.html\"\n        );\n    }\n\n    #[test]\n    fn 
utf8_chinese_from_root() {\n        assert_eq!(\n            convert_utf8(\"https://example.com/\", \"https://example.com/电子产品\"),\n            \"电子产品.html\"\n        );\n    }\n\n    #[test]\n    fn utf8_czech_trailing_slash() {\n        assert_eq!(\n            convert_utf8(\"https://example.com/\", \"https://example.com/české-výrobky/\"),\n            \"české-výrobky/index.html\"\n        );\n    }\n\n    #[test]\n    fn utf8_chinese_trailing_slash() {\n        assert_eq!(\n            convert_utf8(\"https://example.com/\", \"https://example.com/电子产品/\"),\n            \"电子产品/index.html\"\n        );\n    }\n\n    #[test]\n    fn utf8_czech_from_subdir() {\n        assert_eq!(\n            convert_utf8(\"https://example.com/page/\", \"https://example.com/české-výrobky\"),\n            \"../české-výrobky.html\"\n        );\n    }\n\n    #[test]\n    fn utf8_chinese_from_subdir() {\n        assert_eq!(\n            convert_utf8(\"https://example.com/dir/\", \"https://example.com/电子产品\"),\n            \"../电子产品.html\"\n        );\n    }\n\n    #[test]\n    fn utf8_czech_with_fragment() {\n        assert_eq!(\n            convert_utf8(\"https://example.com/\", \"https://example.com/české#sekce\"),\n            \"české.html#sekce\"\n        );\n    }\n\n    // =========================================================================\n    // Existing tests preserved\n    // =========================================================================\n\n    #[test]\n    fn test_sanitize_file_path_basic() {\n        let result = OfflineUrlConverter::sanitize_file_path(\"/index.html\", true);\n        assert_eq!(result, \"/index.html\");\n    }\n\n    #[test]\n    fn test_sanitize_file_path_with_query() {\n        let result = OfflineUrlConverter::sanitize_file_path(\"/page.html?foo=bar\", false);\n        assert!(result.contains(\".html\"));\n        assert!(!result.contains('?'));\n    }\n\n    #[test]\n    fn test_extract_regex_pattern() {\n        
assert_eq!(extract_regex_pattern(\"/foo/i\"), Some(\"(?i)foo\".to_string()));\n        assert_eq!(extract_regex_pattern(\"/bar/\"), Some(\"bar\".to_string()));\n        assert_eq!(extract_regex_pattern(\"#test#\"), Some(\"test\".to_string()));\n    }\n\n    // =========================================================================\n    // Preserve URL structure tests (--offline-export-preserve-url-structure)\n    // =========================================================================\n\n    fn convert_preserve(initial: &str, base: &str, target: &str) -> String {\n        let initial_url = ParsedUrl::parse(initial, None);\n        let base_url = ParsedUrl::parse(base, None);\n        let target_url = ParsedUrl::parse(target, None);\n\n        let false_cb1: Box<dyn Fn(&str) -> bool + Send + Sync> = Box::new(|_| false);\n        let false_cb2: Box<dyn Fn(&str) -> bool + Send + Sync> = Box::new(|_| false);\n\n        let mut converter = OfflineUrlConverter::new(\n            initial_url,\n            base_url,\n            target_url,\n            Some(false_cb1),\n            Some(false_cb2),\n            None,\n        );\n        converter.set_preserve_url_structure(true);\n        converter.convert_url_to_relative(true)\n    }\n\n    #[test]\n    fn preserve_extensionless_page_becomes_dir_index() {\n        // /about → /about/index.html (not /about.html)\n        assert_eq!(\n            convert_preserve(\n                \"https://example.com/\",\n                \"https://example.com/\",\n                \"https://example.com/about\"\n            ),\n            \"about/index.html\"\n        );\n    }\n\n    #[test]\n    fn preserve_trailing_slash_unchanged() {\n        // /about/ → /about/index.html (same as without preserve)\n        assert_eq!(\n            convert_preserve(\n                \"https://example.com/\",\n                \"https://example.com/\",\n                \"https://example.com/about/\"\n            ),\n            
\"about/index.html\"\n        );\n    }\n\n    #[test]\n    fn preserve_with_real_extension_unchanged() {\n        // /style.css → /style.css (not /style.css/index.html)\n        assert_eq!(\n            convert_preserve(\n                \"https://example.com/\",\n                \"https://example.com/\",\n                \"https://example.com/style.css\"\n            ),\n            \"style.css\"\n        );\n    }\n\n    #[test]\n    fn preserve_nested_path() {\n        // /docs/guide → /docs/guide/index.html\n        assert_eq!(\n            convert_preserve(\n                \"https://example.com/\",\n                \"https://example.com/\",\n                \"https://example.com/docs/guide\"\n            ),\n            \"docs/guide/index.html\"\n        );\n    }\n\n    #[test]\n    fn preserve_with_query_string() {\n        // /about?lang=en → /about/index.HASH.html\n        let result = convert_preserve(\n            \"https://example.com/\",\n            \"https://example.com/\",\n            \"https://example.com/about?lang=en\",\n        );\n        assert!(\n            result.starts_with(\"about/index.\"),\n            \"expected 'about/index.HASH.html', got '{}'\",\n            result\n        );\n        assert!(result.ends_with(\".html\"), \"expected '*.html', got '{}'\", result);\n        assert!(!result.contains('?'));\n    }\n\n    #[test]\n    fn preserve_root_page_unchanged() {\n        // / → index.html (root is always index.html)\n        assert_eq!(\n            convert_preserve(\"https://example.com/\", \"https://example.com/\", \"https://example.com/\"),\n            \"index.html\"\n        );\n    }\n}\n"
  },
  {
    "path": "src/export/utils/target_domain_relation.rs",
    "content": "// SiteOne Crawler - TargetDomainRelation\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse crate::engine::parsed_url::ParsedUrl;\n\n/// Describes the relationship between initial URL, base URL (page where link was found),\n/// and target URL (the link destination).\n#[derive(Debug, Clone, Copy, PartialEq, Eq)]\npub enum TargetDomainRelation {\n    /// e.g. initial www.siteone.io, base www.siteone.io, target www.siteone.io\n    InitialSameBaseSame,\n    /// e.g. initial www.siteone.io, base nextjs.org, target www.siteone.io\n    InitialSameBaseDifferent,\n    /// e.g. initial www.siteone.io, base nextjs.org, target nextjs.org\n    InitialDifferentBaseSame,\n    /// e.g. initial www.siteone.io, base nextjs.org, target svelte.dev\n    InitialDifferentBaseDifferent,\n}\n\nimpl TargetDomainRelation {\n    /// Determine the domain relation given the hosts of the initial URL, base URL, and target URL.\n    /// If `target_host` is None or matches `base_host`, it's considered same as base.\n    /// Determine the domain relation given ParsedUrl references.\n    pub fn get_by_urls(initial_url: &ParsedUrl, base_url: &ParsedUrl, target_url: &ParsedUrl) -> Self {\n        Self::get_by_hosts(\n            initial_url.host.as_deref(),\n            base_url.host.as_deref(),\n            target_url.host.as_deref(),\n        )\n    }\n\n    /// Determine the domain relation given the hosts of the initial URL, base URL, and target URL.\n    /// If `target_host` is None or matches `base_host`, it's considered same as base.\n    pub fn get_by_hosts(initial_host: Option<&str>, base_host: Option<&str>, target_host: Option<&str>) -> Self {\n        let initial = initial_host.unwrap_or(\"\");\n        let base = base_host.unwrap_or(\"\");\n        let target = target_host.unwrap_or(\"\");\n\n        if target.is_empty() || target == base {\n            // base host is the same as target host\n            if base == initial {\n                
TargetDomainRelation::InitialSameBaseSame\n            } else {\n                TargetDomainRelation::InitialDifferentBaseSame\n            }\n        } else {\n            // base host is different from target host\n            if target == initial {\n                TargetDomainRelation::InitialSameBaseDifferent\n            } else {\n                TargetDomainRelation::InitialDifferentBaseDifferent\n            }\n        }\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n    use crate::engine::parsed_url::ParsedUrl;\n\n    // =========================================================================\n    // All 12 domain relation cases\n    // =========================================================================\n\n    // INITIAL_SAME__BASE_SAME\n    #[test]\n    fn initial_same_base_same_relative() {\n        let initial = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let base = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let target = ParsedUrl::parse(\"/\", Some(&base));\n        assert_eq!(\n            TargetDomainRelation::get_by_urls(&initial, &base, &target),\n            TargetDomainRelation::InitialSameBaseSame\n        );\n    }\n\n    #[test]\n    fn initial_same_base_same_absolute() {\n        let initial = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let base = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let target = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        assert_eq!(\n            TargetDomainRelation::get_by_urls(&initial, &base, &target),\n            TargetDomainRelation::InitialSameBaseSame\n        );\n    }\n\n    #[test]\n    fn initial_same_base_same_protocol_relative() {\n        let initial = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let base = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let target = ParsedUrl::parse(\"//www.siteone.io/\", None);\n        assert_eq!(\n            
TargetDomainRelation::get_by_urls(&initial, &base, &target),\n            TargetDomainRelation::InitialSameBaseSame\n        );\n    }\n\n    // INITIAL_SAME__BASE_DIFFERENT (backlink)\n    #[test]\n    fn initial_same_base_different_absolute() {\n        let initial = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let base = ParsedUrl::parse(\"https://nextjs.org/\", None);\n        let target = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        assert_eq!(\n            TargetDomainRelation::get_by_urls(&initial, &base, &target),\n            TargetDomainRelation::InitialSameBaseDifferent\n        );\n    }\n\n    #[test]\n    fn initial_same_base_different_protocol_relative() {\n        let initial = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let base = ParsedUrl::parse(\"https://nextjs.org/\", None);\n        let target = ParsedUrl::parse(\"//www.siteone.io/\", None);\n        assert_eq!(\n            TargetDomainRelation::get_by_urls(&initial, &base, &target),\n            TargetDomainRelation::InitialSameBaseDifferent\n        );\n    }\n\n    // INITIAL_DIFFERENT__BASE_SAME\n    #[test]\n    fn initial_different_base_same_relative() {\n        let initial = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let base = ParsedUrl::parse(\"https://nextjs.org/\", None);\n        let target = ParsedUrl::parse(\"/\", Some(&base));\n        assert_eq!(\n            TargetDomainRelation::get_by_urls(&initial, &base, &target),\n            TargetDomainRelation::InitialDifferentBaseSame\n        );\n    }\n\n    #[test]\n    fn initial_different_base_same_absolute() {\n        let initial = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let base = ParsedUrl::parse(\"https://nextjs.org/\", None);\n        let target = ParsedUrl::parse(\"https://nextjs.org/\", None);\n        assert_eq!(\n            TargetDomainRelation::get_by_urls(&initial, &base, &target),\n            
TargetDomainRelation::InitialDifferentBaseSame\n        );\n    }\n\n    #[test]\n    fn initial_different_base_same_protocol_relative() {\n        let initial = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let base = ParsedUrl::parse(\"https://nextjs.org/\", None);\n        let target = ParsedUrl::parse(\"//nextjs.org\", None);\n        assert_eq!(\n            TargetDomainRelation::get_by_urls(&initial, &base, &target),\n            TargetDomainRelation::InitialDifferentBaseSame\n        );\n    }\n\n    // INITIAL_DIFFERENT__BASE_DIFFERENT\n    #[test]\n    fn initial_different_base_different_absolute() {\n        let initial = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let base = ParsedUrl::parse(\"https://nextjs.org/\", None);\n        let target = ParsedUrl::parse(\"https://svelte.dev/\", None);\n        assert_eq!(\n            TargetDomainRelation::get_by_urls(&initial, &base, &target),\n            TargetDomainRelation::InitialDifferentBaseDifferent\n        );\n    }\n\n    #[test]\n    fn initial_different_base_different_protocol_relative() {\n        let initial = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let base = ParsedUrl::parse(\"https://nextjs.org/\", None);\n        let target = ParsedUrl::parse(\"//svelte.dev\", None);\n        assert_eq!(\n            TargetDomainRelation::get_by_urls(&initial, &base, &target),\n            TargetDomainRelation::InitialDifferentBaseDifferent\n        );\n    }\n\n    #[test]\n    fn initial_different_base_different_same_initial_base() {\n        let initial = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let base = ParsedUrl::parse(\"https://www.siteone.io/\", None);\n        let target = ParsedUrl::parse(\"//svelte.dev\", None);\n        assert_eq!(\n            TargetDomainRelation::get_by_urls(&initial, &base, &target),\n            TargetDomainRelation::InitialDifferentBaseDifferent\n        );\n    }\n\n    // Host-level tests 
(existing, kept)\n    #[test]\n    fn test_target_empty() {\n        let result = TargetDomainRelation::get_by_hosts(Some(\"www.siteone.io\"), Some(\"www.siteone.io\"), None);\n        assert_eq!(result, TargetDomainRelation::InitialSameBaseSame);\n    }\n}\n"
  },
  {
    "path": "src/extra_column.rs",
    "content": "// SiteOne Crawler - ExtraColumn\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse regex::Regex;\nuse scraper::{Html, Selector};\n\nuse crate::error::CrawlerError;\n\npub const CUSTOM_METHOD_XPATH: &str = \"xpath\";\npub const CUSTOM_METHOD_REGEXP: &str = \"regexp\";\n\n#[derive(Debug, Clone, serde::Serialize)]\n#[serde(rename_all = \"camelCase\")]\npub struct ExtraColumn {\n    pub name: String,\n    pub length: Option<usize>,\n    pub truncate: bool,\n    pub custom_method: Option<String>,\n    pub custom_pattern: Option<String>,\n    pub custom_group: Option<usize>,\n    #[serde(skip)]\n    compiled_regex: Option<Regex>,\n}\n\nfn default_column_size(name: &str) -> Option<usize> {\n    match name {\n        \"Title\" => Some(20),\n        \"Description\" => Some(20),\n        \"Keywords\" => Some(20),\n        _ => None,\n    }\n}\n\nimpl ExtraColumn {\n    pub fn new(\n        name: String,\n        length: Option<usize>,\n        truncate: bool,\n        custom_method: Option<String>,\n        custom_pattern: Option<String>,\n        custom_group: Option<usize>,\n    ) -> Result<Self, CrawlerError> {\n        let validated_method = if let Some(ref method) = custom_method {\n            let method_lower = method.to_lowercase();\n            if method_lower != CUSTOM_METHOD_XPATH && method_lower != CUSTOM_METHOD_REGEXP {\n                return Err(CrawlerError::Config(format!(\n                    \"Invalid custom extraction method: {}. 
Expected '{}' or '{}'.\",\n                    method, CUSTOM_METHOD_XPATH, CUSTOM_METHOD_REGEXP\n                )));\n            }\n\n            if method_lower == CUSTOM_METHOD_REGEXP\n                && let Some(ref pattern) = custom_pattern\n            {\n                // Validate the regex pattern\n                if Regex::new(pattern).is_err() {\n                    return Err(CrawlerError::Config(format!(\n                        \"Invalid regexp pattern provided: {}\",\n                        pattern\n                    )));\n                }\n            }\n\n            Some(method_lower)\n        } else {\n            None\n        };\n\n        let compiled_regex = if validated_method.as_deref() == Some(CUSTOM_METHOD_REGEXP) {\n            custom_pattern.as_deref().and_then(|p| Regex::new(p).ok())\n        } else {\n            None\n        };\n\n        Ok(Self {\n            name,\n            length,\n            truncate,\n            custom_method: validated_method,\n            custom_pattern,\n            custom_group,\n            compiled_regex,\n        })\n    }\n\n    pub fn get_length(&self) -> usize {\n        self.length.unwrap_or(self.name.len())\n    }\n\n    pub fn get_truncated_value(&self, value: Option<&str>) -> Option<String> {\n        let value = value?;\n\n        let length = self.get_length();\n        if self.truncate && value.chars().count() > length {\n            let truncated: String = value.chars().take(length.saturating_sub(1)).collect();\n            Some(format!(\"{}…\", truncated.trim()))\n        } else {\n            Some(value.to_string())\n        }\n    }\n\n    pub fn from_text(text: &str) -> Result<ExtraColumn, CrawlerError> {\n        // If the string contains '=', then it is a custom extraction.\n        if text.contains('=') {\n            let re = Regex::new(r\"^([^=]+)=(xpath|regexp):(.+?)(?:#(\\d+))?(?:\\((\\d+)(>?)\\))?$\")\n                .map_err(|e| 
CrawlerError::Parse(e.to_string()))?;\n\n            if let Some(caps) = re.captures(text) {\n                let name = caps.get(1).map_or(\"\", |m| m.as_str()).trim().to_string();\n                let custom_method = Some(caps.get(2).map_or(\"\", |m| m.as_str()).to_lowercase());\n                let custom_pattern = Some(caps.get(3).map_or(\"\", |m| m.as_str()).trim().to_string());\n                let custom_group = caps\n                    .get(4)\n                    .and_then(|m| {\n                        let s = m.as_str();\n                        if s.is_empty() { None } else { s.parse::<usize>().ok() }\n                    })\n                    .or(Some(0));\n\n                let (length, truncate) = if let Some(len_match) = caps.get(5) {\n                    let len = len_match.as_str().parse::<usize>().unwrap_or(0);\n                    let trunc = caps.get(6).is_none_or(|m| m.as_str() != \">\");\n                    (Some(len), trunc)\n                } else {\n                    (None, true)\n                };\n\n                return ExtraColumn::new(name, length, truncate, custom_method, custom_pattern, custom_group);\n            }\n\n            // If parsing of the custom syntax fails, return a standard column.\n            return ExtraColumn::new(text.trim().to_string(), None, true, None, None, None);\n        }\n\n        // Standard column parsing\n        let re = Regex::new(r\"^([^(]+)(\\((\\d+)(>?)\\))?$\").map_err(|e| CrawlerError::Parse(e.to_string()))?;\n\n        if let Some(caps) = re.captures(text) {\n            let name = caps.get(1).map_or(\"\", |m| m.as_str()).trim().to_string();\n\n            let (length, truncate) = if let Some(len_match) = caps.get(3) {\n                let len = len_match.as_str().parse::<usize>().unwrap_or(0);\n                let trunc = caps.get(4).is_none_or(|m| m.as_str() != \">\");\n                (Some(len), trunc)\n            } else {\n                (default_column_size(&name), true)\n     
       };\n\n            ExtraColumn::new(name, length, truncate, None, None, None)\n        } else {\n            ExtraColumn::new(text.trim().to_string(), None, true, None, None, None)\n        }\n    }\n\n    pub fn extract_value(&self, text: &str) -> Option<String> {\n        let method = self.custom_method.as_deref()?;\n        let pattern = self.custom_pattern.as_deref()?;\n\n        match method {\n            CUSTOM_METHOD_REGEXP => {\n                let re = self.compiled_regex.as_ref()?;\n                let caps = re.captures(text)?;\n                let group = self.custom_group.unwrap_or(0);\n                caps.get(group).map(|m| m.as_str().to_string())\n            }\n            CUSTOM_METHOD_XPATH => {\n                let index = self.custom_group.unwrap_or(0);\n                Self::extract_xpath(text, pattern, index)\n            }\n            _ => None,\n        }\n    }\n\n    /// Extract value using XPath-like pattern via CSS selector conversion.\n    /// Supports common XPath patterns used in web scraping:\n    ///   //tag                     -> tag\n    ///   //tag[@attr='value']      -> tag[attr='value']\n    ///   //tag/@attr               -> tag (then read attribute)\n    ///   //tag[@attr='value']/@x   -> tag[attr='value'] (then read attribute x)\n    fn extract_xpath(html: &str, xpath: &str, index: usize) -> Option<String> {\n        let document = Html::parse_document(html);\n\n        // Strip /text() suffix — it's XPath shorthand for \"get text content\", which is the\n        // default behaviour when no attribute is requested. 
CSS selectors don't support text().\n        let xpath = xpath.strip_suffix(\"/text()\").unwrap_or(xpath);\n\n        // Detect if XPath ends with /@attribute — means we want an attribute value\n        let (xpath_base, target_attr) = if let Some(idx) = xpath.rfind(\"/@\") {\n            (&xpath[..idx], Some(&xpath[idx + 2..]))\n        } else {\n            (xpath, None)\n        };\n\n        // Convert XPath to CSS selector\n        let css = xpath_to_css(xpath_base);\n        let selector = Selector::parse(&css).ok()?;\n\n        let mut nodes = document.select(&selector);\n\n        if let Some(element) = nodes.nth(index) {\n            if let Some(attr) = target_attr {\n                // Return attribute value\n                element.value().attr(attr).map(|v| v.trim().to_string())\n            } else {\n                // Return text content\n                let text: String = element.text().collect::<Vec<_>>().join(\"\");\n                let trimmed = text.trim().to_string();\n                if trimmed.is_empty() { None } else { Some(trimmed) }\n            }\n        } else {\n            None\n        }\n    }\n}\n\n/// Convert common XPath expressions to CSS selectors.\nfn xpath_to_css(xpath: &str) -> String {\n    let mut s = xpath.to_string();\n\n    // Strip leading // or /\n    if s.starts_with(\"//\") {\n        s = s[2..].to_string();\n    } else if s.starts_with('/') {\n        s = s[1..].to_string();\n    }\n\n    // Replace // (descendant) with space (CSS descendant combinator)\n    s = s.replace(\"//\", \" \");\n\n    // Replace / (child) with > (CSS child combinator)\n    s = s.replace('/', \" > \");\n\n    // Strip the XPath attribute marker inside predicates: [@attr=...] -> [attr=...]\n    // Without this, patterns like //tag[@attr='value'] produce invalid CSS\n    // (Selector::parse rejects '@') and extraction silently returns None.\n    s = s.replace(\"[@\", \"[\");\n\n    s\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    // -- from_text parsing --\n\n    #[test]\n    fn parse_simple_name_uses_default_length() {\n        let col = ExtraColumn::from_text(\"Title\").unwrap();\n        assert_eq!(col.name, \"Title\");\n        assert_eq!(col.length, Some(20)); // default for \"Title\"\n        
assert!(col.custom_method.is_none());\n    }\n\n    #[test]\n    fn parse_name_with_explicit_length() {\n        let col = ExtraColumn::from_text(\"Custom(50)\").unwrap();\n        assert_eq!(col.name, \"Custom\");\n        assert_eq!(col.length, Some(50));\n        assert!(col.truncate);\n    }\n\n    #[test]\n    fn parse_name_with_no_truncate() {\n        let col = ExtraColumn::from_text(\"Wide(30>)\").unwrap();\n        assert_eq!(col.name, \"Wide\");\n        assert_eq!(col.length, Some(30));\n        assert!(!col.truncate);\n    }\n\n    #[test]\n    fn parse_regexp_method() {\n        let col = ExtraColumn::from_text(\"X=regexp:<title>(.+?)</title>\").unwrap();\n        assert_eq!(col.custom_method.as_deref(), Some(\"regexp\"));\n        assert!(col.custom_pattern.is_some());\n    }\n\n    #[test]\n    fn parse_xpath_method() {\n        let col = ExtraColumn::from_text(\"X=xpath://h1\").unwrap();\n        assert_eq!(col.custom_method.as_deref(), Some(\"xpath\"));\n    }\n\n    #[test]\n    fn parse_invalid_method_returns_error() {\n        let result = ExtraColumn::from_text(\"X=invalid:foo\");\n        // \"invalid\" is not a valid method, but from_text falls back to standard column\n        // The actual error comes from ExtraColumn::new when method is validated\n        // from_text with unrecognized format returns a standard column, not an error\n        assert!(result.is_ok()); // falls back to standard column\n        let col = result.unwrap();\n        assert!(col.custom_method.is_none());\n    }\n\n    // -- extract_value regexp --\n\n    #[test]\n    fn extract_regexp_matching() {\n        let col = ExtraColumn::new(\n            \"X\".to_string(),\n            None,\n            true,\n            Some(\"regexp\".to_string()),\n            Some(\"<title>(.+?)</title>\".to_string()),\n            Some(1),\n        )\n        .unwrap();\n        assert_eq!(col.extract_value(\"<title>Hello</title>\"), Some(\"Hello\".to_string()));\n    }\n\n    
#[test]\n    fn extract_regexp_not_matching() {\n        let col = ExtraColumn::new(\n            \"X\".to_string(),\n            None,\n            true,\n            Some(\"regexp\".to_string()),\n            Some(\"<title>(.+?)</title>\".to_string()),\n            Some(1),\n        )\n        .unwrap();\n        assert_eq!(col.extract_value(\"<p>No title here</p>\"), None);\n    }\n\n    // -- extract_value xpath --\n\n    #[test]\n    fn extract_xpath_h1() {\n        let col = ExtraColumn::new(\n            \"X\".to_string(),\n            None,\n            true,\n            Some(\"xpath\".to_string()),\n            Some(\"//h1\".to_string()),\n            Some(0),\n        )\n        .unwrap();\n        let html = \"<html><body><h1>Title</h1></body></html>\";\n        assert_eq!(col.extract_value(html), Some(\"Title\".to_string()));\n    }\n\n    #[test]\n    fn extract_xpath_h1_with_text_suffix() {\n        // //h1/text() is a common XPath pattern — the /text() suffix must be stripped\n        // before CSS conversion since CSS selectors don't support text().\n        let col = ExtraColumn::new(\n            \"X\".to_string(),\n            None,\n            true,\n            Some(\"xpath\".to_string()),\n            Some(\"//h1/text()\".to_string()),\n            Some(0),\n        )\n        .unwrap();\n        let html = \"<html><body><h1>My Heading</h1></body></html>\";\n        assert_eq!(col.extract_value(html), Some(\"My Heading\".to_string()));\n    }\n\n    #[test]\n    fn extract_xpath_attribute() {\n        let col = ExtraColumn::new(\n            \"X\".to_string(),\n            None,\n            true,\n            Some(\"xpath\".to_string()),\n            Some(\"//a/@href\".to_string()),\n            Some(0),\n        )\n        .unwrap();\n        let html = \"<html><body><a href=\\\"https://example.com\\\">Link</a></body></html>\";\n        assert_eq!(col.extract_value(html), Some(\"https://example.com\".to_string()));\n    }\n\n    #[test]\n  
  fn extract_xpath_not_found() {\n        let col = ExtraColumn::new(\n            \"X\".to_string(),\n            None,\n            true,\n            Some(\"xpath\".to_string()),\n            Some(\"//h2\".to_string()),\n            Some(0),\n        )\n        .unwrap();\n        let html = \"<html><body><h1>Only H1</h1></body></html>\";\n        assert_eq!(col.extract_value(html), None);\n    }\n\n    // -- get_truncated_value --\n\n    #[test]\n    fn truncated_value_truncates_when_longer() {\n        let col = ExtraColumn::new(\"X\".to_string(), Some(3), true, None, None, None).unwrap();\n        // Takes length-1 chars (2) and appends \"…\" → total 3 visible chars\n        assert_eq!(col.get_truncated_value(Some(\"Hello\")), Some(\"He…\".to_string()));\n    }\n\n    #[test]\n    fn truncated_value_none_returns_none() {\n        let col = ExtraColumn::new(\"X\".to_string(), Some(3), true, None, None, None).unwrap();\n        assert_eq!(col.get_truncated_value(None), None);\n    }\n}\n"
  },
  {
    "path": "src/info.rs",
    "content": "// SiteOne Crawler - Info\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse serde::{Deserialize, Serialize};\r\n\r\n#[derive(Debug, Clone, Serialize, Deserialize)]\r\npub struct Info {\r\n    pub name: String,\r\n    pub version: String,\r\n    pub executed_at: String,\r\n    pub command: String,\r\n    pub hostname: String,\r\n    pub final_user_agent: String,\r\n    /// The initial URL passed via --url option\r\n    pub initial_url: String,\r\n}\r\n\r\nimpl Info {\r\n    pub fn new(\r\n        name: String,\r\n        version: String,\r\n        executed_at: String,\r\n        command: String,\r\n        hostname: String,\r\n        final_user_agent: String,\r\n        initial_url: String,\r\n    ) -> Self {\r\n        Self {\r\n            name,\r\n            version,\r\n            executed_at,\r\n            command,\r\n            hostname,\r\n            final_user_agent,\r\n            initial_url,\r\n        }\r\n    }\r\n\r\n    pub fn set_final_user_agent(&mut self, final_user_agent: String) {\r\n        self.final_user_agent = final_user_agent;\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/lib.rs",
    "content": "// SiteOne Crawler - Library root\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\npub mod analysis;\r\npub mod components;\r\npub mod content_processor;\r\npub mod debugger;\r\npub mod engine;\r\npub mod error;\r\npub mod export;\r\npub mod extra_column;\r\npub mod info;\r\npub mod options;\r\npub mod output;\r\npub mod result;\r\npub mod scoring;\r\npub mod server;\r\npub mod types;\r\npub mod utils;\r\npub mod version;\r\npub mod wizard;\r\n"
  },
  {
    "path": "src/main.rs",
    "content": "// SiteOne Crawler - Main entry point\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse siteone_crawler::engine::initiator::Initiator;\nuse siteone_crawler::utils;\n\n#[tokio::main]\nasync fn main() {\n    // Install the default crypto provider for rustls (needed by SSL/TLS analyzer)\n    let _ = rustls::crypto::ring::default_provider().install_default();\n\n    // Force ANSI color output\n    utils::force_enabled_colors();\n\n    // Set timezone early, before tokio runtime spawns threads.\n    // We check argv directly to avoid duplicating full option parsing.\n    {\n        let argv: Vec<String> = std::env::args().collect();\n        for i in 0..argv.len() {\n            if let Some(tz) = argv[i].strip_prefix(\"--timezone=\") {\n                // SAFETY: Called before any threads are spawned by the runtime\n                unsafe {\n                    std::env::set_var(\"TZ\", tz);\n                }\n                break;\n            } else if argv[i] == \"--timezone\" && i + 1 < argv.len() {\n                unsafe {\n                    std::env::set_var(\"TZ\", &argv[i + 1]);\n                }\n                break;\n            }\n        }\n    }\n\n    let mut argv: Vec<String> = std::env::args().collect();\n\n    // Interactive wizard: when no args given AND stdin/stdout are interactive TTYs,\n    // show a guided wizard instead of the error + help wall. 
(GitHub issue #93)\n    let launched_via_wizard = argv.len() == 1 && siteone_crawler::wizard::is_interactive_tty();\n    if launched_via_wizard {\n        match siteone_crawler::wizard::run_wizard() {\n            Ok(wizard_argv) => argv = wizard_argv,\n            Err(_) => {\n                std::process::exit(0);\n            }\n        }\n    }\n\n    // Create initiator (parses CLI args, handles --help/--version)\n    // On error: show ERROR, then help, then ERROR again\n    let initiator = match Initiator::new(&argv) {\n        Ok(i) => i,\n        Err(e) => {\n            // Extract inner message (strip \"Config error: \" prefix for display)\n            let msg = match &e {\n                siteone_crawler::error::CrawlerError::Config(inner) => inner.clone(),\n                other => other.to_string(),\n            };\n            eprint!(\"{}\", utils::get_color_text(&format!(\"ERROR: {}\", msg), \"red\", false));\n            Initiator::print_help();\n            eprintln!(\n                \"{}\",\n                utils::get_color_text(&format!(\"\\nERROR: {}\\n\", msg), \"red\", false)\n            );\n            std::process::exit(101);\n        }\n    };\n\n    // Check for serve mode (built-in HTTP server for browsing exports)\n    let serve_markdown = initiator.get_options().serve_markdown_dir.clone();\n    let serve_offline = initiator.get_options().serve_offline_dir.clone();\n    let serve_port = initiator.get_options().serve_port as u16;\n    let serve_bind = initiator.get_options().serve_bind_address.clone();\n\n    if let Some(dir) = serve_markdown {\n        siteone_crawler::server::run(\n            std::path::PathBuf::from(dir),\n            siteone_crawler::server::ServeMode::Markdown,\n            serve_port,\n            &serve_bind,\n        )\n        .await;\n        return;\n    }\n    if let Some(dir) = serve_offline {\n        siteone_crawler::server::run(\n            std::path::PathBuf::from(dir),\n            
siteone_crawler::server::ServeMode::Offline,\n            serve_port,\n            &serve_bind,\n        )\n        .await;\n        return;\n    }\n\n    // Check for html-to-markdown mode (standalone file conversion, no crawling)\n    if let Some(html_file) = initiator.get_options().html_to_markdown_file.clone() {\n        let options = initiator.get_options();\n        match siteone_crawler::export::markdown_exporter::convert_html_file_to_markdown(\n            &html_file,\n            options.markdown_exclude_selector.clone(),\n            options.markdown_disable_images,\n            options.markdown_disable_files,\n            options.markdown_move_content_before_h1_to_end,\n        ) {\n            Ok(markdown) => {\n                if let Some(output_path) = &options.html_to_markdown_output {\n                    if let Err(e) = std::fs::write(output_path, &markdown) {\n                        eprintln!(\n                            \"{}\",\n                            siteone_crawler::utils::get_color_text(\n                                &format!(\"ERROR: Cannot write output file '{}': {}\", output_path, e),\n                                \"red\",\n                                false,\n                            )\n                        );\n                        std::process::exit(1);\n                    }\n                    eprintln!(\n                        \"{}\",\n                        siteone_crawler::utils::get_color_text(\n                            &format!(\"Markdown written to '{}'\", output_path),\n                            \"green\",\n                            false,\n                        )\n                    );\n                } else {\n                    print!(\"{}\", markdown);\n                }\n            }\n            Err(e) => {\n                eprintln!(\n                    \"{}\",\n                    siteone_crawler::utils::get_color_text(&format!(\"ERROR: {}\", e), \"red\", false,)\n                
);\n                std::process::exit(1);\n            }\n        }\n        return;\n    }\n\n    // Create manager from initiator\n    let mut manager = match initiator.create_manager() {\n        Ok(m) => m,\n        Err(e) => {\n            eprintln!(\"Error initializing crawler: {}\", e);\n            std::process::exit(1);\n        }\n    };\n\n    // Run the crawler\n    match manager.run().await {\n        Ok(exit_code) => {\n            if launched_via_wizard {\n                if let Some((dir, kind)) = siteone_crawler::wizard::offer_serve_after_export(&argv) {\n                    let serve_mode = if kind == \"offline\" {\n                        siteone_crawler::server::ServeMode::Offline\n                    } else {\n                        siteone_crawler::server::ServeMode::Markdown\n                    };\n                    siteone_crawler::server::run(std::path::PathBuf::from(&dir), serve_mode, serve_port, &serve_bind)\n                        .await;\n                } else {\n                    siteone_crawler::wizard::press_enter_to_exit();\n                }\n            }\n            if exit_code != 0 {\n                std::process::exit(exit_code);\n            }\n        }\n        Err(e) => {\n            eprintln!(\"Crawler error: {}\", e);\n            if launched_via_wizard {\n                siteone_crawler::wizard::press_enter_to_exit();\n            }\n            std::process::exit(1);\n        }\n    }\n}\n"
  },
  {
    "path": "src/options/core_options.rs",
    "content": "// SiteOne Crawler - Core options (all CLI options)\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n\nuse regex::Regex;\n\nuse crate::debugger;\nuse crate::error::CrawlerError;\nuse crate::extra_column::ExtraColumn;\nuse crate::types::{DeviceType, OutputType};\n\nuse super::group::OptionGroup;\nuse super::option::{CrawlerOption, OptionValue};\nuse super::option_type::OptionType;\nuse super::options::Options;\n\npub const GROUP_BASIC_SETTINGS: &str = \"basic-settings\";\npub const GROUP_OUTPUT_SETTINGS: &str = \"output-settings\";\npub const GROUP_RESOURCE_FILTERING: &str = \"resource-filtering\";\npub const GROUP_ADVANCED_CRAWLER_SETTINGS: &str = \"advanced-crawler-settings\";\npub const GROUP_EXPERT_SETTINGS: &str = \"expert-settings\";\npub const GROUP_FILE_EXPORT_SETTINGS: &str = \"file-export-settings\";\npub const GROUP_MAILER_SETTINGS: &str = \"mailer-settings\";\npub const GROUP_MARKDOWN_EXPORT_SETTINGS: &str = \"markdown-export-settings\";\npub const GROUP_OFFLINE_EXPORT_SETTINGS: &str = \"offline-export-settings\";\npub const GROUP_SITEMAP_SETTINGS: &str = \"sitemap-settings\";\npub const GROUP_UPLOAD_SETTINGS: &str = \"upload-settings\";\npub const GROUP_FASTEST_ANALYZER: &str = \"fastest-analyzer\";\npub const GROUP_SEO_AND_OPENGRAPH_ANALYZER: &str = \"seo-and-opengraph-analyzer\";\npub const GROUP_SLOWEST_ANALYZER: &str = \"slowest-analyzer\";\npub const GROUP_CI_CD_SETTINGS: &str = \"ci-cd-settings\";\npub const GROUP_SERVER_SETTINGS: &str = \"server-settings\";\n\n/// Result storage type\n#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]\n#[serde(rename_all = \"lowercase\")]\npub enum StorageType {\n    Memory,\n    File,\n}\n\nimpl StorageType {\n    pub fn from_text(text: &str) -> Result<Self, CrawlerError> {\n        match text.trim().to_lowercase().as_str() {\n            \"memory\" => Ok(StorageType::Memory),\n            \"file\" => Ok(StorageType::File),\n            other => Err(CrawlerError::Config(format!(\n        
        \"Unknown storage type '{}'. Supported values are: memory, file\",\n                other\n            ))),\n        }\n    }\n\n    pub fn as_str(&self) -> &'static str {\n        match self {\n            StorageType::Memory => \"memory\",\n            StorageType::File => \"file\",\n        }\n    }\n}\n\n#[derive(Debug, Clone, serde::Serialize)]\n#[serde(rename_all = \"camelCase\")]\npub struct CoreOptions {\n    // basic settings\n    pub url: String,\n    pub single_page: bool,\n    pub max_depth: i64,\n    pub device: DeviceType,\n    pub user_agent: Option<String>,\n    pub timeout: i64,\n    pub proxy: Option<String>,\n    pub http_auth: Option<String>,\n    pub accept_invalid_certs: bool,\n    pub timezone: Option<String>,\n    pub show_version_only: bool,\n    pub show_help_only: bool,\n\n    // output settings\n    pub output_type: OutputType,\n    pub url_column_size: Option<i64>,\n    pub show_inline_criticals: bool,\n    pub show_inline_warnings: bool,\n    pub rows_limit: i64,\n    pub extra_columns: Vec<ExtraColumn>,\n    pub extra_columns_names_only: Vec<String>,\n    pub show_scheme_and_host: bool,\n    pub do_not_truncate_url: bool,\n    pub hide_progress_bar: bool,\n    pub hide_columns: Vec<String>,\n    pub no_color: bool,\n    pub force_color: bool,\n    pub console_width: Option<i64>,\n\n    // resource filtering\n    pub disable_all_assets: bool,\n    pub disable_javascript: bool,\n    pub disable_styles: bool,\n    pub disable_fonts: bool,\n    pub disable_images: bool,\n    pub disable_files: bool,\n    pub remove_all_anchor_listeners: bool,\n\n    // advanced crawler settings\n    pub workers: i64,\n    pub max_reqs_per_sec: f64,\n    pub memory_limit: String,\n    pub resolve: Vec<String>,\n    pub websocket_server: Option<String>,\n    pub ignore_robots_txt: bool,\n    pub allowed_domains_for_external_files: Vec<String>,\n    pub allowed_domains_for_crawling: Vec<String>,\n    pub single_foreign_page: bool,\n    pub 
result_storage: StorageType,\n    pub result_storage_dir: String,\n    pub result_storage_compression: bool,\n    pub accept_encoding: String,\n    pub max_queue_length: i64,\n    pub max_visited_urls: i64,\n    pub max_url_length: i64,\n    pub max_skipped_urls: i64,\n    pub max_non200_responses_per_basename: i64,\n    pub include_regex: Vec<String>,\n    pub ignore_regex: Vec<String>,\n    pub regex_filtering_only_for_pages: bool,\n    pub analyzer_filter_regex: Option<String>,\n    pub add_random_query_params: bool,\n    pub remove_query_params: bool,\n    pub keep_query_params: Vec<String>,\n    pub transform_url: Vec<String>,\n    pub force_relative_urls: bool,\n\n    // file export settings\n    pub output_html_report: Option<String>,\n    pub html_report_options: Option<String>,\n    pub output_json_file: Option<String>,\n    pub output_text_file: Option<String>,\n    pub add_host_to_output_file: bool,\n    pub add_timestamp_to_output_file: bool,\n\n    // sitemap settings\n    pub sitemap_xml_file: Option<String>,\n    pub sitemap_txt_file: Option<String>,\n    pub sitemap_base_priority: f64,\n    pub sitemap_priority_increase: f64,\n\n    // offline export settings\n    pub offline_export_dir: Option<String>,\n    pub offline_export_store_only_url_regex: Vec<String>,\n    pub offline_export_remove_unwanted_code: bool,\n    pub offline_export_no_auto_redirect_html: bool,\n    pub offline_export_preserve_url_structure: bool,\n    pub offline_export_preserve_urls: bool,\n    pub replace_content: Vec<String>,\n    pub replace_query_string: Vec<String>,\n    pub offline_export_lowercase: bool,\n    pub ignore_store_file_error: bool,\n    pub disable_astro_inline_modules: bool,\n\n    // markdown export settings\n    pub markdown_export_dir: Option<String>,\n    pub markdown_export_single_file: Option<String>,\n    pub markdown_move_content_before_h1_to_end: bool,\n    pub markdown_disable_images: bool,\n    pub markdown_disable_files: bool,\n    pub 
markdown_remove_links_and_images_from_single_file: bool,\n    pub markdown_exclude_selector: Vec<String>,\n    pub markdown_replace_content: Vec<String>,\n    pub markdown_replace_query_string: Vec<String>,\n    pub markdown_export_store_only_url_regex: Vec<String>,\n    pub markdown_ignore_store_file_error: bool,\n\n    // mailer settings\n    pub mail_to: Vec<String>,\n    pub mail_from: String,\n    pub mail_from_name: String,\n    pub mail_subject_template: String,\n    pub mail_smtp_host: String,\n    pub mail_smtp_port: i64,\n    pub mail_smtp_user: Option<String>,\n    pub mail_smtp_pass: Option<String>,\n\n    // upload settings\n    pub upload_enabled: bool,\n    pub upload_to: String,\n    pub upload_retention: String,\n    pub upload_password: Option<String>,\n    pub upload_timeout: i64,\n\n    // expert settings\n    pub http_cache_dir: Option<String>,\n    pub http_cache_compression: bool,\n    pub http_cache_ttl: Option<u64>,\n    pub debug: bool,\n    pub debug_log_file: Option<String>,\n    pub debug_url_regex: Vec<String>,\n\n    // fastest analyzer settings\n    pub fastest_top_limit: i64,\n    pub fastest_max_time: f64,\n\n    // seo and opengraph analyzer settings\n    pub max_heading_level: i64,\n\n    // slowest analyzer settings\n    pub slowest_top_limit: i64,\n    pub slowest_min_time: f64,\n    pub slowest_max_time: f64,\n\n    // server settings\n    pub serve_markdown_dir: Option<String>,\n    pub serve_offline_dir: Option<String>,\n    pub serve_port: i64,\n    pub serve_bind_address: String,\n\n    // html-to-markdown converter mode (standalone, no crawl)\n    pub html_to_markdown_file: Option<String>,\n    pub html_to_markdown_output: Option<String>,\n\n    // ci/cd settings\n    pub ci: bool,\n    pub ci_min_score: f64,\n    pub ci_min_performance: Option<f64>,\n    pub ci_min_seo: Option<f64>,\n    pub ci_min_security: Option<f64>,\n    pub ci_min_accessibility: Option<f64>,\n    pub ci_min_best_practices: Option<f64>,\n    pub 
ci_max_404: i64,\n    pub ci_max_5xx: i64,\n    pub ci_max_criticals: i64,\n    pub ci_max_warnings: Option<i64>,\n    pub ci_max_avg_response: Option<f64>,\n    pub ci_min_pages: i64,\n    pub ci_min_assets: i64,\n    pub ci_min_documents: i64,\n}\n\nimpl CoreOptions {\n    /// Create CoreOptions by parsing values from a populated Options registry.\n    pub fn from_options(options: &Options) -> Result<Self, CrawlerError> {\n        // Determine output directory prefix: try ./tmp/ first, fallback to system data dir\n        let output_prefix = default_output_prefix();\n\n        let mut core = CoreOptions {\n            // basic settings\n            url: String::new(),\n            single_page: false,\n            max_depth: 0,\n            device: DeviceType::Desktop,\n            user_agent: None,\n            timeout: 5,\n            proxy: None,\n            http_auth: None,\n            accept_invalid_certs: false,\n            timezone: None,\n            show_version_only: false,\n            show_help_only: false,\n\n            // output settings\n            output_type: OutputType::Text,\n            url_column_size: None,\n            show_inline_criticals: false,\n            show_inline_warnings: false,\n            rows_limit: 200,\n            extra_columns: Vec::new(),\n            extra_columns_names_only: Vec::new(),\n            show_scheme_and_host: false,\n            do_not_truncate_url: false,\n            hide_progress_bar: false,\n            hide_columns: Vec::new(),\n            no_color: false,\n            force_color: false,\n            console_width: None,\n\n            // resource filtering\n            disable_all_assets: false,\n            disable_javascript: false,\n            disable_styles: false,\n            disable_fonts: false,\n            disable_images: false,\n            disable_files: false,\n            remove_all_anchor_listeners: false,\n\n            // advanced crawler settings\n            workers: 3,\n     
       max_reqs_per_sec: 10.0,\n            memory_limit: \"2048M\".to_string(),\n            resolve: Vec::new(),\n            websocket_server: None,\n            ignore_robots_txt: false,\n            allowed_domains_for_external_files: Vec::new(),\n            allowed_domains_for_crawling: Vec::new(),\n            single_foreign_page: false,\n            result_storage: StorageType::Memory,\n            result_storage_dir: format!(\"{output_prefix}/result-storage\"),\n            result_storage_compression: false,\n            accept_encoding: \"gzip, deflate, br\".to_string(),\n            max_queue_length: 9000,\n            max_visited_urls: 10000,\n            max_url_length: 2083,\n            max_skipped_urls: 10000,\n            max_non200_responses_per_basename: 5,\n            include_regex: Vec::new(),\n            ignore_regex: Vec::new(),\n            regex_filtering_only_for_pages: false,\n            analyzer_filter_regex: None,\n            add_random_query_params: false,\n            remove_query_params: false,\n            keep_query_params: Vec::new(),\n            transform_url: Vec::new(),\n            force_relative_urls: false,\n\n            // file export settings (all 3 reports enabled by default)\n            output_html_report: Some(\n                std::path::Path::new(&output_prefix)\n                    .join(\"%domain%.report.%datetime%.html\")\n                    .to_string_lossy()\n                    .to_string(),\n            ),\n            html_report_options: None,\n            output_json_file: Some(\n                std::path::Path::new(&output_prefix)\n                    .join(\"%domain%.output.%datetime%.json\")\n                    .to_string_lossy()\n                    .to_string(),\n            ),\n            output_text_file: Some(\n                std::path::Path::new(&output_prefix)\n                    .join(\"%domain%.output.%datetime%.txt\")\n                    .to_string_lossy()\n                    
.to_string(),\n            ),\n            add_host_to_output_file: false,\n            add_timestamp_to_output_file: false,\n\n            // sitemap settings\n            sitemap_xml_file: None,\n            sitemap_txt_file: None,\n            sitemap_base_priority: 0.5,\n            sitemap_priority_increase: 0.1,\n\n            // offline export settings\n            offline_export_dir: None,\n            offline_export_store_only_url_regex: Vec::new(),\n            offline_export_remove_unwanted_code: true,\n            offline_export_no_auto_redirect_html: false,\n            offline_export_preserve_url_structure: false,\n            offline_export_preserve_urls: false,\n            replace_content: Vec::new(),\n            replace_query_string: Vec::new(),\n            offline_export_lowercase: false,\n            ignore_store_file_error: false,\n            disable_astro_inline_modules: false,\n\n            // markdown export settings\n            markdown_export_dir: None,\n            markdown_export_single_file: None,\n            markdown_move_content_before_h1_to_end: false,\n            markdown_disable_images: false,\n            markdown_disable_files: false,\n            markdown_remove_links_and_images_from_single_file: false,\n            markdown_exclude_selector: Vec::new(),\n            markdown_replace_content: Vec::new(),\n            markdown_replace_query_string: Vec::new(),\n            markdown_export_store_only_url_regex: Vec::new(),\n            markdown_ignore_store_file_error: false,\n\n            // mailer settings\n            mail_to: Vec::new(),\n            mail_from: \"siteone-crawler@your-hostname.com\".to_string(),\n            mail_from_name: \"SiteOne Crawler\".to_string(),\n            mail_subject_template: \"Crawler Report for %domain% (%date%)\".to_string(),\n            mail_smtp_host: \"localhost\".to_string(),\n            mail_smtp_port: 25,\n            mail_smtp_user: None,\n            mail_smtp_pass: 
None,\n\n            // upload settings\n            upload_enabled: false,\n            upload_to: \"https://crawler.siteone.io/up\".to_string(),\n            upload_retention: \"30d\".to_string(),\n            upload_password: None,\n            upload_timeout: 3600,\n\n            // expert settings\n            http_cache_dir: Some(default_http_cache_dir()),\n            http_cache_compression: false,\n            http_cache_ttl: Some(24 * 3600), // 24 hours in seconds\n            debug: false,\n            debug_log_file: None,\n            debug_url_regex: Vec::new(),\n\n            // fastest analyzer settings\n            fastest_top_limit: 20,\n            fastest_max_time: 1.0,\n\n            // seo and opengraph analyzer settings\n            max_heading_level: 3,\n\n            // slowest analyzer settings\n            slowest_top_limit: 20,\n            slowest_min_time: 0.01,\n            slowest_max_time: 3.0,\n\n            // server settings\n            serve_markdown_dir: None,\n            serve_offline_dir: None,\n            serve_port: 8321,\n            serve_bind_address: \"127.0.0.1\".to_string(),\n\n            html_to_markdown_file: None,\n            html_to_markdown_output: None,\n\n            // ci/cd settings\n            ci: false,\n            ci_min_score: 5.0,\n            ci_min_performance: Some(5.0),\n            ci_min_seo: Some(5.0),\n            ci_min_security: Some(5.0),\n            ci_min_accessibility: Some(3.0),\n            ci_min_best_practices: Some(5.0),\n            ci_max_404: 0,\n            ci_max_5xx: 0,\n            ci_max_criticals: 0,\n            ci_max_warnings: None,\n            ci_max_avg_response: None,\n            ci_min_pages: 10,\n            ci_min_assets: 10,\n            ci_min_documents: 0,\n        };\n\n        // Populate from option groups\n        for (_apl_code, group) in options.get_groups() {\n            for (_prop_name, option) in &group.options {\n                let value = 
option.get_value()?;\n                core.apply_option_value(&option.property_to_fill, value)?;\n            }\n        }\n\n        // Disable all assets if set\n        if core.disable_all_assets {\n            core.disable_javascript = true;\n            core.disable_styles = true;\n            core.disable_fonts = true;\n            core.disable_images = true;\n            core.disable_files = true;\n        }\n\n        // In CI mode, disable default report outputs (user cares about exit code, not files).\n        // Only suppress outputs that weren't explicitly set by the user on the command line.\n        if core.ci {\n            if !options.is_explicitly_set(\"outputHtmlReport\") {\n                core.output_html_report = None;\n            }\n            if !options.is_explicitly_set(\"outputJsonFile\") {\n                core.output_json_file = None;\n            }\n            if !options.is_explicitly_set(\"outputTextFile\") {\n                core.output_text_file = None;\n            }\n        }\n\n        // Warn if --html-to-markdown-output is set without --html-to-markdown\n        if core.html_to_markdown_output.is_some() && core.html_to_markdown_file.is_none() {\n            return Err(CrawlerError::Config(\n                \"--html-to-markdown-output requires --html-to-markdown to be set.\".to_string(),\n            ));\n        }\n\n        // In html-to-markdown mode, validate input file and return early\n        if let Some(ref html_file) = core.html_to_markdown_file {\n            if !std::path::Path::new(html_file).exists() {\n                return Err(CrawlerError::Config(format!(\n                    \"HTML file '{}' does not exist.\",\n                    html_file\n                )));\n            }\n            if !std::path::Path::new(html_file).is_file() {\n                return Err(CrawlerError::Config(format!(\"'{}' is not a file.\", html_file)));\n            }\n            return Ok(core);\n        }\n\n        // In 
serve mode, skip normal crawl validation and return early\n        if core.serve_markdown_dir.is_some() || core.serve_offline_dir.is_some() {\n            return Ok(core);\n        }\n\n        // Validate required fields\n        if core.url.is_empty() {\n            return Err(CrawlerError::Config(\n                \"Invalid or undefined --url parameter.\".to_string(),\n            ));\n        }\n        if core.workers < 1 {\n            return Err(CrawlerError::Config(format!(\n                \"Invalid value '{}' (minimum is 1) for --workers\",\n                core.workers\n            )));\n        }\n\n        // Build extra_columns_names_only\n        core.extra_columns_names_only = core\n            .extra_columns\n            .iter()\n            .map(|ec| {\n                let re = Regex::new(r\"\\s*\\(.+$\").ok();\n                match re {\n                    Some(r) => r.replace(&ec.name, \"\").to_string(),\n                    None => ec.name.clone(),\n                }\n            })\n            .collect();\n\n        // Configure debugger\n        debugger::set_config(core.debug, core.debug_log_file.as_deref());\n\n        Ok(core)\n    }\n\n    fn apply_option_value(&mut self, property: &str, value: &OptionValue) -> Result<(), CrawlerError> {\n        match property {\n            \"url\" => {\n                if let Some(s) = value.as_str() {\n                    self.url = s.to_string();\n                }\n            }\n            \"singlePage\" => {\n                if let Some(b) = value.as_bool() {\n                    self.single_page = b;\n                }\n            }\n            \"maxDepth\" => {\n                if let Some(n) = value.as_int() {\n                    self.max_depth = n;\n                }\n            }\n            \"device\" => {\n                if let Some(s) = value.as_str() {\n                    self.device = DeviceType::from_text(s)?;\n                }\n            }\n            \"userAgent\" => 
{\n                if let Some(s) = value.as_str() {\n                    self.user_agent = Some(s.to_string());\n                }\n            }\n            \"timeout\" => {\n                if let Some(n) = value.as_int() {\n                    self.timeout = n;\n                }\n            }\n            \"proxy\" => {\n                if let Some(s) = value.as_str() {\n                    self.proxy = Some(s.to_string());\n                }\n            }\n            \"httpAuth\" => {\n                if let Some(s) = value.as_str() {\n                    self.http_auth = Some(s.to_string());\n                }\n            }\n            \"acceptInvalidCerts\" => {\n                if let Some(b) = value.as_bool() {\n                    self.accept_invalid_certs = b;\n                }\n            }\n            \"timezone\" => {\n                if let Some(s) = value.as_str() {\n                    self.timezone = Some(s.to_string());\n                }\n            }\n            \"showHelpOnly\" => {\n                if let Some(b) = value.as_bool() {\n                    self.show_help_only = b;\n                }\n            }\n            \"showVersionOnly\" => {\n                if let Some(b) = value.as_bool() {\n                    self.show_version_only = b;\n                }\n            }\n            \"outputType\" => {\n                if let Some(s) = value.as_str() {\n                    self.output_type = OutputType::from_text(s)?;\n                }\n            }\n            \"urlColumnSize\" => {\n                if let Some(n) = value.as_int() {\n                    self.url_column_size = Some(n);\n                }\n            }\n            \"showInlineCriticals\" => {\n                if let Some(b) = value.as_bool() {\n                    self.show_inline_criticals = b;\n                }\n            }\n            \"showInlineWarnings\" => {\n                if let Some(b) = value.as_bool() {\n                    
self.show_inline_warnings = b;\n                }\n            }\n            \"rowsLimit\" => {\n                if let Some(n) = value.as_int() {\n                    self.rows_limit = n;\n                }\n            }\n            \"extraColumns\" => {\n                if let Some(arr) = value.as_array() {\n                    for column_text in arr {\n                        self.extra_columns.push(ExtraColumn::from_text(column_text)?);\n                    }\n                }\n            }\n            \"showSchemeAndHost\" => {\n                if let Some(b) = value.as_bool() {\n                    self.show_scheme_and_host = b;\n                }\n            }\n            \"doNotTruncateUrl\" => {\n                if let Some(b) = value.as_bool() {\n                    self.do_not_truncate_url = b;\n                }\n            }\n            \"hideProgressBar\" => {\n                if let Some(b) = value.as_bool() {\n                    self.hide_progress_bar = b;\n                }\n            }\n            \"hideColumns\" => {\n                if let Some(s) = value.as_str() {\n                    self.hide_columns = s.split(',').map(|c| c.trim().to_lowercase()).collect();\n                }\n            }\n            \"noColor\" => {\n                if let Some(b) = value.as_bool() {\n                    self.no_color = b;\n                }\n            }\n            \"forceColor\" => {\n                if let Some(b) = value.as_bool() {\n                    self.force_color = b;\n                }\n            }\n            \"consoleWidth\" => {\n                if let Some(n) = value.as_int() {\n                    self.console_width = Some(n);\n                }\n            }\n            \"disableAllAssets\" => {\n                if let Some(b) = value.as_bool() {\n                    self.disable_all_assets = b;\n                }\n            }\n            \"disableJavascript\" => {\n                if let Some(b) = 
value.as_bool() {\n                    self.disable_javascript = b;\n                }\n            }\n            \"disableStyles\" => {\n                if let Some(b) = value.as_bool() {\n                    self.disable_styles = b;\n                }\n            }\n            \"disableFonts\" => {\n                if let Some(b) = value.as_bool() {\n                    self.disable_fonts = b;\n                }\n            }\n            \"disableImages\" => {\n                if let Some(b) = value.as_bool() {\n                    self.disable_images = b;\n                }\n            }\n            \"disableFiles\" => {\n                if let Some(b) = value.as_bool() {\n                    self.disable_files = b;\n                }\n            }\n            \"removeAllAnchorListeners\" => {\n                if let Some(b) = value.as_bool() {\n                    self.remove_all_anchor_listeners = b;\n                }\n            }\n            \"workers\" => {\n                if let Some(n) = value.as_int() {\n                    self.workers = n;\n                }\n            }\n            \"maxReqsPerSec\" => {\n                if let Some(n) = value.as_float() {\n                    self.max_reqs_per_sec = n;\n                }\n            }\n            \"memoryLimit\" => {\n                if let Some(s) = value.as_str() {\n                    self.memory_limit = s.to_string();\n                }\n            }\n            \"resolve\" => {\n                if let Some(arr) = value.as_array() {\n                    self.resolve = arr.clone();\n                }\n            }\n            \"websocketServer\" => {\n                if let Some(s) = value.as_str() {\n                    self.websocket_server = Some(s.to_string());\n                }\n            }\n            \"ignoreRobotsTxt\" => {\n                if let Some(b) = value.as_bool() {\n                    self.ignore_robots_txt = b;\n                }\n            }\n       
     \"allowedDomainsForExternalFiles\" => {\n                if let Some(arr) = value.as_array() {\n                    self.allowed_domains_for_external_files = arr.clone();\n                }\n            }\n            \"allowedDomainsForCrawling\" => {\n                if let Some(arr) = value.as_array() {\n                    self.allowed_domains_for_crawling = arr.clone();\n                }\n            }\n            \"singleForeignPage\" => {\n                if let Some(b) = value.as_bool() {\n                    self.single_foreign_page = b;\n                }\n            }\n            \"resultStorage\" => {\n                if let Some(s) = value.as_str() {\n                    self.result_storage = StorageType::from_text(s)?;\n                }\n            }\n            \"resultStorageDir\" => {\n                if let Some(s) = value.as_str() {\n                    self.result_storage_dir = s.to_string();\n                }\n            }\n            \"resultStorageCompression\" => {\n                if let Some(b) = value.as_bool() {\n                    self.result_storage_compression = b;\n                }\n            }\n            \"acceptEncoding\" => {\n                if let Some(s) = value.as_str() {\n                    self.accept_encoding = s.to_string();\n                }\n            }\n            \"maxQueueLength\" => {\n                if let Some(n) = value.as_int() {\n                    self.max_queue_length = n;\n                }\n            }\n            \"maxVisitedUrls\" => {\n                if let Some(n) = value.as_int() {\n                    self.max_visited_urls = n;\n                }\n            }\n            \"maxUrlLength\" => {\n                if let Some(n) = value.as_int() {\n                    self.max_url_length = n;\n                }\n            }\n            \"maxSkippedUrls\" => {\n                if let Some(n) = value.as_int() {\n                    self.max_skipped_urls = n;\n             
   }\n            }\n            \"maxNon200ResponsesPerBasename\" => {\n                if let Some(n) = value.as_int() {\n                    self.max_non200_responses_per_basename = n;\n                }\n            }\n            \"includeRegex\" => {\n                if let Some(arr) = value.as_array() {\n                    self.include_regex = arr.clone();\n                }\n            }\n            \"ignoreRegex\" => {\n                if let Some(arr) = value.as_array() {\n                    self.ignore_regex = arr.clone();\n                }\n            }\n            \"regexFilteringOnlyForPages\" => {\n                if let Some(b) = value.as_bool() {\n                    self.regex_filtering_only_for_pages = b;\n                }\n            }\n            \"analyzerFilterRegex\" => {\n                if let Some(s) = value.as_str() {\n                    self.analyzer_filter_regex = Some(s.to_string());\n                }\n            }\n            \"addRandomQueryParams\" => {\n                if let Some(b) = value.as_bool() {\n                    self.add_random_query_params = b;\n                }\n            }\n            \"removeQueryParams\" => {\n                if let Some(b) = value.as_bool() {\n                    self.remove_query_params = b;\n                }\n            }\n            \"keepQueryParams\" => {\n                if let Some(arr) = value.as_array() {\n                    self.keep_query_params = arr.clone();\n                }\n            }\n            \"transformUrl\" => {\n                if let Some(arr) = value.as_array() {\n                    self.transform_url = arr.clone();\n                }\n            }\n            \"forceRelativeUrls\" => {\n                if let Some(b) = value.as_bool() {\n                    self.force_relative_urls = b;\n                }\n            }\n            // file export options — support empty string to disable (set to None)\n            \"outputHtmlReport\" => 
match value.as_str() {\n                Some(s) => self.output_html_report = Some(s.to_string()),\n                None => self.output_html_report = None,\n            },\n            \"htmlReportOptions\" => {\n                if let Some(s) = value.as_str() {\n                    self.html_report_options = Some(s.to_string());\n                }\n            }\n            \"outputJsonFile\" => match value.as_str() {\n                Some(s) => self.output_json_file = Some(s.to_string()),\n                None => self.output_json_file = None,\n            },\n            \"outputTextFile\" => match value.as_str() {\n                Some(s) => self.output_text_file = Some(s.to_string()),\n                None => self.output_text_file = None,\n            },\n            \"addHostToOutputFile\" => {\n                if let Some(b) = value.as_bool() {\n                    self.add_host_to_output_file = b;\n                }\n            }\n            \"addTimestampToOutputFile\" => {\n                if let Some(b) = value.as_bool() {\n                    self.add_timestamp_to_output_file = b;\n                }\n            }\n            // sitemap options\n            \"outputSitemapXml\" => {\n                if let Some(s) = value.as_str() {\n                    self.sitemap_xml_file = Some(s.to_string());\n                }\n            }\n            \"outputSitemapTxt\" => {\n                if let Some(s) = value.as_str() {\n                    self.sitemap_txt_file = Some(s.to_string());\n                }\n            }\n            \"sitemapBasePriority\" => {\n                if let Some(n) = value.as_float() {\n                    self.sitemap_base_priority = n;\n                }\n            }\n            \"sitemapPriorityIncrease\" => {\n                if let Some(n) = value.as_float() {\n                    self.sitemap_priority_increase = n;\n                }\n            }\n            // offline export options\n            
\"offlineExportDirectory\" => {\n                if let Some(s) = value.as_str() {\n                    self.offline_export_dir = Some(s.to_string());\n                }\n            }\n            \"offlineExportStoreOnlyUrlRegex\" => {\n                if let Some(arr) = value.as_array() {\n                    self.offline_export_store_only_url_regex = arr.clone();\n                }\n            }\n            \"offlineExportRemoveUnwantedCode\" => {\n                if let Some(b) = value.as_bool() {\n                    self.offline_export_remove_unwanted_code = b;\n                }\n            }\n            \"offlineExportNoAutoRedirectHtml\" => {\n                if let Some(b) = value.as_bool() {\n                    self.offline_export_no_auto_redirect_html = b;\n                }\n            }\n            \"offlineExportPreserveUrlStructure\" => {\n                if let Some(b) = value.as_bool() {\n                    self.offline_export_preserve_url_structure = b;\n                }\n            }\n            \"offlineExportPreserveUrls\" => {\n                if let Some(b) = value.as_bool() {\n                    self.offline_export_preserve_urls = b;\n                }\n            }\n            \"replaceContent\" => {\n                if let Some(arr) = value.as_array() {\n                    self.replace_content = arr.clone();\n                }\n            }\n            \"replaceQueryString\" => {\n                if let Some(arr) = value.as_array() {\n                    self.replace_query_string = arr.clone();\n                }\n            }\n            \"offlineExportLowercase\" => {\n                if let Some(b) = value.as_bool() {\n                    self.offline_export_lowercase = b;\n                }\n            }\n            \"ignoreStoreFileError\" => {\n                if let Some(b) = value.as_bool() {\n                    self.ignore_store_file_error = b;\n                }\n            }\n            
\"disableAstroInlineModules\" => {\n                if let Some(b) = value.as_bool() {\n                    self.disable_astro_inline_modules = b;\n                }\n            }\n            // markdown export options\n            \"markdownExportDirectory\" => {\n                if let Some(s) = value.as_str() {\n                    self.markdown_export_dir = Some(s.to_string());\n                }\n            }\n            \"markdownExportSingleFile\" => {\n                if let Some(s) = value.as_str() {\n                    self.markdown_export_single_file = Some(s.to_string());\n                }\n            }\n            \"markdownMoveContentBeforeH1ToEnd\" => {\n                if let Some(b) = value.as_bool() {\n                    self.markdown_move_content_before_h1_to_end = b;\n                }\n            }\n            \"markdownDisableImages\" => {\n                if let Some(b) = value.as_bool() {\n                    self.markdown_disable_images = b;\n                }\n            }\n            \"markdownDisableFiles\" => {\n                if let Some(b) = value.as_bool() {\n                    self.markdown_disable_files = b;\n                }\n            }\n            \"markdownRemoveLinksAndImagesFromSingleFile\" => {\n                if let Some(b) = value.as_bool() {\n                    self.markdown_remove_links_and_images_from_single_file = b;\n                }\n            }\n            \"markdownExcludeSelector\" => {\n                if let Some(arr) = value.as_array() {\n                    self.markdown_exclude_selector = arr.clone();\n                }\n            }\n            \"markdownReplaceContent\" => {\n                if let Some(arr) = value.as_array() {\n                    self.markdown_replace_content = arr.clone();\n                }\n            }\n            \"markdownReplaceQueryString\" => {\n                if let Some(arr) = value.as_array() {\n                    
self.markdown_replace_query_string = arr.clone();\n                }\n            }\n            \"markdownExportStoreOnlyUrlRegex\" => {\n                if let Some(arr) = value.as_array() {\n                    self.markdown_export_store_only_url_regex = arr.clone();\n                }\n            }\n            \"markdownIgnoreStoreFileError\" => {\n                if let Some(b) = value.as_bool() {\n                    self.markdown_ignore_store_file_error = b;\n                }\n            }\n            // mailer options\n            \"mailTo\" => {\n                if let Some(arr) = value.as_array() {\n                    self.mail_to = arr.clone();\n                }\n            }\n            \"mailFrom\" => {\n                if let Some(s) = value.as_str() {\n                    self.mail_from = s.to_string();\n                }\n            }\n            \"mailFromName\" => {\n                if let Some(s) = value.as_str() {\n                    self.mail_from_name = s.to_string();\n                }\n            }\n            \"mailSubjectTemplate\" => {\n                if let Some(s) = value.as_str() {\n                    self.mail_subject_template = s.to_string();\n                }\n            }\n            \"mailSmtpHost\" => {\n                if let Some(s) = value.as_str() {\n                    self.mail_smtp_host = s.to_string();\n                }\n            }\n            \"mailSmtpPort\" => {\n                if let Some(n) = value.as_int() {\n                    self.mail_smtp_port = n;\n                }\n            }\n            \"mailSmtpUser\" => {\n                if let Some(s) = value.as_str() {\n                    self.mail_smtp_user = Some(s.to_string());\n                }\n            }\n            \"mailSmtpPass\" => {\n                if let Some(s) = value.as_str() {\n                    self.mail_smtp_pass = Some(s.to_string());\n                }\n            }\n            // upload options\n            
\"uploadEnabled\" => {\n                if let Some(b) = value.as_bool() {\n                    self.upload_enabled = b;\n                }\n            }\n            \"uploadTo\" => {\n                if let Some(s) = value.as_str() {\n                    self.upload_to = s.to_string();\n                }\n            }\n            \"uploadRetention\" => {\n                if let Some(s) = value.as_str() {\n                    self.upload_retention = s.to_string();\n                }\n            }\n            \"uploadPassword\" => {\n                if let Some(s) = value.as_str() {\n                    self.upload_password = Some(s.to_string());\n                }\n            }\n            \"uploadTimeout\" => {\n                if let Some(n) = value.as_int() {\n                    self.upload_timeout = n;\n                }\n            }\n            \"httpCacheDir\" => match value.as_str() {\n                Some(s) => self.http_cache_dir = Some(s.to_string()),\n                None => self.http_cache_dir = None,\n            },\n            \"httpCacheCompression\" => {\n                if let Some(b) = value.as_bool() {\n                    self.http_cache_compression = b;\n                }\n            }\n            \"httpCacheTtl\" => {\n                if let Some(s) = value.as_str() {\n                    if s == \"0\" || s.is_empty() || s == \"off\" {\n                        self.http_cache_ttl = None; // infinite\n                    } else {\n                        self.http_cache_ttl = Some(parse_duration_to_secs(s));\n                    }\n                }\n            }\n            \"noCache\" => {\n                if value.as_bool() == Some(true) {\n                    self.http_cache_dir = Some(\"off\".to_string());\n                }\n            }\n            \"debug\" => {\n                if let Some(b) = value.as_bool() {\n                    self.debug = b;\n                }\n            }\n            \"debugLogFile\" => 
{\n                if let Some(s) = value.as_str() {\n                    self.debug_log_file = Some(s.to_string());\n                }\n            }\n            \"debugUrlRegex\" => {\n                if let Some(arr) = value.as_array() {\n                    self.debug_url_regex = arr.clone();\n                }\n            }\n            // fastest analyzer options\n            \"fastestTopLimit\" => {\n                if let Some(n) = value.as_int() {\n                    self.fastest_top_limit = n;\n                }\n            }\n            \"fastestMaxTime\" => {\n                if let Some(n) = value.as_float() {\n                    self.fastest_max_time = n;\n                }\n            }\n            // seo and opengraph analyzer options\n            \"maxHeadingLevel\" => {\n                if let Some(n) = value.as_int() {\n                    self.max_heading_level = n;\n                }\n            }\n            // slowest analyzer options\n            \"slowestTopLimit\" => {\n                if let Some(n) = value.as_int() {\n                    self.slowest_top_limit = n;\n                }\n            }\n            \"slowestMinTime\" => {\n                if let Some(n) = value.as_float() {\n                    self.slowest_min_time = n;\n                }\n            }\n            \"slowestMaxTime\" => {\n                if let Some(n) = value.as_float() {\n                    self.slowest_max_time = n;\n                }\n            }\n            // ci/cd options\n            \"ci\" => {\n                if let Some(b) = value.as_bool() {\n                    self.ci = b;\n                }\n            }\n            \"ciMinScore\" => {\n                if let Some(n) = value.as_float() {\n                    self.ci_min_score = n;\n                }\n            }\n            \"ciMinPerformance\" => {\n                if let Some(n) = value.as_float() {\n                    self.ci_min_performance = Some(n);\n              
  }\n            }\n            \"ciMinSeo\" => {\n                if let Some(n) = value.as_float() {\n                    self.ci_min_seo = Some(n);\n                }\n            }\n            \"ciMinSecurity\" => {\n                if let Some(n) = value.as_float() {\n                    self.ci_min_security = Some(n);\n                }\n            }\n            \"ciMinAccessibility\" => {\n                if let Some(n) = value.as_float() {\n                    self.ci_min_accessibility = Some(n);\n                }\n            }\n            \"ciMinBestPractices\" => {\n                if let Some(n) = value.as_float() {\n                    self.ci_min_best_practices = Some(n);\n                }\n            }\n            \"ciMax404\" => {\n                if let Some(n) = value.as_int() {\n                    self.ci_max_404 = n;\n                }\n            }\n            \"ciMax5xx\" => {\n                if let Some(n) = value.as_int() {\n                    self.ci_max_5xx = n;\n                }\n            }\n            \"ciMaxCriticals\" => {\n                if let Some(n) = value.as_int() {\n                    self.ci_max_criticals = n;\n                }\n            }\n            \"ciMaxWarnings\" => {\n                if let Some(n) = value.as_int() {\n                    self.ci_max_warnings = Some(n);\n                }\n            }\n            \"ciMaxAvgResponse\" => {\n                if let Some(n) = value.as_float() {\n                    self.ci_max_avg_response = Some(n);\n                }\n            }\n            \"ciMinPages\" => {\n                if let Some(n) = value.as_int() {\n                    self.ci_min_pages = n;\n                }\n            }\n            \"ciMinAssets\" => {\n                if let Some(n) = value.as_int() {\n                    self.ci_min_assets = n;\n                }\n            }\n            \"ciMinDocuments\" => {\n                if let Some(n) = value.as_int() {\n        
            self.ci_min_documents = n;\n                }\n            }\n            \"serveMarkdownDirectory\" => {\n                if let Some(s) = value.as_str() {\n                    self.serve_markdown_dir = Some(s.to_string());\n                }\n            }\n            \"serveOfflineDirectory\" => {\n                if let Some(s) = value.as_str() {\n                    self.serve_offline_dir = Some(s.to_string());\n                }\n            }\n            \"servePort\" => {\n                if let Some(n) = value.as_int() {\n                    self.serve_port = n;\n                }\n            }\n            \"serveBindAddress\" => {\n                if let Some(s) = value.as_str() {\n                    self.serve_bind_address = s.to_string();\n                }\n            }\n            \"htmlToMarkdownFile\" => {\n                if let Some(s) = value.as_str() {\n                    self.html_to_markdown_file = Some(s.to_string());\n                }\n            }\n            \"htmlToMarkdownOutput\" => {\n                if let Some(s) = value.as_str() {\n                    self.html_to_markdown_output = Some(s.to_string());\n                }\n            }\n            _ => {\n                // Unknown property - ignore (may be from analyzer/exporter options)\n            }\n        }\n        Ok(())\n    }\n\n    pub fn has_header_to_table(&self, header_name: &str) -> bool {\n        self.extra_columns_names_only.iter().any(|name| name == header_name)\n    }\n\n    pub fn is_url_selected_for_debug(&self, url: &str) -> bool {\n        if self.debug_url_regex.is_empty() {\n            return false;\n        }\n\n        for regex_str in &self.debug_url_regex {\n            if let Ok(re) = Regex::new(regex_str)\n                && re.is_match(url)\n            {\n                return true;\n            }\n        }\n\n        false\n    }\n\n    pub fn crawl_only_html_files(&self) -> bool {\n        self.disable_all_assets\n      
      || (self.disable_javascript\n                && self.disable_styles\n                && self.disable_fonts\n                && self.disable_images\n                && self.disable_files)\n    }\n\n    /// Get initial host from URL (with port if explicitly set)\n    pub fn get_initial_host(&self, include_port_if_defined: bool) -> String {\n        if let Ok(parsed) = url::Url::parse(&self.url) {\n            let host = parsed.host_str().unwrap_or(\"\").to_string();\n            if include_port_if_defined && let Some(port) = parsed.port() {\n                return format!(\"{}:{}\", host, port);\n            }\n            host\n        } else {\n            String::new()\n        }\n    }\n\n    /// Get scheme from initial URL\n    pub fn get_initial_scheme(&self) -> String {\n        if let Ok(parsed) = url::Url::parse(&self.url) {\n            parsed.scheme().to_string()\n        } else {\n            String::new()\n        }\n    }\n}\n\n/// Build the complete Options registry with all option groups.\npub fn get_options() -> Options {\n    let mut options = Options::new();\n\n    // -------------------------------------------------------------------------\n    // Basic settings (CoreOptions group 1)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_BASIC_SETTINGS,\n        \"Basic settings\",\n        vec![\n            CrawlerOption::new(\n                \"--url\", Some(\"-u\"), \"url\", OptionType::Url, false,\n                \"Required URL. It can also be the URL to sitemap.xml. 
Enclose in quotes if URL contains query parameters.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--single-page\", Some(\"-sp\"), \"singlePage\", OptionType::Bool, false,\n                \"Load only one page to which the URL is given (and its assets), but do not follow other pages.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--max-depth\", Some(\"-md\"), \"maxDepth\", OptionType::Int, false,\n                \"Maximum crawling depth (for pages, not assets). Default is `0` (no limit). `1` means `/about` or `/about/`, `2` means `/about/contacts` etc.\",\n                Some(\"0\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--device\", Some(\"-d\"), \"device\", OptionType::String, false,\n                \"Device type for User-Agent selection. Values `desktop`, `tablet`, `mobile`. Ignored with `--user-agent`.\",\n                Some(\"desktop\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--user-agent\", Some(\"-ua\"), \"userAgent\", OptionType::String, false,\n                \"Override User-Agent selected by --device. 
If you add `!` at the end, the siteone-crawler/version will not be added as a signature at the end of the final user-agent.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--timeout\", Some(\"-t\"), \"timeout\", OptionType::Int, false,\n                \"Request timeout (in sec).\",\n                Some(\"5\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--proxy\", Some(\"-p\"), \"proxy\", OptionType::HostAndPort, false,\n                \"HTTP proxy in `host:port` format.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--http-auth\", Some(\"-ha\"), \"httpAuth\", OptionType::String, false,\n                \"Basic HTTP authentication in `username:password` format.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--accept-invalid-certs\", Some(\"-aic\"), \"acceptInvalidCerts\", OptionType::Bool, false,\n                \"Accept invalid or incomplete SSL/TLS certificates (e.g. expired, self-signed, or missing intermediate CA). 
Use with caution.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--help\", Some(\"-h\"), \"showHelpOnly\", OptionType::Bool, false,\n                \"Show help and exit.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--version\", Some(\"-v\"), \"showVersionOnly\", OptionType::Bool, false,\n                \"Show crawler version and exit.\",\n                Some(\"false\"), false, false, None,\n            ),\n        ],\n    ));\n\n    // -------------------------------------------------------------------------\n    // Output settings (CoreOptions group 2)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_OUTPUT_SETTINGS,\n        \"Output settings\",\n        vec![\n            CrawlerOption::new(\n                \"--output\", Some(\"-o\"), \"outputType\", OptionType::String, false,\n                \"Output type `text` or `json`.\",\n                Some(\"text\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--extra-columns\", Some(\"-ec\"), \"extraColumns\", OptionType::String, true,\n                \"Extra table headers for output table with option to set width and do-not-truncate (>), e.g., `DOM,X-Cache(10),Title(40>)`.\",\n                None, true, true, None,\n            ),\n            CrawlerOption::new(\n                \"--url-column-size\", Some(\"-ucs\"), \"urlColumnSize\", OptionType::Int, false,\n                \"URL column width. 
By default, it is calculated from the size of your terminal window.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--timezone\", Some(\"-tz\"), \"timezone\", OptionType::String, false,\n                \"Timezone for datetimes in HTML reports and timestamps in output folders/files, e.g., `Europe/Prague`. Default is `UTC`.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--rows-limit\", Some(\"-rl\"), \"rowsLimit\", OptionType::Int, false,\n                \"Max. number of rows to display in tables with analysis results (protection against very long and slow report)\",\n                Some(\"200\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--show-inline-criticals\", Some(\"-sic\"), \"showInlineCriticals\", OptionType::Bool, false,\n                \"Show criticals from the analyzer directly in the URL table.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--show-inline-warnings\", Some(\"-siw\"), \"showInlineWarnings\", OptionType::Bool, false,\n                \"Show warnings from the analyzer directly in the URL table.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--do-not-truncate-url\", Some(\"-dntu\"), \"doNotTruncateUrl\", OptionType::Bool, false,\n                \"Avoid truncating URLs to `--url-column-size`.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--show-scheme-and-host\", Some(\"-ssah\"), \"showSchemeAndHost\", OptionType::Bool, false,\n                \"Show the schema://host also of the original domain URL as well. 
By default, only path+query is displayed for original domain.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--hide-progress-bar\", Some(\"-hpb\"), \"hideProgressBar\", OptionType::Bool, false,\n                \"Suppress progress bar in output.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--hide-columns\", Some(\"-hc\"), \"hideColumns\", OptionType::String, false,\n                \"Hide specified columns from the progress table. Comma-separated list: type, time, size, cache.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--no-color\", Some(\"-nc\"), \"noColor\", OptionType::Bool, false,\n                \"Disable colored output.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--force-color\", Some(\"-fc\"), \"forceColor\", OptionType::Bool, false,\n                \"Force colored output regardless of support detection.\",\n                Some(\"false\"), false, false, None,\n            ),\n        ],\n    ));\n\n    // -------------------------------------------------------------------------\n    // Resource filtering (CoreOptions group 3)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_RESOURCE_FILTERING,\n        \"Resource filtering\",\n        vec![\n            CrawlerOption::new(\n                \"--disable-all-assets\", Some(\"-das\"), \"disableAllAssets\", OptionType::Bool, false,\n                \"Disables crawling of all assets and files and only crawls pages in href attributes. 
Shortcut for calling all other `--disable-*` flags.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--disable-javascript\", Some(\"-dj\"), \"disableJavascript\", OptionType::Bool, false,\n                \"Disables JavaScript downloading and removes all JavaScript code from HTML, including onclick and other on* handlers.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--disable-styles\", Some(\"-ds\"), \"disableStyles\", OptionType::Bool, false,\n                \"Disables CSS file downloading and at the same time removes all style definitions by <style> tag or inline by style attributes.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--disable-fonts\", Some(\"-dfo\"), \"disableFonts\", OptionType::Bool, false,\n                \"Disables font downloading and also removes all font/font-face definitions from CSS.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--disable-images\", Some(\"-di\"), \"disableImages\", OptionType::Bool, false,\n                \"Disables downloading of all images and replaces found images in HTML with placeholder image only.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--disable-files\", Some(\"-df\"), \"disableFiles\", OptionType::Bool, false,\n                \"Disables downloading of any files (typically downloadable documents) to which various links point.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--remove-all-anchor-listeners\", Some(\"-raal\"), \"removeAllAnchorListeners\", OptionType::Bool, false,\n                \"On all links on the page remove any event listeners. 
Useful on some types of sites with modern JS frameworks.\",\n                Some(\"false\"), false, false, None,\n            ),\n        ],\n    ));\n\n    // -------------------------------------------------------------------------\n    // Advanced crawler settings (CoreOptions group 4)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_ADVANCED_CRAWLER_SETTINGS,\n        \"Advanced crawler settings\",\n        vec![\n            CrawlerOption::new(\n                \"--workers\", Some(\"-w\"), \"workers\", OptionType::Int, false,\n                \"Max concurrent workers (threads). Crawler will not make more simultaneous requests to the server than this number.\",\n                Some(\"3\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--max-reqs-per-sec\", Some(\"-rps\"), \"maxReqsPerSec\", OptionType::Float, false,\n                \"Max requests/s for whole crawler. Be careful not to cause a DoS attack.\",\n                Some(\"10\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--memory-limit\", Some(\"-ml\"), \"memoryLimit\", OptionType::SizeMG, false,\n                \"Memory limit in units M (Megabytes) or G (Gigabytes).\",\n                Some(\"2048M\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--resolve\", Some(\"-res\"), \"resolve\", OptionType::Resolve, true,\n                \"The ability to force the domain+port to resolve to its own IP address, just like CURL --resolve does. 
Example: `--resolve='www.mydomain.tld:80:127.0.0.1'`\",\n                None, true, true, None,\n            ),\n            CrawlerOption::new(\n                \"--allowed-domain-for-external-files\", Some(\"-adf\"), \"allowedDomainsForExternalFiles\", OptionType::String, true,\n                \"Primarily, the crawler crawls only the URL within the domain for initial URL. This allows you to enable loading of file content from another domain as well (e.g. if you want to load assets from a CDN). Can be specified multiple times. You can use domains with wildcard '*'.\",\n                None, true, true, None,\n            ),\n            CrawlerOption::new(\n                \"--allowed-domain-for-crawling\", Some(\"-adc\"), \"allowedDomainsForCrawling\", OptionType::String, true,\n                \"This option will allow you to crawl all content from other listed domains - typically in the case of language mutations on other domains. Can be specified multiple times. You can use domains with wildcard '*'.\",\n                None, true, true, None,\n            ),\n            CrawlerOption::new(\n                \"--single-foreign-page\", Some(\"-sfp\"), \"singleForeignPage\", OptionType::Bool, false,\n                \"If crawling of other domains is allowed (using `--allowed-domain-for-crawling`), it ensures that when another domain is not on same second-level domain, only that linked page and its assets are crawled from that foreign domain.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--include-regex\", Some(\"--include-regexp\"), \"includeRegex\", OptionType::Regex, true,\n                \"Include only URLs matching at least one PCRE regex. 
Can be specified multiple times.\",\n                None, false, true, None,\n            ),\n            CrawlerOption::new(\n                \"--ignore-regex\", Some(\"--ignore-regexp\"), \"ignoreRegex\", OptionType::Regex, true,\n                \"Ignore URLs matching any PCRE regex. Can be specified multiple times.\",\n                None, false, true, None,\n            ),\n            CrawlerOption::new(\n                \"--regex-filtering-only-for-pages\", None, \"regexFilteringOnlyForPages\", OptionType::Bool, false,\n                \"Set if you want filtering by `*-regex` rules apply only to page URLs, but static assets are loaded regardless of filtering.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--analyzer-filter-regex\", Some(\"--analyzer-filter-regexp\"), \"analyzerFilterRegex\", OptionType::Regex, false,\n                \"Use only analyzers that match the specified regexp.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--accept-encoding\", None, \"acceptEncoding\", OptionType::String, false,\n                \"Set `Accept-Encoding` request header.\",\n                Some(\"gzip, deflate, br\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--remove-query-params\", Some(\"-rqp\"), \"removeQueryParams\", OptionType::Bool, false,\n                \"Remove URL query parameters from crawled URLs.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--keep-query-param\", Some(\"-kqp\"), \"keepQueryParams\", OptionType::String, true,\n                \"Keep only the specified query parameter(s) in discovered URLs. All other query parameters are removed. Can be specified multiple times. 
Ignored when `--remove-query-params` is active.\",\n                None, true, true, None,\n            ),\n            CrawlerOption::new(\n                \"--add-random-query-params\", Some(\"-arqp\"), \"addRandomQueryParams\", OptionType::Bool, false,\n                \"Add random query parameters to each crawled URL.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--transform-url\", Some(\"-tu\"), \"transformUrl\", OptionType::ReplaceContent, true,\n                \"Transform URLs before crawling. Format: `from -> to` or `/regex/ -> replacement`. Example: `live-site.com -> local-site.local` or `/live-site\\\\.com\\\\/wp/ -> local-site.local/`. Can be specified multiple times.\",\n                None, true, true, None,\n            ),\n            CrawlerOption::new(\n                \"--force-relative-urls\", Some(\"-fru\"), \"forceRelativeUrls\", OptionType::Bool, false,\n                \"Normalize all discovered URLs matching the initial domain (incl. www variant and protocol differences) to relative paths. Prevents duplicate files in offline export when the site uses inconsistent URL formats.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--ignore-robots-txt\", Some(\"-irt\"), \"ignoreRobotsTxt\", OptionType::Bool, false,\n                \"Should robots.txt content be ignored? Useful for crawling an otherwise private/unindexed site.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--max-queue-length\", Some(\"-mql\"), \"maxQueueLength\", OptionType::Int, false,\n                \"Max URL queue length. 
It affects memory requirements.\",\n                Some(\"9000\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--max-visited-urls\", Some(\"-mvu\"), \"maxVisitedUrls\", OptionType::Int, false,\n                \"Max visited URLs. It affects memory requirements.\",\n                Some(\"10000\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--max-skipped-urls\", Some(\"-msu\"), \"maxSkippedUrls\", OptionType::Int, false,\n                \"Max skipped URLs. It affects memory requirements.\",\n                Some(\"10000\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--max-url-length\", Some(\"-mul\"), \"maxUrlLength\", OptionType::Int, false,\n                \"Max URL length in chars. It affects memory requirements.\",\n                Some(\"2083\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--max-non200-responses-per-basename\", Some(\"-mnrpb\"), \"maxNon200ResponsesPerBasename\", OptionType::Int, false,\n                \"Protection against looping with dynamic non-200 URLs. 
If a basename (the last part of the URL after the last slash) has more non-200 responses than this limit, other URLs with same basename will be ignored/skipped.\",\n                Some(\"5\"), false, false, None,\n            ),\n        ],\n    ));\n\n    // -------------------------------------------------------------------------\n    // Expert settings (CoreOptions group 5)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_EXPERT_SETTINGS,\n        \"Expert settings\",\n        vec![\n            CrawlerOption::new(\n                \"--debug\", None, \"debug\", OptionType::Bool, false,\n                \"Activate debug mode.\",\n                Some(\"false\"), true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--debug-log-file\", None, \"debugLogFile\", OptionType::File, false,\n                \"Log file where to save debug messages. When --debug is not set and --debug-log-file is set, logging will be active without visible output.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--debug-url-regex\", None, \"debugUrlRegex\", OptionType::Regex, true,\n                \"Regex for URL(s) to debug. When crawled URL is matched, parsing, URL replacing and other actions are printed to output. Can be specified multiple times.\",\n                None, true, true, None,\n            ),\n            CrawlerOption::new(\n                \"--result-storage\", Some(\"-rs\"), \"resultStorage\", OptionType::String, false,\n                \"Result storage type for content and headers. Values: `memory` or `file`. 
Use `file` for large websites.\",\n                Some(\"memory\"), false, false, None,\n            ),\n            {\n                let prefix = default_output_prefix();\n                CrawlerOption::new(\n                    \"--result-storage-dir\", Some(\"-rsd\"), \"resultStorageDir\", OptionType::Dir, false,\n                    \"Directory for --result-storage=file.\",\n                    Some(&format!(\"{prefix}/result-storage\")), false, false, None,\n                )\n            },\n            CrawlerOption::new(\n                \"--result-storage-compression\", Some(\"-rsc\"), \"resultStorageCompression\", OptionType::Bool, false,\n                \"Enable compression for results storage. Saves disk space, but uses more CPU.\",\n                Some(\"false\"), false, false, None,\n            ),\n            {\n                let cache_default = default_http_cache_dir();\n                CrawlerOption::new(\n                    \"--http-cache-dir\", Some(\"-hcd\"), \"httpCacheDir\", OptionType::Dir, false,\n                    \"Cache dir for HTTP responses. Disable with --http-cache-dir='off' or --no-cache.\",\n                    Some(&cache_default), false, false, None,\n                )\n            },\n            CrawlerOption::new(\n                \"--http-cache-compression\", Some(\"-hcc\"), \"httpCacheCompression\", OptionType::Bool, false,\n                \"Enable compression for HTTP cache storage. Saves disk space, but uses more CPU.\",\n                Some(\"false\"), true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--http-cache-ttl\", Some(\"-hct\"), \"httpCacheTtl\", OptionType::String, false,\n                \"TTL for HTTP cache entries (e.g. '1h', '7d', '30m'). Use '0' for infinite. 
Default: 24h.\",\n                Some(\"24h\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--no-cache\", None, \"noCache\", OptionType::Bool, false,\n                \"Disable HTTP cache completely. Shortcut for --http-cache-dir='off'.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--websocket-server\", Some(\"-ws\"), \"websocketServer\", OptionType::HostAndPort, false,\n                \"Start crawler with websocket server on given host:port, typically `0.0.0.0:8000`.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--console-width\", Some(\"-cw\"), \"consoleWidth\", OptionType::Int, false,\n                \"Enforce the definition of the console width and disable automatic detection.\",\n                None, true, false, None,\n            ),\n        ],\n    ));\n\n    // -------------------------------------------------------------------------\n    // File export settings (FileExporter - alphabetically first exporter)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_FILE_EXPORT_SETTINGS,\n        \"File export settings\",\n        vec![\n            {\n                let prefix = default_output_prefix();\n                let sep = std::path::MAIN_SEPARATOR;\n                CrawlerOption::new(\n                    \"--output-html-report\", None, \"outputHtmlReport\", OptionType::File, false,\n                    \"Save HTML report into that file. 
Set to empty '' to disable HTML report.\",\n                    Some(&format!(\"{prefix}{sep}%domain%.report.%datetime%.html\")), true, false, None,\n                )\n            },\n            CrawlerOption::new(\n                \"--html-report-options\", None, \"htmlReportOptions\", OptionType::String, false,\n                \"Comma-separated list of sections to include in HTML report. Available sections: summary, seo-opengraph, image-gallery, video-gallery, visited-urls, dns-ssl, crawler-stats, crawler-info, headers, content-types, skipped-urls, caching, best-practices, accessibility, security, redirects, 404-pages, slowest-urls, fastest-urls, source-domains. Default: all sections.\",\n                None, true, false, None,\n            ),\n            {\n                let prefix = default_output_prefix();\n                let sep = std::path::MAIN_SEPARATOR;\n                CrawlerOption::new(\n                    \"--output-json-file\", None, \"outputJsonFile\", OptionType::File, false,\n                    \"Save report as JSON. Set to empty '' to disable JSON report.\",\n                    Some(&format!(\"{prefix}{sep}%domain%.output.%datetime%.json\")), true, false, None,\n                )\n            },\n            {\n                let prefix = default_output_prefix();\n                let sep = std::path::MAIN_SEPARATOR;\n                CrawlerOption::new(\n                    \"--output-text-file\", None, \"outputTextFile\", OptionType::File, false,\n                    \"Save output as TXT. 
Set to empty '' to disable TXT report.\",\n                    Some(&format!(\"{prefix}{sep}%domain%.output.%datetime%.txt\")), true, false, None,\n                )\n            },\n            CrawlerOption::new(\n                \"--add-host-to-output-file\", None, \"addHostToOutputFile\", OptionType::Bool, false,\n                \"Append initial URL host to filename except sitemaps.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--add-timestamp-to-output-file\", None, \"addTimestampToOutputFile\", OptionType::Bool, false,\n                \"Append timestamp to filename except sitemaps.\",\n                Some(\"false\"), false, false, None,\n            ),\n        ],\n    ));\n\n    // -------------------------------------------------------------------------\n    // Mailer options (MailerExporter)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_MAILER_SETTINGS,\n        \"Mailer options\",\n        vec![\n            CrawlerOption::new(\n                \"--mail-to\",\n                None,\n                \"mailTo\",\n                OptionType::Email,\n                true,\n                \"E-mail report recipient address(es). 
Can be specified multiple times.\",\n                None,\n                true,\n                true,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--mail-from\",\n                None,\n                \"mailFrom\",\n                OptionType::Email,\n                false,\n                \"E-mail sender address.\",\n                Some(\"siteone-crawler@your-hostname.com\"),\n                false,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--mail-from-name\",\n                None,\n                \"mailFromName\",\n                OptionType::String,\n                false,\n                \"E-mail sender name\",\n                Some(\"SiteOne Crawler\"),\n                false,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--mail-subject-template\",\n                None,\n                \"mailSubjectTemplate\",\n                OptionType::String,\n                false,\n                \"E-mail subject template. 
You can use dynamic variables %domain% and %datetime%\",\n                Some(\"Crawler Report for %domain% (%date%)\"),\n                true,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--mail-smtp-host\",\n                None,\n                \"mailSmtpHost\",\n                OptionType::String,\n                false,\n                \"SMTP host.\",\n                Some(\"localhost\"),\n                true,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--mail-smtp-port\",\n                None,\n                \"mailSmtpPort\",\n                OptionType::Int,\n                false,\n                \"SMTP port.\",\n                Some(\"25\"),\n                true,\n                false,\n                Some(vec![\"1\".to_string(), \"65535\".to_string()]),\n            ),\n            CrawlerOption::new(\n                \"--mail-smtp-user\",\n                None,\n                \"mailSmtpUser\",\n                OptionType::String,\n                false,\n                \"SMTP user for authentication.\",\n                None,\n                true,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--mail-smtp-pass\",\n                None,\n                \"mailSmtpPass\",\n                OptionType::String,\n                false,\n                \"SMTP password for authentication.\",\n                None,\n                true,\n                false,\n                None,\n            ),\n        ],\n    ));\n\n    // -------------------------------------------------------------------------\n    // Markdown exporter options (MarkdownExporter)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_MARKDOWN_EXPORT_SETTINGS,\n        \"Markdown 
exporter options\",\n        vec![\n            CrawlerOption::new(\n                \"--markdown-export-dir\", Some(\"-med\"), \"markdownExportDirectory\", OptionType::Dir, false,\n                \"Path to directory where to save the markdown version of the website.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--markdown-export-single-file\", None, \"markdownExportSingleFile\", OptionType::File, false,\n                \"Path to a file where to save the combined markdown files into one document. Requires --markdown-export-dir to be set.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--markdown-move-content-before-h1-to-end\", None, \"markdownMoveContentBeforeH1ToEnd\", OptionType::Bool, false,\n                \"Move all content before the main H1 heading (typically the header with the menu) to the end of the markdown.\",\n                Some(\"false\"), true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--markdown-disable-images\", Some(\"-mdi\"), \"markdownDisableImages\", OptionType::Bool, false,\n                \"Do not export and show images in markdown files. Images are enabled by default.\",\n                Some(\"false\"), true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--markdown-disable-files\", Some(\"-mdf\"), \"markdownDisableFiles\", OptionType::Bool, false,\n                \"Do not export and link files other than HTML/CSS/JS/fonts/images - eg. PDF, ZIP, etc. These files are enabled by default.\",\n                Some(\"false\"), true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--markdown-remove-links-and-images-from-single-file\", None, \"markdownRemoveLinksAndImagesFromSingleFile\", OptionType::Bool, false,\n                \"Remove links and images from the combined single markdown file. 
Useful for AI tools that don't need these elements.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--markdown-exclude-selector\", Some(\"-mes\"), \"markdownExcludeSelector\", OptionType::String, true,\n                \"Exclude some page content (DOM elements) from markdown export defined by CSS selectors like 'header', '.header', '#header', etc.\",\n                None, false, true, None,\n            ),\n            CrawlerOption::new(\n                \"--markdown-replace-content\", None, \"markdownReplaceContent\", OptionType::ReplaceContent, true,\n                \"Replace text content with `foo -> bar` or regexp in PREG format: `/card[0-9]/i -> card`\",\n                None, true, true, None,\n            ),\n            CrawlerOption::new(\n                \"--markdown-replace-query-string\", None, \"markdownReplaceQueryString\", OptionType::ReplaceContent, true,\n                \"Instead of using a short hash instead of a query string in the filename, just replace some characters. You can use simple format 'foo -> bar' or regexp in PREG format, e.g. '/([a-z]+)=([^&]*)(&|$)/i -> $1__$2'\",\n                None, true, true, None,\n            ),\n            CrawlerOption::new(\n                \"--markdown-export-store-only-url-regex\", None, \"markdownExportStoreOnlyUrlRegex\", OptionType::Regex, true,\n                \"For debug - when filled it will activate debug mode and store only URLs which match one of these PCRE regexes. Can be specified multiple times.\",\n                None, true, true, None,\n            ),\n            CrawlerOption::new(\n                \"--markdown-ignore-store-file-error\", None, \"markdownIgnoreStoreFileError\", OptionType::Bool, false,\n                \"Ignores any file storing errors. 
The export process will continue.\",\n                Some(\"false\"), false, false, None,\n            ),\n        ],\n    ));\n\n    // -------------------------------------------------------------------------\n    // Offline exporter options (OfflineWebsiteExporter)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_OFFLINE_EXPORT_SETTINGS,\n        \"Offline exporter options\",\n        vec![\n            CrawlerOption::new(\n                \"--offline-export-dir\", Some(\"-oed\"), \"offlineExportDirectory\", OptionType::Dir, false,\n                \"Path to directory where to save the offline version of the website.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--offline-export-store-only-url-regex\", None, \"offlineExportStoreOnlyUrlRegex\", OptionType::Regex, true,\n                \"For debug - when filled it will activate debug mode and store only URLs which match one of these PCRE regexes. Can be specified multiple times.\",\n                None, true, true, None,\n            ),\n            CrawlerOption::new(\n                \"--offline-export-remove-unwanted-code\", None, \"offlineExportRemoveUnwantedCode\", OptionType::Bool, false,\n                \"Remove unwanted code for offline mode? Typically JS of the analytics, social networks, cookie consent, cross origins, etc.\",\n                Some(\"true\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--offline-export-no-auto-redirect-html\", None, \"offlineExportNoAutoRedirectHtml\", OptionType::Bool, false,\n                \"Disable automatic creation of redirect HTML files for subfolders that contain an index.html file. 
This solves situations for URLs where sometimes the URL ends with a slash, sometimes it doesn't.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--offline-export-preserve-url-structure\", None, \"offlineExportPreserveUrlStructure\", OptionType::Bool, false,\n                \"Preserve the original URL path structure. E.g. /about is stored as about/index.html instead of about.html. Useful for web server deployment.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--offline-export-preserve-urls\", None, \"offlineExportPreserveUrls\", OptionType::Bool, false,\n                \"Preserve original URL format in exported HTML/CSS/JS. Same-domain links become root-relative (/path), cross-domain links stay absolute. Useful when exported HTML is processed by tools that need production URLs.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--replace-content\", None, \"replaceContent\", OptionType::ReplaceContent, true,\n                \"Replace HTML/JS/CSS content with `foo -> bar` or regexp in PREG format: `/card[0-9]/i -> card`\",\n                None, true, true, None,\n            ),\n            CrawlerOption::new(\n                \"--replace-query-string\", None, \"replaceQueryString\", OptionType::ReplaceContent, true,\n                \"Instead of using a short hash instead of a query string in the filename, just replace some characters. You can use simple format 'foo -> bar' or regexp in PREG format, e.g. '/([a-z]+)=([^&]*)(&|$)/i -> $1__$2'\",\n                None, true, true, None,\n            ),\n            CrawlerOption::new(\n                \"--offline-export-lowercase\", None, \"offlineExportLowercase\", OptionType::Bool, false,\n                \"Convert all filenames to lowercase for offline export. 
Useful for case-insensitive filesystems.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--ignore-store-file-error\", None, \"ignoreStoreFileError\", OptionType::Bool, false,\n                \"Ignores any file storing errors. The export process will continue.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--disable-astro-inline-modules\", None, \"disableAstroInlineModules\", OptionType::Bool, false,\n                \"Disables inlining of Astro module scripts for offline export. Scripts will remain as external files with corrected relative paths.\",\n                Some(\"false\"), false, false, None,\n            ),\n        ],\n    ));\n\n    // -------------------------------------------------------------------------\n    // Sitemap options (SitemapExporter)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_SITEMAP_SETTINGS,\n        \"Sitemap options\",\n        vec![\n            CrawlerOption::new(\n                \"--sitemap-xml-file\",\n                None,\n                \"outputSitemapXml\",\n                OptionType::File,\n                false,\n                \"Save sitemap to XML. `.xml` added if missing.\",\n                None,\n                true,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--sitemap-txt-file\",\n                None,\n                \"outputSitemapTxt\",\n                OptionType::File,\n                false,\n                \"Save sitemap to TXT. 
`.txt` added if missing.\",\n                None,\n                true,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--sitemap-base-priority\",\n                None,\n                \"sitemapBasePriority\",\n                OptionType::Float,\n                false,\n                \"Base priority for XML sitemap.\",\n                Some(\"0.5\"),\n                false,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--sitemap-priority-increase\",\n                None,\n                \"sitemapPriorityIncrease\",\n                OptionType::Float,\n                false,\n                \"Priority increase value based on slashes count in the URL\",\n                Some(\"0.1\"),\n                false,\n                false,\n                None,\n            ),\n        ],\n    ));\n\n    // -------------------------------------------------------------------------\n    // Upload options (UploadExporter)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_UPLOAD_SETTINGS,\n        \"Upload options\",\n        vec![\n            CrawlerOption::new(\n                \"--upload\", Some(\"-up\"), \"uploadEnabled\", OptionType::Bool, false,\n                \"Enable HTML report upload to `--upload-to`.\",\n                Some(\"false\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--upload-to\", Some(\"-upt\"), \"uploadTo\", OptionType::Url, false,\n                \"URL of the endpoint where to send the HTML report.\",\n                Some(\"https://crawler.siteone.io/up\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--upload-retention\", Some(\"-upr\"), \"uploadRetention\", OptionType::String, false,\n                \"How long should the HTML 
report be kept in the online version? Values: 1h / 4h / 12h / 24h / 3d / 7d / 30d / 365d / forever\",\n                Some(\"30d\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--upload-password\", Some(\"-uppass\"), \"uploadPassword\", OptionType::String, false,\n                \"Optional password, which must be entered (the user will be 'crawler') to display the online HTML report.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--upload-timeout\", Some(\"-upti\"), \"uploadTimeout\", OptionType::Int, false,\n                \"Upload timeout in seconds.\",\n                Some(\"3600\"), false, false, None,\n            ),\n        ],\n    ));\n\n    // -------------------------------------------------------------------------\n    // Fastest URL analyzer (FastestAnalyzer)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_FASTEST_ANALYZER,\n        \"Fastest URL analyzer\",\n        vec![\n            CrawlerOption::new(\n                \"--fastest-urls-top-limit\",\n                None,\n                \"fastestTopLimit\",\n                OptionType::Int,\n                false,\n                \"Number of URL addresses in TOP fastest URL addresses.\",\n                Some(\"20\"),\n                false,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--fastest-urls-max-time\",\n                None,\n                \"fastestMaxTime\",\n                OptionType::Float,\n                false,\n                \"The maximum response time for an URL address to be evaluated as fast.\",\n                Some(\"1\"),\n                false,\n                false,\n                None,\n            ),\n        ],\n    ));\n\n    // 
-------------------------------------------------------------------------\n    // SEO and OpenGraph analyzer (SeoAndOpenGraphAnalyzer)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_SEO_AND_OPENGRAPH_ANALYZER,\n        \"SEO and OpenGraph analyzer\",\n        vec![CrawlerOption::new(\n            \"--max-heading-level\",\n            None,\n            \"maxHeadingLevel\",\n            OptionType::Int,\n            false,\n            \"Maximal analyzer heading level from 1 to 6.\",\n            Some(\"3\"),\n            false,\n            false,\n            Some(vec![\"1\".to_string(), \"6\".to_string()]),\n        )],\n    ));\n\n    // -------------------------------------------------------------------------\n    // Slowest URL analyzer (SlowestAnalyzer)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_SLOWEST_ANALYZER,\n        \"Slowest URL analyzer\",\n        vec![\n            CrawlerOption::new(\n                \"--slowest-urls-top-limit\",\n                None,\n                \"slowestTopLimit\",\n                OptionType::Int,\n                false,\n                \"Number of URL addresses in TOP slowest URL addresses.\",\n                Some(\"20\"),\n                false,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--slowest-urls-min-time\",\n                None,\n                \"slowestMinTime\",\n                OptionType::Float,\n                false,\n                \"The minimum response time for an URL address to be added to TOP slow selection.\",\n                Some(\"0.01\"),\n                false,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--slowest-urls-max-time\",\n                None,\n                
\"slowestMaxTime\",\n                OptionType::Float,\n                false,\n                \"The maximum response time for an URL address to be evaluated as very slow.\",\n                Some(\"3\"),\n                false,\n                false,\n                None,\n            ),\n        ],\n    ));\n\n    // -------------------------------------------------------------------------\n    // CI/CD settings\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_CI_CD_SETTINGS,\n        \"CI/CD settings\",\n        vec![\n            CrawlerOption::new(\n                \"--ci\",\n                None,\n                \"ci\",\n                OptionType::Bool,\n                false,\n                \"Enable CI/CD quality gate. Crawler exits with code 10 if thresholds are not met.\",\n                Some(\"false\"),\n                false,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--ci-min-score\",\n                None,\n                \"ciMinScore\",\n                OptionType::Float,\n                false,\n                \"Minimum overall quality score (0.0-10.0).\",\n                Some(\"5.0\"),\n                false,\n                false,\n                Some(vec![\"0.0\".into(), \"10.0\".into()]),\n            ),\n            CrawlerOption::new(\n                \"--ci-min-performance\",\n                None,\n                \"ciMinPerformance\",\n                OptionType::Float,\n                false,\n                \"Minimum Performance category score (0.0-10.0). 
Default value is `5`.\",\n                Some(\"5\"),\n                true,\n                false,\n                Some(vec![\"0.0\".into(), \"10.0\".into()]),\n            ),\n            CrawlerOption::new(\n                \"--ci-min-seo\",\n                None,\n                \"ciMinSeo\",\n                OptionType::Float,\n                false,\n                \"Minimum SEO category score (0.0-10.0). Default value is `5`.\",\n                Some(\"5\"),\n                true,\n                false,\n                Some(vec![\"0.0\".into(), \"10.0\".into()]),\n            ),\n            CrawlerOption::new(\n                \"--ci-min-security\",\n                None,\n                \"ciMinSecurity\",\n                OptionType::Float,\n                false,\n                \"Minimum Security category score (0.0-10.0). Default value is `5`.\",\n                Some(\"5\"),\n                true,\n                false,\n                Some(vec![\"0.0\".into(), \"10.0\".into()]),\n            ),\n            CrawlerOption::new(\n                \"--ci-min-accessibility\",\n                None,\n                \"ciMinAccessibility\",\n                OptionType::Float,\n                false,\n                \"Minimum Accessibility category score (0.0-10.0). Default value is `3`.\",\n                Some(\"3\"),\n                true,\n                false,\n                Some(vec![\"0.0\".into(), \"10.0\".into()]),\n            ),\n            CrawlerOption::new(\n                \"--ci-min-best-practices\",\n                None,\n                \"ciMinBestPractices\",\n                OptionType::Float,\n                false,\n                \"Minimum Best Practices category score (0.0-10.0). 
Default value is `5`.\",\n                Some(\"5\"),\n                true,\n                false,\n                Some(vec![\"0.0\".into(), \"10.0\".into()]),\n            ),\n            CrawlerOption::new(\n                \"--ci-max-404\",\n                None,\n                \"ciMax404\",\n                OptionType::Int,\n                false,\n                \"Maximum number of 404 responses allowed.\",\n                Some(\"0\"),\n                false,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--ci-max-5xx\",\n                None,\n                \"ciMax5xx\",\n                OptionType::Int,\n                false,\n                \"Maximum number of 5xx server error responses allowed.\",\n                Some(\"0\"),\n                false,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--ci-max-criticals\",\n                None,\n                \"ciMaxCriticals\",\n                OptionType::Int,\n                false,\n                \"Maximum number of critical analysis findings allowed.\",\n                Some(\"0\"),\n                false,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--ci-max-warnings\",\n                None,\n                \"ciMaxWarnings\",\n                OptionType::Int,\n                false,\n                \"Maximum number of warning analysis findings allowed.\",\n                None,\n                true,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--ci-max-avg-response\",\n                None,\n                \"ciMaxAvgResponse\",\n                OptionType::Float,\n                false,\n                \"Maximum average response time in seconds.\",\n                None,\n                true,\n                
false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--ci-min-pages\",\n                None,\n                \"ciMinPages\",\n                OptionType::Int,\n                false,\n                \"Minimum number of HTML pages that must be found.\",\n                Some(\"10\"),\n                false,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--ci-min-assets\",\n                None,\n                \"ciMinAssets\",\n                OptionType::Int,\n                false,\n                \"Minimum number of assets (JS, CSS, images, fonts) that must be found.\",\n                Some(\"10\"),\n                false,\n                false,\n                None,\n            ),\n            CrawlerOption::new(\n                \"--ci-min-documents\",\n                None,\n                \"ciMinDocuments\",\n                OptionType::Int,\n                false,\n                \"Minimum number of documents (PDF, etc.) that must be found.\",\n                Some(\"0\"),\n                false,\n                false,\n                None,\n            ),\n        ],\n    ));\n\n    // -------------------------------------------------------------------------\n    // Server options (built-in HTTP server for serving exports)\n    // -------------------------------------------------------------------------\n    options.add_group(OptionGroup::new(\n        GROUP_SERVER_SETTINGS,\n        \"Server options\",\n        vec![\n            CrawlerOption::new(\n                \"--serve-markdown\", Some(\"-sm\"), \"serveMarkdownDirectory\", OptionType::Dir, false,\n                \"Start HTTP server to browse a markdown export directory. Renders .md files as styled HTML with table and accordion support. 
No crawling is performed.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--serve-offline\", Some(\"-so\"), \"serveOfflineDirectory\", OptionType::Dir, false,\n                \"Start HTTP server to browse an offline HTML export directory. Serves files with Content-Security-Policy restricting to same origin. No crawling is performed.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--serve-port\", Some(\"-sport\"), \"servePort\", OptionType::Int, false,\n                \"Port for the built-in HTTP server (used with --serve-markdown or --serve-offline).\",\n                Some(\"8321\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--serve-bind-address\", Some(\"-sba\"), \"serveBindAddress\", OptionType::String, false,\n                \"Bind address for the built-in HTTP server. Default is 127.0.0.1 (localhost only). Use 0.0.0.0 to listen on all network interfaces.\",\n                Some(\"127.0.0.1\"), false, false, None,\n            ),\n            CrawlerOption::new(\n                \"--html-to-markdown\", Some(\"-htm\"), \"htmlToMarkdownFile\", OptionType::String, false,\n                \"Convert a local HTML file to Markdown and print to stdout. Uses the same pipeline as --markdown-export-dir. Respects --markdown-disable-images, --markdown-disable-files, --markdown-move-content-before-h1-to-end, and --markdown-exclude-selector. No crawling is performed.\",\n                None, true, false, None,\n            ),\n            CrawlerOption::new(\n                \"--html-to-markdown-output\", Some(\"-htmo\"), \"htmlToMarkdownOutput\", OptionType::String, false,\n                \"Output file path for --html-to-markdown. 
If not set, markdown is printed to stdout.\",\n                None, true, false, None,\n            ),\n        ],\n    ));\n\n    options\n}\n\n/// Parse CLI arguments (raw argv) into a fully populated CoreOptions.\n/// Read config file and return its lines as CLI-style arguments.\n/// Config file format: one argument per line, `#` for comments, blank lines ignored.\n/// Example:\n///   --workers=5\n///   --max-reqs-per-sec=20\n///   # This is a comment\n///   --output=json\nfn read_config_file(path: &str) -> Result<Vec<String>, CrawlerError> {\n    let content = std::fs::read_to_string(path)\n        .map_err(|e| CrawlerError::Config(format!(\"Cannot read config file '{}': {}\", path, e)))?;\n    let args: Vec<String> = content\n        .lines()\n        .map(|line| line.trim())\n        .filter(|line| !line.is_empty() && !line.starts_with('#'))\n        .map(|line| line.to_string())\n        .collect();\n    Ok(args)\n}\n\n/// Load config from file: --config-file=PATH, ~/.siteone-crawler.conf, or /etc/siteone-crawler.conf.\n/// Returns merged argv with config args prepended (CLI args take precedence).\nfn merge_config_file_args(argv: &[String]) -> Result<Vec<String>, CrawlerError> {\n    // Extract --config-file from argv\n    let mut config_path: Option<String> = None;\n    for arg in argv {\n        if let Some(path) = arg.strip_prefix(\"--config-file=\") {\n            config_path = Some(path.to_string());\n            break;\n        }\n    }\n\n    // If no explicit config file, try auto-discovery\n    if config_path.is_none() {\n        let home_conf = std::env::var(\"HOME\")\n            .ok()\n            .map(|h| format!(\"{}/.siteone-crawler.conf\", h));\n        let candidates = [home_conf, Some(\"/etc/siteone-crawler.conf\".to_string())];\n        for candidate in candidates.iter().flatten() {\n            if std::path::Path::new(candidate).exists() {\n                config_path = Some(candidate.clone());\n                break;\n            }\n    
    }\n    }\n\n    if let Some(ref path) = config_path {\n        let config_args = read_config_file(path)?;\n        // Merge: config args first, then real argv (CLI overrides config)\n        // Filter out --config-file from real argv\n        let real_args: Vec<String> = argv\n            .iter()\n            .filter(|a| !a.starts_with(\"--config-file=\"))\n            .cloned()\n            .collect();\n        let mut merged = Vec::new();\n        if !real_args.is_empty() {\n            merged.push(real_args[0].clone()); // binary name\n        }\n        merged.extend(config_args);\n        if real_args.len() > 1 {\n            merged.extend_from_slice(&real_args[1..]);\n        }\n        Ok(merged)\n    } else {\n        Ok(argv.to_vec())\n    }\n}\n\n/// This is the main entry point for option parsing.\npub fn parse_argv(argv: &[String]) -> Result<CoreOptions, CrawlerError> {\n    // Merge config file args with CLI args (CLI takes precedence)\n    let merged_argv = merge_config_file_args(argv)?;\n    let argv = &merged_argv;\n\n    let mut options = get_options();\n\n    // Collect all known option names and alt names for unknown detection\n    let mut known_options: Vec<String> = Vec::new();\n    let mut bool_options: std::collections::HashSet<String> = std::collections::HashSet::new();\n    for (_apl_code, group) in options.get_groups() {\n        for (_prop_name, option) in &group.options {\n            known_options.push(option.name.clone());\n            if matches!(option.option_type, OptionType::Bool) {\n                bool_options.insert(option.name.clone());\n            }\n            if let Some(ref alt) = option.alt_name {\n                known_options.push(alt.clone());\n                if matches!(option.option_type, OptionType::Bool) {\n                    bool_options.insert(alt.clone());\n                }\n            }\n        }\n    }\n    // Also accept --config-file as known\n    
known_options.push(\"--config-file\".to_string());\n\n    // Check for unknown options\n    let mut unknown_options: Vec<String> = Vec::new();\n    let mut i = 0;\n    while i < argv.len() {\n        let arg = argv[i].trim();\n        if arg.is_empty() || arg.starts_with('#') {\n            i += 1;\n            continue;\n        }\n        // Skip the program name (first argv element or any non-option arg)\n        if !arg.starts_with('-') {\n            // Check if this is a value consumed by a previous space-separated option\n            // (non-option args that aren't the script name are potentially unknown)\n            if i > 0 {\n                // Check if previous arg was a known option that could consume this as value\n                let prev = &argv[i - 1];\n                let prev_name = prev.split('=').next().unwrap_or(prev);\n                let is_prev_known_non_bool = known_options.iter().any(|k| k == prev_name) && !prev.contains('=');\n                if !is_prev_known_non_bool {\n                    // Not a consumed value — could be unknown, but skip argv[0] (binary name)\n                    // We just skip non-dash args silently (they might be the binary path)\n                }\n            }\n            i += 1;\n            continue;\n        }\n        // Extract option name without value (strip =...)\n        let arg_without_value = if let Some(eq_pos) = arg.find('=') {\n            &arg[..eq_pos]\n        } else {\n            arg\n        };\n        if !known_options.iter().any(|k| k == arg_without_value) {\n            unknown_options.push(arg.to_string());\n        } else if !arg.contains('=') && !bool_options.contains(arg_without_value) {\n            // Known non-bool option without '=' — the next token is its value, skip it\n            i += 1;\n        }\n        i += 1;\n    }\n    if !unknown_options.is_empty() {\n        return Err(CrawlerError::Config(format!(\n            \"Unknown options: {}\",\n            
unknown_options.join(\", \")\n        )));\n    }\n\n    // Parse all options from argv\n    for (_apl_code, group) in options.get_groups_mut() {\n        for (_prop_name, option) in group.options.iter_mut() {\n            option.set_value_from_argv(argv)?;\n\n            // Set domain for use in file/dir %domain% placeholder\n            if option.property_to_fill == \"url\"\n                && let Ok(value) = option.get_value()\n                && let Some(url_str) = value.as_str()\n                && let Ok(parsed) = url::Url::parse(url_str)\n            {\n                CrawlerOption::set_extras_domain(parsed.host_str());\n            }\n        }\n    }\n\n    CoreOptions::from_options(&options)\n}\n\n/// Generate help text for all options, organized by groups.\npub fn get_help_text() -> String {\n    use crate::options::option_type::OptionType;\n    use crate::utils;\n\n    let options = get_options();\n    let mut help = String::new();\n\n    for (_apl_code, group) in options.get_groups() {\n        let group_label = format!(\"{}:\", group.name);\n        let dashes = \"-\".repeat(group_label.len());\n        help.push_str(&format!(\n            \"{}\\n{}\\n\",\n            utils::get_color_text(&group_label, \"yellow\", false),\n            utils::get_color_text(&dashes, \"yellow\", false),\n        ));\n\n        for (_prop_name, option) in &group.options {\n            // Build option name with type suffix\n            let type_suffix = match option.option_type {\n                OptionType::Int => \"=<int>\",\n                OptionType::String | OptionType::Float | OptionType::ReplaceContent => \"=<val>\",\n                OptionType::SizeMG => \"=<size>\",\n                OptionType::Regex => \"=<regex>\",\n                OptionType::Email => \"=<email>\",\n                OptionType::Url => \"=<url>\",\n                OptionType::File => \"=<file>\",\n                OptionType::Dir => \"=<dir>\",\n                OptionType::HostAndPort => 
\"=<host:port>\",\n                OptionType::Resolve => \"=<domain:port:ip>\",\n                OptionType::Bool => \"\",\n            };\n            let name_and_value = format!(\"{}{}\", option.name, type_suffix);\n\n            // Description: trim trailing '. ' then append '.'\n            let desc = option.description.trim_end_matches(['.', ' ']);\n            let desc_with_period = format!(\"{}.\", desc);\n\n            // Default value display logic:\n            // Bool options with default false don't show a default.\n            // Bool options with default true show as \"1\".\n            let default_info = match option.default_value {\n                Some(ref dv) if !dv.is_empty() && !desc_with_period.contains(\"Default\") => {\n                    if option.option_type == OptionType::Bool {\n                        // true displays as \"1\", false is not shown\n                        if dv == \"true\" || dv == \"1\" {\n                            \" Default value is `1`.\".to_string()\n                        } else {\n                            String::new()\n                        }\n                    } else {\n                        format!(\" Default value is `{}`.\", dv)\n                    }\n                }\n                _ => String::new(),\n            };\n\n            // Ensure at least one space between name+type and description\n            let padded = if name_and_value.len() >= 33 {\n                format!(\"{} \", name_and_value)\n            } else {\n                format!(\"{:<33}\", name_and_value)\n            };\n\n            help.push_str(&format!(\"{}{}{}\\n\", padded, desc_with_period, default_info));\n        }\n\n        help.push('\\n');\n    }\n\n    help\n}\n\n/// Parse a human-readable duration string (e.g. 
\"24h\", \"7d\", \"30m\", \"3600s\", \"3600\") to seconds.\nfn parse_duration_to_secs(s: &str) -> u64 {\n    let s = s.trim();\n    if let Some(num) = s.strip_suffix('d') {\n        num.parse::<u64>().unwrap_or(1) * 86400\n    } else if let Some(num) = s.strip_suffix('h') {\n        num.parse::<u64>().unwrap_or(1) * 3600\n    } else if let Some(num) = s.strip_suffix('m') {\n        num.parse::<u64>().unwrap_or(1) * 60\n    } else if let Some(num) = s.strip_suffix('s') {\n        num.parse::<u64>().unwrap_or(0)\n    } else {\n        // Plain number = seconds\n        s.parse::<u64>().unwrap_or(86400)\n    }\n}\n\n/// Returns the platform-appropriate default HTTP cache directory.\n/// Uses dirs::cache_dir() for XDG/macOS/Windows compliance:\n///   Linux:   ~/.cache/siteone-crawler/http-cache\n///   macOS:   ~/Library/Caches/siteone-crawler/http-cache\n///   Windows: C:\\Users\\<user>\\AppData\\Local\\siteone-crawler\\http-cache\n/// Falls back to \"tmp/http-client-cache\" if system cache dir is unavailable.\nfn default_http_cache_dir() -> String {\n    dirs::cache_dir()\n        .map(|p| {\n            p.join(\"siteone-crawler\")\n                .join(\"http-cache\")\n                .to_string_lossy()\n                .to_string()\n        })\n        .unwrap_or_else(|| \"tmp/http-client-cache\".to_string())\n}\n\n/// Returns the default output directory prefix for reports and result storage.\n/// Tries `./tmp/` in CWD first; if it can't be created (e.g. 
read-only filesystem),\n/// falls back to `dirs::data_local_dir()/siteone-crawler/` (platform-appropriate).\n/// Result is cached via OnceLock so the notice is printed at most once.\nfn default_output_prefix() -> String {\n    static PREFIX: std::sync::OnceLock<String> = std::sync::OnceLock::new();\n    PREFIX\n        .get_or_init(|| {\n            let tmp_path = std::path::Path::new(\"tmp\");\n            if tmp_path.is_dir() || std::fs::create_dir_all(tmp_path).is_ok() {\n                return \"tmp\".to_string();\n            }\n            if let Some(data_dir) = dirs::data_local_dir() {\n                let fallback = data_dir.join(\"siteone-crawler\");\n                if fallback.is_dir() || std::fs::create_dir_all(&fallback).is_ok() {\n                    let path = fallback.to_string_lossy().to_string();\n                    eprintln!(\n                        \"Notice: Cannot create ./tmp/ in current directory. Output files will be stored in: {}\",\n                        path\n                    );\n                    return path;\n                }\n            }\n            // Last resort — use tmp and let it fail later with a clear error\n            \"tmp\".to_string()\n        })\n        .clone()\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n    use crate::options::option::OptionValue;\n\n    fn make_default_core_options() -> CoreOptions {\n        CoreOptions {\n            url: \"https://test.com\".to_string(),\n            single_page: false,\n            max_depth: 0,\n            device: DeviceType::Desktop,\n            user_agent: None,\n            timeout: 5,\n            proxy: None,\n            http_auth: None,\n            accept_invalid_certs: false,\n            timezone: None,\n            show_version_only: false,\n            show_help_only: false,\n            output_type: OutputType::Text,\n            url_column_size: None,\n            show_inline_criticals: false,\n            show_inline_warnings: false,\n        
    rows_limit: 200,\n            extra_columns: Vec::new(),\n            extra_columns_names_only: Vec::new(),\n            show_scheme_and_host: false,\n            do_not_truncate_url: false,\n            hide_progress_bar: false,\n            hide_columns: Vec::new(),\n            no_color: false,\n            force_color: false,\n            console_width: None,\n            disable_all_assets: false,\n            disable_javascript: false,\n            disable_styles: false,\n            disable_fonts: false,\n            disable_images: false,\n            disable_files: false,\n            remove_all_anchor_listeners: false,\n            workers: 3,\n            max_reqs_per_sec: 10.0,\n            memory_limit: \"2048M\".to_string(),\n            resolve: Vec::new(),\n            websocket_server: None,\n            ignore_robots_txt: false,\n            allowed_domains_for_external_files: Vec::new(),\n            allowed_domains_for_crawling: Vec::new(),\n            single_foreign_page: false,\n            result_storage: StorageType::Memory,\n            result_storage_dir: \"tmp/result-storage\".to_string(),\n            result_storage_compression: false,\n            accept_encoding: \"gzip, deflate, br\".to_string(),\n            max_queue_length: 9000,\n            max_visited_urls: 10000,\n            max_url_length: 2083,\n            max_skipped_urls: 10000,\n            max_non200_responses_per_basename: 5,\n            include_regex: Vec::new(),\n            ignore_regex: Vec::new(),\n            regex_filtering_only_for_pages: false,\n            analyzer_filter_regex: None,\n            add_random_query_params: false,\n            remove_query_params: false,\n            keep_query_params: Vec::new(),\n            transform_url: Vec::new(),\n            force_relative_urls: false,\n            output_html_report: None,\n            html_report_options: None,\n            output_json_file: None,\n            output_text_file: None,\n           
 add_host_to_output_file: false,\n            add_timestamp_to_output_file: false,\n            sitemap_xml_file: None,\n            sitemap_txt_file: None,\n            sitemap_base_priority: 0.5,\n            sitemap_priority_increase: 0.1,\n            offline_export_dir: None,\n            offline_export_store_only_url_regex: Vec::new(),\n            offline_export_remove_unwanted_code: true,\n            offline_export_no_auto_redirect_html: false,\n            offline_export_preserve_url_structure: false,\n            offline_export_preserve_urls: false,\n            replace_content: Vec::new(),\n            replace_query_string: Vec::new(),\n            offline_export_lowercase: false,\n            ignore_store_file_error: false,\n            disable_astro_inline_modules: false,\n            markdown_export_dir: None,\n            markdown_export_single_file: None,\n            markdown_move_content_before_h1_to_end: false,\n            markdown_disable_images: false,\n            markdown_disable_files: false,\n            markdown_remove_links_and_images_from_single_file: false,\n            markdown_exclude_selector: Vec::new(),\n            markdown_replace_content: Vec::new(),\n            markdown_replace_query_string: Vec::new(),\n            markdown_export_store_only_url_regex: Vec::new(),\n            markdown_ignore_store_file_error: false,\n            mail_to: Vec::new(),\n            mail_from: \"test@test.com\".to_string(),\n            mail_from_name: \"Test\".to_string(),\n            mail_subject_template: \"Test\".to_string(),\n            mail_smtp_host: \"localhost\".to_string(),\n            mail_smtp_port: 25,\n            mail_smtp_user: None,\n            mail_smtp_pass: None,\n            upload_enabled: false,\n            upload_to: String::new(),\n            upload_retention: \"30d\".to_string(),\n            upload_password: None,\n            upload_timeout: 3600,\n            http_cache_dir: None,\n            
http_cache_compression: false,\n            http_cache_ttl: None,\n            debug: false,\n            debug_log_file: None,\n            debug_url_regex: Vec::new(),\n            fastest_top_limit: 20,\n            fastest_max_time: 1.0,\n            max_heading_level: 3,\n            slowest_top_limit: 20,\n            slowest_min_time: 0.01,\n            slowest_max_time: 3.0,\n            serve_markdown_dir: None,\n            serve_offline_dir: None,\n            serve_port: 8321,\n            serve_bind_address: \"127.0.0.1\".to_string(),\n            html_to_markdown_file: None,\n            html_to_markdown_output: None,\n            ci: false,\n            ci_min_score: 5.0,\n            ci_min_performance: Some(5.0),\n            ci_min_seo: Some(5.0),\n            ci_min_security: Some(5.0),\n            ci_min_accessibility: Some(3.0),\n            ci_min_best_practices: Some(5.0),\n            ci_max_404: 0,\n            ci_max_5xx: 0,\n            ci_max_criticals: 0,\n            ci_max_warnings: None,\n            ci_max_avg_response: None,\n            ci_min_pages: 10,\n            ci_min_assets: 10,\n            ci_min_documents: 0,\n        }\n    }\n\n    #[test]\n    fn ci_defaults() {\n        let opts = make_default_core_options();\n        assert!(!opts.ci);\n        assert_eq!(opts.ci_min_score, 5.0);\n        assert_eq!(opts.ci_max_404, 0);\n        assert_eq!(opts.ci_max_5xx, 0);\n        assert_eq!(opts.ci_max_criticals, 0);\n    }\n\n    #[test]\n    fn apply_ci_bool() {\n        let mut opts = make_default_core_options();\n        opts.apply_option_value(\"ci\", &OptionValue::Bool(true)).unwrap();\n        assert!(opts.ci);\n    }\n\n    #[test]\n    fn apply_ci_min_score() {\n        let mut opts = make_default_core_options();\n        opts.apply_option_value(\"ciMinScore\", &OptionValue::Float(7.5)).unwrap();\n        assert_eq!(opts.ci_min_score, 7.5);\n    }\n\n    #[test]\n    fn apply_ci_max_404() {\n        let mut opts = 
make_default_core_options();\n        opts.apply_option_value(\"ciMax404\", &OptionValue::Int(5)).unwrap();\n        assert_eq!(opts.ci_max_404, 5);\n    }\n\n    #[test]\n    fn apply_ci_max_warnings() {\n        let mut opts = make_default_core_options();\n        opts.apply_option_value(\"ciMaxWarnings\", &OptionValue::Int(10)).unwrap();\n        assert_eq!(opts.ci_max_warnings, Some(10));\n    }\n\n    #[test]\n    fn apply_ci_max_avg_response() {\n        let mut opts = make_default_core_options();\n        opts.apply_option_value(\"ciMaxAvgResponse\", &OptionValue::Float(2.0))\n            .unwrap();\n        assert_eq!(opts.ci_max_avg_response, Some(2.0));\n    }\n\n    #[test]\n    fn apply_unknown_key_no_error() {\n        let mut opts = make_default_core_options();\n        let result = opts.apply_option_value(\"nonExistent\", &OptionValue::Bool(true));\n        assert!(result.is_ok());\n    }\n\n    #[test]\n    fn ci_option_group_exists() {\n        let options = get_options();\n        let group = options.get_group(GROUP_CI_CD_SETTINGS);\n        assert!(group.is_some());\n        let group = group.unwrap();\n        assert_eq!(group.options.len(), 15);\n    }\n\n    // ---- Duration parsing tests ----\n\n    #[test]\n    fn parse_duration_days() {\n        assert_eq!(parse_duration_to_secs(\"7d\"), 7 * 86400);\n    }\n\n    #[test]\n    fn parse_duration_hours() {\n        assert_eq!(parse_duration_to_secs(\"24h\"), 24 * 3600);\n    }\n\n    #[test]\n    fn parse_duration_minutes() {\n        assert_eq!(parse_duration_to_secs(\"30m\"), 30 * 60);\n    }\n\n    #[test]\n    fn parse_duration_seconds() {\n        assert_eq!(parse_duration_to_secs(\"3600s\"), 3600);\n        assert_eq!(parse_duration_to_secs(\"3600\"), 3600);\n    }\n\n    #[test]\n    fn parse_duration_invalid_number() {\n        // \"abcd\" suffix 'd' → parse \"abc\" fails → fallback to 1 day\n        assert_eq!(parse_duration_to_secs(\"abcd\"), 86400);\n    }\n\n    // ---- Config file 
parsing tests ----\n\n    #[test]\n    fn read_config_file_parses_args() {\n        let dir = std::env::temp_dir();\n        let path = dir.join(\"test_crawler_config_1.conf\");\n        std::fs::write(&path, \"--workers=5\\n--max-reqs-per-sec=20\\n\").unwrap();\n        let args = read_config_file(path.to_str().unwrap()).unwrap();\n        assert_eq!(args, vec![\"--workers=5\", \"--max-reqs-per-sec=20\"]);\n        std::fs::remove_file(&path).ok();\n    }\n\n    #[test]\n    fn read_config_file_ignores_comments_and_blank_lines() {\n        let dir = std::env::temp_dir();\n        let path = dir.join(\"test_crawler_config_2.conf\");\n        std::fs::write(&path, \"# comment\\n\\n--workers=3\\n  # another comment\\n  \\n--debug\\n\").unwrap();\n        let args = read_config_file(path.to_str().unwrap()).unwrap();\n        assert_eq!(args, vec![\"--workers=3\", \"--debug\"]);\n        std::fs::remove_file(&path).ok();\n    }\n\n    #[test]\n    fn read_config_file_nonexistent_returns_error() {\n        let result = read_config_file(\"/nonexistent/path/config.conf\");\n        assert!(result.is_err());\n    }\n\n    #[test]\n    fn merge_config_file_args_with_explicit_config() {\n        let dir = std::env::temp_dir();\n        let path = dir.join(\"test_crawler_config_3.conf\");\n        std::fs::write(&path, \"--workers=5\\n--debug\\n\").unwrap();\n        let argv = vec![\n            \"siteone-crawler\".to_string(),\n            format!(\"--config-file={}\", path.display()),\n            \"--url=https://example.com\".to_string(),\n        ];\n        let merged = merge_config_file_args(&argv).unwrap();\n        // Config args prepended after binary name, CLI args follow\n        assert_eq!(merged[0], \"siteone-crawler\");\n        assert!(merged.contains(&\"--workers=5\".to_string()));\n        assert!(merged.contains(&\"--debug\".to_string()));\n        assert!(merged.contains(&\"--url=https://example.com\".to_string()));\n        // --config-file itself should 
be filtered out\n        assert!(!merged.iter().any(|a| a.starts_with(\"--config-file=\")));\n        std::fs::remove_file(&path).ok();\n    }\n\n    #[test]\n    fn merge_config_file_args_without_config() {\n        let argv = vec![\"siteone-crawler\".to_string(), \"--url=https://example.com\".to_string()];\n        let merged = merge_config_file_args(&argv).unwrap();\n        // No config file exists, so argv is returned as-is\n        assert_eq!(merged, argv);\n    }\n\n    // ---- New option apply tests for recent features ----\n\n    #[test]\n    fn apply_force_relative_urls() {\n        let mut opts = make_default_core_options();\n        assert!(!opts.force_relative_urls);\n        opts.apply_option_value(\"forceRelativeUrls\", &OptionValue::Bool(true))\n            .unwrap();\n        assert!(opts.force_relative_urls);\n    }\n\n    #[test]\n    fn apply_offline_export_preserve_url_structure() {\n        let mut opts = make_default_core_options();\n        assert!(!opts.offline_export_preserve_url_structure);\n        opts.apply_option_value(\"offlineExportPreserveUrlStructure\", &OptionValue::Bool(true))\n            .unwrap();\n        assert!(opts.offline_export_preserve_url_structure);\n    }\n\n    #[test]\n    fn apply_offline_export_preserve_urls() {\n        let mut opts = make_default_core_options();\n        assert!(!opts.offline_export_preserve_urls);\n        opts.apply_option_value(\"offlineExportPreserveUrls\", &OptionValue::Bool(true))\n            .unwrap();\n        assert!(opts.offline_export_preserve_urls);\n    }\n}\n"
  },
  {
    "path": "src/options/group.rs",
    "content": "// SiteOne Crawler - Option group for organizing options\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n//\r\n\r\nuse indexmap::IndexMap;\r\n\r\nuse super::option::CrawlerOption;\r\n\r\n#[derive(Debug, Clone)]\r\npub struct OptionGroup {\r\n    /// Unique application code for the group\r\n    pub apl_code: String,\r\n\r\n    /// Readable name for the group\r\n    pub name: String,\r\n\r\n    /// Options indexed by property_to_fill name\r\n    pub options: IndexMap<String, CrawlerOption>,\r\n}\r\n\r\nimpl OptionGroup {\r\n    pub fn new(apl_code: &str, name: &str, options: Vec<CrawlerOption>) -> Self {\r\n        let mut options_map = IndexMap::new();\r\n        for option in options {\r\n            options_map.insert(option.property_to_fill.clone(), option);\r\n        }\r\n\r\n        Self {\r\n            apl_code: apl_code.to_string(),\r\n            name: name.to_string(),\r\n            options: options_map,\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/options/mod.rs",
    "content": "// SiteOne Crawler - Options module\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n//\r\n// CLI option definitions and parsing\r\n\r\npub mod core_options;\r\npub mod group;\r\npub mod option;\r\npub mod option_type;\r\n#[allow(clippy::module_inception)]\r\npub mod options;\r\n"
  },
  {
    "path": "src/options/option.rs",
    "content": "// SiteOne Crawler - Option definition and value parsing\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n\nuse std::sync::Mutex;\n\nuse regex::Regex;\n\nuse crate::error::CrawlerError;\nuse crate::utils;\n\nuse super::option_type::OptionType;\n\nstatic EXTRAS_DOMAIN: Mutex<Option<String>> = Mutex::new(None);\n\n#[derive(Debug, Clone)]\npub enum OptionValue {\n    None,\n    Bool(bool),\n    Int(i64),\n    Float(f64),\n    Str(String),\n    Array(Vec<String>),\n}\n\nimpl OptionValue {\n    pub fn as_bool(&self) -> Option<bool> {\n        match self {\n            OptionValue::Bool(v) => Some(*v),\n            _ => None,\n        }\n    }\n\n    pub fn as_int(&self) -> Option<i64> {\n        match self {\n            OptionValue::Int(v) => Some(*v),\n            _ => None,\n        }\n    }\n\n    pub fn as_float(&self) -> Option<f64> {\n        match self {\n            OptionValue::Float(v) => Some(*v),\n            _ => None,\n        }\n    }\n\n    pub fn as_str(&self) -> Option<&str> {\n        match self {\n            OptionValue::Str(v) => Some(v.as_str()),\n            _ => None,\n        }\n    }\n\n    pub fn as_array(&self) -> Option<&Vec<String>> {\n        match self {\n            OptionValue::Array(v) => Some(v),\n            _ => None,\n        }\n    }\n\n    pub fn is_none(&self) -> bool {\n        matches!(self, OptionValue::None)\n    }\n}\n\n#[derive(Debug, Clone)]\npub struct CrawlerOption {\n    /// Option name with '--' prefix, for example \"--user-agent\"\n    pub name: String,\n\n    /// Optional alternative (short) name with '-', for example \"-ua\" for \"--user-agent\"\n    pub alt_name: Option<String>,\n\n    /// Property name to fill in CoreOptions struct\n    pub property_to_fill: String,\n\n    /// Option value type\n    pub option_type: OptionType,\n\n    /// Is array of comma delimited values\n    pub is_array: bool,\n\n    /// Description for help\n    pub description: String,\n\n    /// Default value as string 
representation\n    pub default_value: Option<String>,\n\n    /// Whether the value can be null/empty\n    pub is_nullable: bool,\n\n    /// Whether the option can be specified multiple times\n    pub callable_multiple_times: bool,\n\n    /// Optional extras (e.g. min/max range for numeric types)\n    pub extras: Option<Vec<String>>,\n\n    /// Parsed value from argv\n    value: Option<OptionValue>,\n\n    /// Whether value has been set from argv\n    is_value_set: bool,\n\n    /// Whether the user explicitly provided this option on the command line\n    /// (as opposed to using the default value)\n    is_explicitly_set: bool,\n}\n\nimpl CrawlerOption {\n    #[allow(clippy::too_many_arguments)]\n    pub fn new(\n        name: &str,\n        alt_name: Option<&str>,\n        property_to_fill: &str,\n        option_type: OptionType,\n        is_array: bool,\n        description: &str,\n        default_value: Option<&str>,\n        is_nullable: bool,\n        callable_multiple_times: bool,\n        extras: Option<Vec<String>>,\n    ) -> Self {\n        Self {\n            name: name.to_string(),\n            alt_name: alt_name.map(|s| s.to_string()),\n            property_to_fill: property_to_fill.to_string(),\n            option_type,\n            is_array,\n            description: description.to_string(),\n            default_value: default_value.map(|s| s.to_string()),\n            is_nullable,\n            callable_multiple_times,\n            extras,\n            value: None,\n            is_value_set: false,\n            is_explicitly_set: false,\n        }\n    }\n\n    pub fn set_value_from_argv(&mut self, argv: &[String]) -> Result<(), CrawlerError> {\n        if self.is_value_set {\n            return Err(CrawlerError::Config(format!(\n                \"Value for option {} is already set. 
Did you call set_value_from_argv() twice?\",\n                self.name\n            )));\n        }\n\n        let mut value: Option<String> = self.default_value.clone();\n        let mut array_values: Vec<String> = if self.is_array {\n            if let Some(ref dv) = self.default_value {\n                if dv.is_empty() { Vec::new() } else { vec![dv.clone()] }\n            } else {\n                Vec::new()\n            }\n        } else {\n            Vec::new()\n        };\n        let mut has_default_been_replaced = false;\n        let mut defined_by_alt_name = false;\n\n        // Find value in arguments\n        let mut i = 0;\n        while i < argv.len() {\n            let arg = &argv[i];\n            let mut arg_value: Option<String> = None;\n\n            if arg == &self.name || self.alt_name.as_deref() == Some(arg.as_str()) {\n                if self.option_type == OptionType::Bool {\n                    // Flag-style: --debug or -d (no value, implies true)\n                    arg_value = Some(\"true\".to_string());\n                } else {\n                    // Non-bool option without '=': look for value in next argument\n                    if i + 1 < argv.len() && !argv[i + 1].starts_with('-') {\n                        i += 1;\n                        arg_value = Some(argv[i].clone());\n                    } else {\n                        // No value provided — set to empty so validation catches it\n                        arg_value = Some(String::new());\n                    }\n                }\n            } else if let Some(rest) = arg.strip_prefix(&format!(\"{}=\", self.name)) {\n                arg_value = Some(rest.to_string());\n            } else if let Some(ref alt) = self.alt_name\n                && let Some(rest) = arg.strip_prefix(&format!(\"{}=\", alt))\n            {\n                arg_value = Some(rest.to_string());\n                defined_by_alt_name = true;\n            }\n\n            if let Some(ref mut av) = 
arg_value {\n                self.is_explicitly_set = true;\n                unquote_value(av);\n\n                if self.is_array {\n                    if !has_default_been_replaced {\n                        // First user-provided value replaces the default\n                        array_values.clear();\n                        has_default_been_replaced = true;\n                    }\n                    if av.contains(',') {\n                        let parts: Vec<String> = av\n                            .split(',')\n                            .map(|s| s.trim().to_string())\n                            .filter(|s| !s.is_empty())\n                            .map(|mut s| {\n                                unquote_value(&mut s);\n                                s\n                            })\n                            .collect();\n                        array_values.extend(parts);\n                    } else {\n                        array_values.push(av.clone());\n                    }\n                } else {\n                    value = Some(av.clone());\n                }\n            }\n            i += 1;\n        }\n\n        // Handle array default from string\n        if self.is_array\n            && let Some(ref v) = value\n            && !v.is_empty()\n            && array_values.is_empty()\n        {\n            let mut unquoted = v.clone();\n            unquote_value(&mut unquoted);\n            let parts: Vec<String> = unquoted\n                .split(',')\n                .map(|s| s.trim().to_string())\n                .filter(|s| !s.is_empty())\n                .map(|mut s| {\n                    unquote_value(&mut s);\n                    s\n                })\n                .collect();\n            array_values = parts;\n        }\n\n        // Validate and correct types\n        if self.is_array {\n            for item in &array_values {\n                self.validate_value(Some(item), defined_by_alt_name)?;\n            }\n       
     // Filter out empty strings\n            let filtered: Vec<String> = array_values.into_iter().filter(|s| !s.trim().is_empty()).collect();\n            self.value = Some(OptionValue::Array(filtered));\n        } else {\n            self.validate_value(value.as_deref(), defined_by_alt_name)?;\n            self.value = Some(self.correct_value_type(value.as_deref())?);\n        }\n\n        self.is_value_set = true;\n        Ok(())\n    }\n\n    pub fn is_explicitly_set(&self) -> bool {\n        self.is_explicitly_set\n    }\n\n    pub fn get_value(&self) -> Result<&OptionValue, CrawlerError> {\n        if !self.is_value_set {\n            return Err(CrawlerError::Config(format!(\n                \"Value for option {} is not set. Did you call set_value_from_argv()?\",\n                self.name\n            )));\n        }\n        match &self.value {\n            Some(v) => Ok(v),\n            None => Err(CrawlerError::Config(format!(\n                \"Value for option {} is not set\",\n                self.name\n            ))),\n        }\n    }\n\n    fn validate_value(&self, value: Option<&str>, _defined_by_alt_name: bool) -> Result<(), CrawlerError> {\n        // Always use the long name for error messages\n        let display_name = &self.name;\n\n        // Handle nullable\n        if self.is_nullable && (value.is_none() || value == Some(\"\")) {\n            return Ok(());\n        }\n\n        let val = match value {\n            Some(v) => v,\n            None => {\n                if !self.is_nullable {\n                    // URL type gives specific error, not generic \"is required\"\n                    if self.option_type == OptionType::Url {\n                        return Err(CrawlerError::Config(format!(\n                            \"Option {} must be valid URL (starting with http:// or https://)\",\n                            display_name\n                        )));\n                    }\n                    return 
Err(CrawlerError::Config(format!(\"Option {} is required\", display_name)));\n                }\n                return Ok(());\n            }\n        };\n\n        match self.option_type {\n            OptionType::Int => {\n                let parsed: Result<i64, _> = val.parse();\n                match parsed {\n                    Ok(n) if n < 0 => {\n                        return Err(CrawlerError::Config(format!(\n                            \"Option {} ({}) must be positive integer\",\n                            display_name, val\n                        )));\n                    }\n                    Err(_) => {\n                        return Err(CrawlerError::Config(format!(\n                            \"Option {} ({}) must be positive integer\",\n                            display_name, val\n                        )));\n                    }\n                    _ => {}\n                }\n            }\n            OptionType::Float => {\n                if val.parse::<f64>().is_err() {\n                    return Err(CrawlerError::Config(format!(\n                        \"Option {} ({}) must be float\",\n                        display_name, val\n                    )));\n                }\n            }\n            OptionType::Bool => {\n                if ![\"1\", \"0\", \"yes\", \"no\", \"true\", \"false\"].contains(&val) {\n                    return Err(CrawlerError::Config(format!(\n                        \"Option {} ({}) must be boolean (1/0, yes/no, true/false)\",\n                        display_name, val\n                    )));\n                }\n            }\n            OptionType::String => {\n                // Strings are always valid\n            }\n            OptionType::SizeMG => {\n                let re = Regex::new(r\"^\\d+(\\.\\d+)?[MG]$\").map_err(|e| CrawlerError::Config(e.to_string()))?;\n                if !re.is_match(val) {\n                    return Err(CrawlerError::Config(format!(\n                        
\"Option {} ({}) must be string with M/G suffix (for example 512M or 1.5G)\",\n                        display_name, val\n                    )));\n                }\n            }\n            OptionType::Regex => {\n                if fancy_regex::Regex::new(val).is_err() {\n                    return Err(CrawlerError::Config(format!(\n                        \"Option {} ({}) must be valid PCRE regular expression\",\n                        display_name, val\n                    )));\n                }\n            }\n            OptionType::Url => {\n                let corrected = correct_url(val);\n                if corrected.is_empty() {\n                    return Err(CrawlerError::Config(format!(\n                        \"Option {} must be valid URL (starting with http:// or https://)\",\n                        display_name\n                    )));\n                }\n                if url::Url::parse(&corrected).is_err() {\n                    // Try with URL-encoded version for international characters\n                    let encoded: String = corrected\n                        .chars()\n                        .map(|c| {\n                            if c.is_ascii_graphic() || c == ' ' {\n                                c.to_string()\n                            } else {\n                                percent_encoding::utf8_percent_encode(\n                                    &c.to_string(),\n                                    percent_encoding::NON_ALPHANUMERIC,\n                                )\n                                .to_string()\n                            }\n                        })\n                        .collect();\n                    if url::Url::parse(&encoded).is_err() {\n                        return Err(CrawlerError::Config(format!(\n                            \"Option {} ({}) must be valid URL\",\n                            display_name, val\n                        )));\n                    }\n                }\n   
         }\n            OptionType::Email => {\n                // Simple email validation\n                if !val.contains('@') || !val.contains('.') {\n                    return Err(CrawlerError::Config(format!(\n                        \"Option {} ({}) must be valid email '{}'\",\n                        display_name, val, val\n                    )));\n                }\n            }\n            OptionType::File => {\n                // File path validation - just ensure it's a non-empty string.\n                // Writability is checked at export time.\n            }\n            OptionType::Dir => {\n                if val == \"off\" || val.is_empty() {\n                    return Ok(());\n                }\n                let mut path = val.to_string();\n                replace_placeholders(&mut path);\n                let abs_path = utils::get_absolute_path(&path);\n                if abs_path.trim().is_empty() {\n                    return Err(CrawlerError::Config(format!(\n                        \"Option {} ({}) must be string\",\n                        display_name, val\n                    )));\n                }\n                let dir_path = std::path::Path::new(&abs_path);\n                if !dir_path.exists() && std::fs::create_dir_all(dir_path).is_err() {\n                    return Err(CrawlerError::Config(format!(\n                        \"Option {} ({}) must be valid and writable directory. 
Check permissions.\",\n                        display_name, abs_path\n                    )));\n                }\n            }\n            OptionType::HostAndPort => {\n                let re = Regex::new(r\"^[a-zA-Z0-9\\-.:]{1,100}:[0-9]{1,5}$\")\n                    .map_err(|e| CrawlerError::Config(e.to_string()))?;\n                if !re.is_match(val) {\n                    return Err(CrawlerError::Config(format!(\n                        \"Option {} ({}) must be in format host:port\",\n                        display_name, val\n                    )));\n                }\n            }\n            OptionType::ReplaceContent => {\n                let re = Regex::new(r\"^.+->\").map_err(|e| CrawlerError::Config(e.to_string()))?;\n                if !re.is_match(val) {\n                    return Err(CrawlerError::Config(format!(\n                        \"Option {} ({}) must be in format `foo -> bar` or `/preg-regexp/ -> bar`)\",\n                        display_name, val\n                    )));\n                }\n\n                let parts: Vec<&str> = val.splitn(2, \"->\").collect();\n                let replace_from = parts[0].trim();\n                let is_regex = crate::utils::is_regex_pattern(replace_from);\n\n                if is_regex && Regex::new(replace_from).is_err() {\n                    return Err(CrawlerError::Config(format!(\n                        \"Option {} and its first part ({}) must be valid PCRE regular expression\",\n                        display_name, replace_from\n                    )));\n                }\n            }\n            OptionType::Resolve => {\n                // --resolve is in the same format as curl --resolve (ipv4 and ipv6 supported)\n                let re = Regex::new(r\"^[a-zA-Z0-9\\-.]{1,200}:[0-9]{1,5}:[a-fA-F0-9\\-.:]{1,100}$\")\n                    .map_err(|e| CrawlerError::Config(e.to_string()))?;\n                if !re.is_match(val) {\n                    return 
Err(CrawlerError::Config(format!(\n                        \"Option {} ({}) must be in format `domain:port:ip`\",\n                        display_name, val\n                    )));\n                }\n            }\n        }\n\n        // Extra validations for numeric range\n        if (self.option_type == OptionType::Int || self.option_type == OptionType::Float)\n            && self.extras.as_ref().map(|e| e.len()) == Some(2)\n            && let Ok(num) = val.parse::<f64>()\n        {\n            let extras = self.extras.as_ref().map(|e| {\n                let min = e[0].parse::<f64>().unwrap_or(f64::MIN);\n                let max = e[1].parse::<f64>().unwrap_or(f64::MAX);\n                (min, max)\n            });\n            if let Some((min, max)) = extras\n                && (num < min || num > max)\n            {\n                return Err(CrawlerError::Config(format!(\n                    \"Option {} ({}) must be in range {}-{}\",\n                    display_name, val, min, max\n                )));\n            }\n        }\n\n        Ok(())\n    }\n\n    fn correct_value_type(&self, value: Option<&str>) -> Result<OptionValue, CrawlerError> {\n        if self.is_nullable && (value.is_none() || value == Some(\"\")) {\n            return Ok(OptionValue::None);\n        }\n\n        let val = match value {\n            Some(v) => v,\n            None => return Ok(OptionValue::None),\n        };\n\n        match self.option_type {\n            OptionType::Int => {\n                let n = val\n                    .parse::<i64>()\n                    .map_err(|_| CrawlerError::Config(format!(\"Cannot parse '{}' as integer\", val)))?;\n                Ok(OptionValue::Int(n))\n            }\n            OptionType::Float => {\n                let n = val\n                    .parse::<f64>()\n                    .map_err(|_| CrawlerError::Config(format!(\"Cannot parse '{}' as float\", val)))?;\n                Ok(OptionValue::Float(n))\n            }\n     
       OptionType::Bool => {\n                let b = [\"1\", \"yes\", \"true\"].contains(&val);\n                Ok(OptionValue::Bool(b))\n            }\n            OptionType::String\n            | OptionType::SizeMG\n            | OptionType::Regex\n            | OptionType::Email\n            | OptionType::HostAndPort\n            | OptionType::ReplaceContent\n            | OptionType::Resolve => Ok(OptionValue::Str(val.to_string())),\n            OptionType::Url => {\n                let corrected = correct_url(val);\n                Ok(OptionValue::Str(corrected))\n            }\n            OptionType::File => {\n                let mut path = val.to_string();\n                replace_placeholders(&mut path);\n                Ok(OptionValue::Str(utils::get_absolute_path(&path)))\n            }\n            OptionType::Dir => {\n                if val == \"off\" || val.is_empty() {\n                    return Ok(OptionValue::Str(val.to_string()));\n                }\n                let mut path = val.to_string();\n                replace_placeholders(&mut path);\n                Ok(OptionValue::Str(utils::get_absolute_path(&path)))\n            }\n        }\n    }\n\n    pub fn set_extras_domain(domain: Option<&str>) {\n        if let Ok(mut d) = EXTRAS_DOMAIN.lock() {\n            *d = domain.map(|s| s.to_string());\n        }\n    }\n}\n\n/// Correct URL to valid URL, e.g. 
crawler.siteone.io => https://crawler.siteone.io,\n/// or localhost to http://localhost\nfn correct_url(url: &str) -> String {\n    if !url.starts_with(\"http\") {\n        let re = Regex::new(r\"^[a-zA-Z0-9\\-.:]{1,100}$\").ok();\n        if re.map(|r| r.is_match(url)).unwrap_or(false) {\n            let default_protocol = if url.contains('.') { \"https\" } else { \"http\" };\n            return format!(\"{}://{}\", default_protocol, url.trim_start_matches('/'));\n        }\n    }\n    url.to_string()\n}\n\n/// Remove quotes from given string - as a quote we consider chars \" ' `\nfn unquote_value(value: &mut String) {\n    let bytes = value.as_bytes();\n    if bytes.len() >= 2 {\n        let first = bytes[0];\n        let last = bytes[bytes.len() - 1];\n        if (first == b'\"' && last == b'\"') || (first == b'\\'' && last == b'\\'') || (first == b'`' && last == b'`') {\n            *value = value[1..value.len() - 1].to_string();\n        }\n    }\n}\n\n/// Replace placeholders like %domain%, %date%, %datetime% in file/dir paths\nfn replace_placeholders(value: &mut String) {\n    let domain = EXTRAS_DOMAIN.lock().ok().and_then(|d| d.clone()).unwrap_or_default();\n\n    let now = chrono::Local::now();\n    let date = now.format(\"%Y-%m-%d\").to_string();\n    let datetime = now.format(\"%Y%m%d-%H%M%S\").to_string();\n\n    *value = value\n        .replace(\"%domain%\", &domain)\n        .replace(\"%date%\", &date)\n        .replace(\"%datetime%\", &datetime);\n}\n"
  },
  {
    "path": "src/options/option_type.rs",
    "content": "// SiteOne Crawler - Option type definitions\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n//\r\n\r\nuse std::fmt;\r\n\r\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]\r\npub enum OptionType {\r\n    Int,\r\n    Float,\r\n    Bool,\r\n    String,\r\n    SizeMG,\r\n    Email,\r\n    Url,\r\n    Regex,\r\n    File,\r\n    Dir,\r\n    HostAndPort,\r\n    ReplaceContent,\r\n    Resolve,\r\n}\r\n\r\nimpl fmt::Display for OptionType {\r\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\r\n        match self {\r\n            OptionType::Int => write!(f, \"INT\"),\r\n            OptionType::Float => write!(f, \"FLOAT\"),\r\n            OptionType::Bool => write!(f, \"BOOL\"),\r\n            OptionType::String => write!(f, \"STRING\"),\r\n            OptionType::SizeMG => write!(f, \"SIZE_M_G\"),\r\n            OptionType::Email => write!(f, \"EMAIL\"),\r\n            OptionType::Url => write!(f, \"URL\"),\r\n            OptionType::Regex => write!(f, \"REGEX\"),\r\n            OptionType::File => write!(f, \"FILE\"),\r\n            OptionType::Dir => write!(f, \"DIR\"),\r\n            OptionType::HostAndPort => write!(f, \"HOST_AND_PORT\"),\r\n            OptionType::ReplaceContent => write!(f, \"REPLACE_CONTENT\"),\r\n            OptionType::Resolve => write!(f, \"RESOLVE\"),\r\n        }\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/options/options.rs",
    "content": "// SiteOne Crawler - Options registry\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n//\r\n\r\nuse indexmap::IndexMap;\r\n\r\nuse super::group::OptionGroup;\r\n\r\n#[derive(Debug, Clone)]\r\npub struct Options {\r\n    groups: IndexMap<String, OptionGroup>,\r\n}\r\n\r\nimpl Options {\r\n    pub fn new() -> Self {\r\n        Self {\r\n            groups: IndexMap::new(),\r\n        }\r\n    }\r\n\r\n    pub fn add_group(&mut self, group: OptionGroup) {\r\n        self.groups.insert(group.apl_code.clone(), group);\r\n    }\r\n\r\n    pub fn get_groups(&self) -> &IndexMap<String, OptionGroup> {\r\n        &self.groups\r\n    }\r\n\r\n    pub fn get_groups_mut(&mut self) -> &mut IndexMap<String, OptionGroup> {\r\n        &mut self.groups\r\n    }\r\n\r\n    pub fn get_group(&self, apl_code: &str) -> Option<&OptionGroup> {\r\n        self.groups.get(apl_code)\r\n    }\r\n\r\n    pub fn get_group_mut(&mut self, apl_code: &str) -> Option<&mut OptionGroup> {\r\n        self.groups.get_mut(apl_code)\r\n    }\r\n\r\n    /// Check if a specific option was explicitly provided on the command line\r\n    /// (as opposed to using its default value). `property` is the camelCase property name.\r\n    pub fn is_explicitly_set(&self, property: &str) -> bool {\r\n        self.groups\r\n            .values()\r\n            .any(|g| g.options.get(property).is_some_and(|o| o.is_explicitly_set()))\r\n    }\r\n}\r\n\r\nimpl Default for Options {\r\n    fn default() -> Self {\r\n        Self::new()\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/output/json_output.rs",
    "content": "// SiteOne Crawler - JsonOutput (JSON output)\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n\nuse std::collections::HashMap;\nuse std::io::Write;\n\nuse serde_json::{Value, json};\n\nuse crate::components::summary::summary::Summary;\nuse crate::components::super_table::SuperTable;\nuse crate::extra_column::ExtraColumn;\nuse crate::output::output::{BasicStats, CrawlerInfo, Output};\nuse crate::output::output_type::OutputType;\nuse crate::scoring::ci_gate::CiGateResult;\nuse crate::scoring::quality_score::QualityScores;\nuse crate::utils;\n\npub struct JsonOutput {\n    crawler_info: CrawlerInfo,\n    print_to_output: bool,\n\n    json: serde_json::Map<String, Value>,\n\n    /// Extra columns from options (user-specified)\n    extra_columns: Vec<ExtraColumn>,\n\n    /// Serialized options for JSON output\n    options_json: Option<Value>,\n\n    /// For progress display on stderr\n    hide_progress_bar: bool,\n    max_stderr_length: usize,\n}\n\nimpl JsonOutput {\n    pub fn new(\n        crawler_info: CrawlerInfo,\n        extra_columns: Vec<ExtraColumn>,\n        hide_progress_bar: bool,\n        print_to_output: bool,\n        options_json: Option<Value>,\n    ) -> Self {\n        Self {\n            crawler_info,\n            print_to_output,\n            json: serde_json::Map::new(),\n            extra_columns,\n            options_json,\n            hide_progress_bar,\n            max_stderr_length: 0,\n        }\n    }\n\n    pub fn get_json(&self) -> String {\n        let value = Value::Object(self.json.clone());\n        serde_json::to_string_pretty(&value)\n            .unwrap_or_else(|e| format!(\"{{\\\"error\\\": \\\"unable to serialize JSON: {}\\\"}}\", e))\n    }\n}\n\nimpl Output for JsonOutput {\n    fn add_banner(&mut self) {\n        self.json.insert(\n            \"crawler\".to_string(),\n            serde_json::to_value(&self.crawler_info).unwrap_or(Value::Null),\n        );\n    }\n\n    fn add_used_options(&mut self) {\n        if 
let Some(ref options) = self.options_json {\n            self.json.insert(\"options\".to_string(), options.clone());\n        }\n    }\n\n    fn set_extra_columns_from_analysis(&mut self, extra_columns: Vec<ExtraColumn>) {\n        let columns_json: Vec<Value> = extra_columns\n            .iter()\n            .map(|col| serde_json::to_value(col).unwrap_or(Value::Null))\n            .collect();\n        self.json\n            .insert(\"extraColumnsFromAnalysis\".to_string(), Value::Array(columns_json));\n    }\n\n    fn add_table_header(&mut self) {\n        self.json.insert(\"results\".to_string(), Value::Array(Vec::new()));\n    }\n\n    fn add_table_row(\n        &mut self,\n        response_headers: &HashMap<String, String>,\n        url: &str,\n        status: i32,\n        elapsed_time: f64,\n        size: i64,\n        content_type: i32,\n        extra_parsed_content: &HashMap<String, String>,\n        progress_status: &str,\n        cache_type_flags: i32,\n        cache_lifetime: Option<i32>,\n    ) {\n        let status_str = utils::get_http_client_code_with_error_description(status, false);\n\n        // extras: empty array [] when no extra columns, object {} when populated\n        let extras_value = if self.extra_columns.is_empty() {\n            Value::Array(Vec::new())\n        } else {\n            let mut extras = serde_json::Map::new();\n            for extra_column in &self.extra_columns {\n                let header_name = &extra_column.name;\n                let value = if let Some(v) = extra_parsed_content.get(header_name) {\n                    v.trim().to_string()\n                } else if let Some(v) = response_headers.get(&header_name.to_lowercase()) {\n                    v.trim().to_string()\n                } else {\n                    String::new()\n                };\n                extras.insert(header_name.clone(), Value::String(value));\n            }\n            Value::Object(extras)\n        };\n\n        let row = json!({\n    
        \"url\": url,\n            \"status\": status_str,\n            \"elapsedTime\": (elapsed_time * 1000.0).round() / 1000.0,\n            \"size\": size,\n            \"type\": content_type,\n            \"cacheTypeFlags\": cache_type_flags,\n            \"cacheLifetime\": cache_lifetime,\n            \"extras\": extras_value,\n        });\n\n        if let Some(Value::Array(results)) = self.json.get_mut(\"results\") {\n            results.push(row);\n        }\n\n        // Print progress to stderr in JSON mode\n        if !self.hide_progress_bar && self.print_to_output {\n            let parts: Vec<&str> = progress_status.splitn(2, '/').collect();\n            let done: usize = parts.first().and_then(|s| s.parse().ok()).unwrap_or(0);\n            let total: usize = parts.get(1).and_then(|s| s.parse().ok()).unwrap_or(1);\n\n            let console_width = utils::get_console_width();\n            let text_width_without_url: usize = 65;\n\n            let truncated_url = utils::truncate_in_two_thirds(\n                url,\n                console_width.saturating_sub(text_width_without_url),\n                \"\\u{2026}\",\n                None,\n            );\n\n            let progress_to_stderr = format!(\n                \"\\rProgress: {:<7} | {} {} | {}\",\n                progress_status,\n                utils::get_progress_bar(done, total, 25),\n                utils::get_formatted_duration(elapsed_time),\n                truncated_url,\n            );\n\n            self.max_stderr_length = self.max_stderr_length.max(progress_to_stderr.len());\n            let padded = format!(\"{:<width$}\", progress_to_stderr, width = self.max_stderr_length);\n\n            eprint!(\"{}\", padded);\n            let _ = std::io::stderr().flush();\n        }\n    }\n\n    fn add_super_table(&mut self, table: &SuperTable) {\n        if !self.json.contains_key(\"tables\") {\n            self.json\n                .insert(\"tables\".to_string(), 
Value::Object(serde_json::Map::new()));\n        }\n\n        if let Some(table_json) = table.get_json_output()\n            && let Some(Value::Object(tables)) = self.json.get_mut(\"tables\")\n        {\n            tables.insert(table.apl_code.clone(), table_json);\n        }\n    }\n\n    fn add_total_stats(&mut self, stats: &BasicStats) {\n        if self.print_to_output {\n            eprintln!(\"\\n\");\n        }\n\n        // Build countByStatus as string-keyed object (JSON requires string keys)\n        let count_by_status: serde_json::Map<String, Value> = stats\n            .count_by_status\n            .iter()\n            .map(|(k, v)| (k.to_string(), json!(*v)))\n            .collect();\n\n        let stats_json = json!({\n            \"totalUrls\": stats.total_urls,\n            \"totalSize\": stats.total_size,\n            \"totalSizeFormatted\": stats.total_size_formatted,\n            \"totalExecutionTime\": stats.total_execution_time,\n            \"totalRequestsTimes\": stats.total_requests_times,\n            \"totalRequestsTimesAvg\": stats.total_requests_times_avg,\n            \"totalRequestsTimesMin\": stats.total_requests_times_min,\n            \"totalRequestsTimesMax\": stats.total_requests_times_max,\n            \"countByStatus\": count_by_status,\n        });\n        self.json.insert(\"stats\".to_string(), stats_json);\n    }\n\n    fn add_notice(&mut self, text: &str) {\n        if !self.json.contains_key(\"notice\") {\n            self.json.insert(\"notice\".to_string(), Value::Array(Vec::new()));\n        }\n\n        let now = chrono::Local::now();\n        let timestamped = format!(\"{} | {}\", now.format(\"%Y-%m-%d %H:%M:%S\"), text);\n\n        if let Some(Value::Array(notices)) = self.json.get_mut(\"notice\") {\n            notices.push(Value::String(timestamped));\n        }\n    }\n\n    fn add_error(&mut self, text: &str) {\n        if !self.json.contains_key(\"error\") {\n            self.json.insert(\"error\".to_string(), 
Value::Array(Vec::new()));\n        }\n\n        let now = chrono::Local::now();\n        let timestamped = format!(\"{} | {}\", now.format(\"%Y-%m-%d %H:%M:%S\"), text);\n\n        if let Some(Value::Array(errors)) = self.json.get_mut(\"error\") {\n            errors.push(Value::String(timestamped));\n        }\n    }\n\n    fn add_quality_scores(&mut self, scores: &QualityScores) {\n        if let Ok(value) = serde_json::to_value(scores) {\n            self.json.insert(\"qualityScores\".to_string(), value);\n        }\n    }\n\n    fn add_ci_gate_result(&mut self, result: &CiGateResult) {\n        if let Ok(value) = serde_json::to_value(result) {\n            self.json.insert(\"ciGate\".to_string(), value);\n        }\n    }\n\n    fn add_summary(&mut self, summary: &mut Summary) {\n        if let Ok(summary_value) = serde_json::to_value(summary) {\n            self.json.insert(\"summary\".to_string(), summary_value);\n        }\n    }\n\n    fn set_export_file_paths(\n        &mut self,\n        offline_paths: Option<&HashMap<String, String>>,\n        markdown_paths: Option<&HashMap<String, String>>,\n    ) {\n        if let Some(Value::Array(results)) = self.json.get_mut(\"results\") {\n            for result in results.iter_mut() {\n                if let Some(url) = result.get(\"url\").and_then(|v| v.as_str()) {\n                    let url_owned = url.to_string();\n                    if let Some(paths) = offline_paths\n                        && let Some(path) = paths.get(&url_owned)\n                        && let Some(obj) = result.as_object_mut()\n                    {\n                        obj.insert(\"offlineFilePath\".to_string(), Value::String(path.clone()));\n                    }\n                    if let Some(paths) = markdown_paths\n                        && let Some(path) = paths.get(&url_owned)\n                        && let Some(obj) = result.as_object_mut()\n                    {\n                        
obj.insert(\"markdownFilePath\".to_string(), Value::String(path.clone()));\n                    }\n                }\n            }\n        }\n    }\n\n    fn get_type(&self) -> OutputType {\n        OutputType::Json\n    }\n\n    fn end(&mut self) {\n        if !self.print_to_output {\n            return;\n        }\n\n        let json = self.get_json();\n        println!(\"{}\", json);\n    }\n\n    fn get_json_content(&self) -> Option<String> {\n        Some(self.get_json())\n    }\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n    use crate::scoring::ci_gate::{CiCheck, CiGateResult};\n    use crate::scoring::quality_score::{CategoryScore, QualityScores};\n\n    fn make_json_output() -> JsonOutput {\n        JsonOutput::new(CrawlerInfo::default(), vec![], true, false, None)\n    }\n\n    fn make_pass_result() -> CiGateResult {\n        CiGateResult {\n            passed: true,\n            exit_code: 0,\n            checks: vec![],\n        }\n    }\n\n    fn make_fail_result() -> CiGateResult {\n        CiGateResult {\n            passed: false,\n            exit_code: 10,\n            checks: vec![\n                CiCheck {\n                    metric: \"Overall score\".into(),\n                    operator: \">=\".into(),\n                    threshold: 5.0,\n                    actual: 3.0,\n                    passed: false,\n                },\n                CiCheck {\n                    metric: \"404 errors\".into(),\n                    operator: \"<=\".into(),\n                    threshold: 0.0,\n                    actual: 2.0,\n                    passed: false,\n                },\n                CiCheck {\n                    metric: \"5xx errors\".into(),\n                    operator: \"<=\".into(),\n                    threshold: 0.0,\n                    actual: 0.0,\n                    passed: true,\n                },\n            ],\n        }\n    }\n\n    fn parse_json(output: &JsonOutput) -> serde_json::Value {\n        
serde_json::from_str(&output.get_json()).unwrap()\n    }\n\n    #[test]\n    fn ci_gate_present_when_added() {\n        let mut output = make_json_output();\n        output.add_ci_gate_result(&make_pass_result());\n        let json = parse_json(&output);\n        assert!(json.get(\"ciGate\").is_some());\n    }\n\n    #[test]\n    fn ci_gate_absent_when_not_added() {\n        let output = make_json_output();\n        let json = parse_json(&output);\n        assert!(json.get(\"ciGate\").is_none());\n    }\n\n    #[test]\n    fn ci_gate_passed_true() {\n        let mut output = make_json_output();\n        output.add_ci_gate_result(&make_pass_result());\n        let json = parse_json(&output);\n        let ci_gate = json.get(\"ciGate\").unwrap();\n        assert_eq!(ci_gate.get(\"passed\").unwrap().as_bool().unwrap(), true);\n        assert_eq!(ci_gate.get(\"exitCode\").unwrap().as_i64().unwrap(), 0);\n    }\n\n    #[test]\n    fn ci_gate_passed_false() {\n        let mut output = make_json_output();\n        output.add_ci_gate_result(&make_fail_result());\n        let json = parse_json(&output);\n        let ci_gate = json.get(\"ciGate\").unwrap();\n        assert_eq!(ci_gate.get(\"passed\").unwrap().as_bool().unwrap(), false);\n        assert_eq!(ci_gate.get(\"exitCode\").unwrap().as_i64().unwrap(), 10);\n    }\n\n    #[test]\n    fn ci_gate_checks_array() {\n        let mut output = make_json_output();\n        output.add_ci_gate_result(&make_fail_result());\n        let json = parse_json(&output);\n        let checks = json[\"ciGate\"][\"checks\"].as_array().unwrap();\n        assert_eq!(checks.len(), 3);\n    }\n\n    #[test]\n    fn quality_scores_in_json() {\n        let mut output = make_json_output();\n        let scores = QualityScores {\n            overall: CategoryScore {\n                name: \"Overall\".into(),\n                code: \"overall\".into(),\n                score: 8.5,\n                label: \"Good\".into(),\n                weight: 
1.0,\n                deductions: vec![],\n            },\n            categories: vec![],\n        };\n        output.add_quality_scores(&scores);\n        let json = parse_json(&output);\n        assert!(json.get(\"qualityScores\").is_some());\n    }\n\n    fn add_sample_rows(output: &mut JsonOutput) {\n        output.add_table_header();\n        let headers = HashMap::new();\n        let extras = HashMap::new();\n        output.add_table_row(\n            &headers,\n            \"https://example.com/\",\n            200,\n            0.1,\n            5000,\n            1,\n            &extras,\n            \"1/3\",\n            0,\n            None,\n        );\n        output.add_table_row(\n            &headers,\n            \"https://example.com/about\",\n            200,\n            0.2,\n            3000,\n            1,\n            &extras,\n            \"2/3\",\n            0,\n            None,\n        );\n        output.add_table_row(\n            &headers,\n            \"https://example.com/missing\",\n            404,\n            0.05,\n            1000,\n            1,\n            &extras,\n            \"3/3\",\n            0,\n            None,\n        );\n    }\n\n    #[test]\n    fn export_file_paths_offline_only() {\n        let mut output = make_json_output();\n        add_sample_rows(&mut output);\n\n        let mut offline = HashMap::new();\n        offline.insert(\"https://example.com/\".to_string(), \"index.html\".to_string());\n        offline.insert(\"https://example.com/about\".to_string(), \"about.html\".to_string());\n\n        output.set_export_file_paths(Some(&offline), None);\n\n        let json = parse_json(&output);\n        let results = json[\"results\"].as_array().unwrap();\n        assert_eq!(results[0][\"offlineFilePath\"], \"index.html\");\n        assert_eq!(results[1][\"offlineFilePath\"], \"about.html\");\n        assert!(results[2].get(\"offlineFilePath\").is_none());\n        // No markdown paths\n        
assert!(results[0].get(\"markdownFilePath\").is_none());\n    }\n\n    #[test]\n    fn export_file_paths_both() {\n        let mut output = make_json_output();\n        add_sample_rows(&mut output);\n\n        let mut offline = HashMap::new();\n        offline.insert(\"https://example.com/\".to_string(), \"index.html\".to_string());\n\n        let mut markdown = HashMap::new();\n        markdown.insert(\"https://example.com/\".to_string(), \"index.md\".to_string());\n        markdown.insert(\"https://example.com/about\".to_string(), \"about.md\".to_string());\n\n        output.set_export_file_paths(Some(&offline), Some(&markdown));\n\n        let json = parse_json(&output);\n        let results = json[\"results\"].as_array().unwrap();\n        assert_eq!(results[0][\"offlineFilePath\"], \"index.html\");\n        assert_eq!(results[0][\"markdownFilePath\"], \"index.md\");\n        assert!(results[1].get(\"offlineFilePath\").is_none());\n        assert_eq!(results[1][\"markdownFilePath\"], \"about.md\");\n        // 404 page has neither\n        assert!(results[2].get(\"offlineFilePath\").is_none());\n        assert!(results[2].get(\"markdownFilePath\").is_none());\n    }\n\n    #[test]\n    fn export_file_paths_none_changes_nothing() {\n        let mut output = make_json_output();\n        add_sample_rows(&mut output);\n\n        output.set_export_file_paths(None, None);\n\n        let json = parse_json(&output);\n        let results = json[\"results\"].as_array().unwrap();\n        assert!(results[0].get(\"offlineFilePath\").is_none());\n        assert!(results[0].get(\"markdownFilePath\").is_none());\n    }\n}\n"
  },
  {
    "path": "src/output/mod.rs",
    "content": "pub mod json_output;\npub mod multi_output;\n#[allow(clippy::module_inception)]\npub mod output;\npub mod output_type;\npub mod text_output;\n"
  },
  {
    "path": "src/output/multi_output.rs",
    "content": "// SiteOne Crawler - MultiOutput (delegates to multiple outputs)\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n\nuse std::collections::HashMap;\n\nuse crate::components::summary::summary::Summary;\nuse crate::components::super_table::SuperTable;\nuse crate::extra_column::ExtraColumn;\nuse crate::output::output::{BasicStats, Output};\nuse crate::output::output_type::OutputType;\nuse crate::scoring::ci_gate::CiGateResult;\nuse crate::scoring::quality_score::QualityScores;\n\n#[derive(Default)]\npub struct MultiOutput {\n    outputs: Vec<Box<dyn Output>>,\n}\n\nimpl MultiOutput {\n    pub fn new() -> Self {\n        Self::default()\n    }\n\n    pub fn add_output(&mut self, output: Box<dyn Output>) {\n        self.outputs.push(output);\n    }\n\n    pub fn get_outputs(&self) -> &[Box<dyn Output>] {\n        &self.outputs\n    }\n\n    pub fn get_outputs_mut(&mut self) -> &mut [Box<dyn Output>] {\n        &mut self.outputs\n    }\n\n    pub fn get_output_by_type(&self, output_type: OutputType) -> Option<&dyn Output> {\n        self.outputs\n            .iter()\n            .find(|o| o.get_type() == output_type)\n            .map(|o| o.as_ref())\n    }\n\n    pub fn get_output_by_type_mut(&mut self, output_type: OutputType) -> Option<&mut Box<dyn Output>> {\n        self.outputs.iter_mut().find(|o| o.get_type() == output_type)\n    }\n}\n\nimpl Output for MultiOutput {\n    fn add_banner(&mut self) {\n        for output in &mut self.outputs {\n            output.add_banner();\n        }\n    }\n\n    fn add_used_options(&mut self) {\n        for output in &mut self.outputs {\n            output.add_used_options();\n        }\n    }\n\n    fn set_extra_columns_from_analysis(&mut self, extra_columns: Vec<ExtraColumn>) {\n        for output in &mut self.outputs {\n            output.set_extra_columns_from_analysis(extra_columns.clone());\n        }\n    }\n\n    fn add_table_header(&mut self) {\n        for output in &mut self.outputs {\n            output.add_table_header();\n        }\n    }\n\n    fn add_table_row(\n        &mut self,\n        response_headers: &HashMap<String, String>,\n        url: &str,\n        status: i32,\n        elapsed_time: f64,\n        size: i64,\n        content_type: i32,\n        extra_parsed_content: &HashMap<String, String>,\n        progress_status: &str,\n        cache_type_flags: i32,\n        cache_lifetime: Option<i32>,\n    ) {\n        for output in &mut self.outputs {\n            output.add_table_row(\n                response_headers,\n                url,\n                status,\n                elapsed_time,\n                size,\n                content_type,\n                extra_parsed_content,\n                progress_status,\n                cache_type_flags,\n                cache_lifetime,\n            );\n        }\n    }\n\n    fn add_super_table(&mut self, table: &SuperTable) {\n        for output in &mut self.outputs {\n            output.add_super_table(table);\n        }\n    }\n\n    fn add_total_stats(&mut self, stats: &BasicStats) {\n        for output in &mut self.outputs {\n            output.add_total_stats(stats);\n        }\n    }\n\n    fn add_notice(&mut self, text: &str) {\n        for output in &mut self.outputs {\n            output.add_notice(text);\n        }\n    }\n\n    fn add_error(&mut self, text: &str) {\n        for output in &mut self.outputs {\n            output.add_error(text);\n        }\n    }\n\n    fn add_quality_scores(&mut self, scores: &QualityScores) {\n        for output in &mut self.outputs {\n            output.add_quality_scores(scores);\n        }\n    }\n\n    fn add_ci_gate_result(&mut self, result: &CiGateResult) {\n        for output in &mut self.outputs {\n            output.add_ci_gate_result(result);\n        }\n    }\n\n    fn add_summary(&mut self, summary: &mut Summary) {\n        for output in &mut self.outputs {\n            output.add_summary(summary);\n        }\n    }\n\n    fn set_export_file_paths(\n        &mut self,\n        offline_paths: Option<&HashMap<String, String>>,\n        markdown_paths: Option<&HashMap<String, String>>,\n    ) {\n        for output in &mut self.outputs {\n            output.set_export_file_paths(offline_paths, markdown_paths);\n        }\n    }\n\n    fn get_type(&self) -> OutputType {\n        OutputType::Multi\n    }\n\n    fn end(&mut self) {\n        for output in &mut self.outputs {\n            output.end();\n        }\n    }\n\n    fn get_output_text(&self) -> Option<String> {\n        for output in &self.outputs {\n            if let Some(text) = output.get_output_text() {\n                return Some(text);\n            }\n        }\n        None\n    }\n\n    fn get_json_content(&self) -> Option<String> {\n        for output in &self.outputs {\n            if let Some(json) = output.get_json_content() {\n                return Some(json);\n            }\n        }\n        None\n    }\n}\n"
  },
  {
    "path": "src/output/output.rs",
    "content": "// SiteOne Crawler - Output trait\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse std::collections::{BTreeMap, HashMap};\r\n\r\nuse crate::components::summary::summary::Summary;\r\nuse crate::components::super_table::SuperTable;\r\nuse crate::extra_column::ExtraColumn;\r\nuse crate::output::output_type::OutputType;\r\nuse crate::scoring::ci_gate::CiGateResult;\r\nuse crate::scoring::quality_score::QualityScores;\r\n\r\n/// Trait for crawler output implementations (text console, JSON, multi-output).\r\n///\r\n/// All implementations must be Send + Sync for use in async contexts.\r\npub trait Output: Send + Sync {\r\n    /// Print the banner (ASCII art for text, crawler info for JSON).\r\n    fn add_banner(&mut self);\r\n\r\n    /// Print the used crawler options.\r\n    fn add_used_options(&mut self);\r\n\r\n    /// Set extra columns from analysis that will be added to the URL table.\r\n    fn set_extra_columns_from_analysis(&mut self, extra_columns: Vec<ExtraColumn>);\r\n\r\n    /// Print the URL table header row.\r\n    fn add_table_header(&mut self);\r\n\r\n    /// Print a single URL table row with crawl result data.\r\n    ///\r\n    /// # Arguments\r\n    /// * `response_headers` - flat response headers (lowercase key -> value)\r\n    /// * `url` - the visited URL\r\n    /// * `status` - HTTP status code (negative for errors)\r\n    /// * `elapsed_time` - request duration in seconds\r\n    /// * `size` - response body size in bytes\r\n    /// * `content_type` - content type ID (see ContentTypeId)\r\n    /// * `extra_parsed_content` - extra column values extracted from the response\r\n    /// * `progress_status` - progress string like \"45/100\"\r\n    /// * `cache_type_flags` - bitwise cache type flags\r\n    /// * `cache_lifetime` - cache lifetime in seconds, if known\r\n    #[allow(clippy::too_many_arguments)]\r\n    fn add_table_row(\r\n        &mut self,\r\n        response_headers: &HashMap<String, String>,\r\n        url: &str,\r\n        status: i32,\r\n        elapsed_time: f64,\r\n        size: i64,\r\n        content_type: i32,\r\n        extra_parsed_content: &HashMap<String, String>,\r\n        progress_status: &str,\r\n        cache_type_flags: i32,\r\n        cache_lifetime: Option<i32>,\r\n    );\r\n\r\n    /// Add a SuperTable to the output.\r\n    fn add_super_table(&mut self, table: &SuperTable);\r\n\r\n    /// Add total crawl statistics.\r\n    ///\r\n    /// # Arguments\r\n    /// * `stats` - basic crawl statistics\r\n    fn add_total_stats(&mut self, stats: &BasicStats);\r\n\r\n    /// Add a notice/informational message.\r\n    fn add_notice(&mut self, text: &str);\r\n\r\n    /// Add an error message.\r\n    fn add_error(&mut self, text: &str);\r\n\r\n    /// Add quality scores before the summary.\r\n    fn add_quality_scores(&mut self, _scores: &QualityScores) {}\r\n\r\n    /// Add CI/CD quality gate result after quality scores.\r\n    fn add_ci_gate_result(&mut self, _result: &CiGateResult) {}\r\n\r\n    /// Add the final summary with status items.\r\n    fn add_summary(&mut self, summary: &mut Summary);\r\n\r\n    /// Get the output type enum variant.\r\n    fn get_type(&self) -> OutputType;\r\n\r\n    /// Finalize and flush the output.\r\n    fn end(&mut self);\r\n\r\n    /// Get the accumulated text output content (for file export).\r\n    /// Only TextOutput implements this meaningfully.\r\n    fn get_output_text(&self) -> Option<String> {\r\n        None\r\n    }\r\n\r\n    /// Get the accumulated JSON output content (for file export).\r\n    /// Only JsonOutput implements this meaningfully.\r\n    fn get_json_content(&self) -> Option<String> {\r\n        None\r\n    }\r\n\r\n    /// Inject export file paths into results (for JSON output).\r\n    /// `offline_paths` maps URL -> relative offline file path.\r\n    /// `markdown_paths` maps URL -> relative markdown file path.\r\n    fn set_export_file_paths(\r\n        &mut self,\r\n        _offline_paths: Option<&HashMap<String, String>>,\r\n        _markdown_paths: Option<&HashMap<String, String>>,\r\n    ) {\r\n    }\r\n}\r\n\r\n/// Basic crawl statistics, used by add_total_stats().\r\n/// This is a simplified version; the full Status/BasicStats will be provided by the result module.\r\n#[derive(Debug, Clone, Default)]\r\npub struct BasicStats {\r\n    pub total_urls: usize,\r\n    pub total_size: i64,\r\n    pub total_size_formatted: String,\r\n    pub total_execution_time: f64,\r\n    pub total_requests_times: f64,\r\n    pub total_requests_times_avg: f64,\r\n    pub total_requests_times_min: f64,\r\n    pub total_requests_times_max: f64,\r\n    pub count_by_status: BTreeMap<i32, usize>,\r\n    pub count_by_content_type: BTreeMap<i32, usize>,\r\n}\r\n\r\n/// Crawler info for the JSON banner output.\r\n#[derive(Debug, Clone, Default, serde::Serialize)]\r\n#[serde(rename_all = \"camelCase\")]\r\npub struct CrawlerInfo {\r\n    pub name: String,\r\n    pub version: String,\r\n    pub executed_at: String,\r\n    pub command: String,\r\n    pub hostname: String,\r\n    pub final_user_agent: String,\r\n    // Used by TextOutput for the banner (not serialized to JSON)\r\n    #[serde(skip)]\r\n    pub url: String,\r\n    #[serde(skip)]\r\n    pub device: String,\r\n    #[serde(skip)]\r\n    pub workers: usize,\r\n}\r\n"
  },
  {
    "path": "src/output/output_type.rs",
    "content": "// SiteOne Crawler - OutputType enum\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse serde::{Deserialize, Serialize};\r\nuse std::fmt;\r\n\r\nuse crate::error::CrawlerError;\r\n\r\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]\r\n#[serde(rename_all = \"lowercase\")]\r\npub enum OutputType {\r\n    Text,\r\n    Json,\r\n    Multi,\r\n}\r\n\r\nimpl OutputType {\r\n    pub fn from_text(text: &str) -> Result<Self, CrawlerError> {\r\n        match text.trim().to_lowercase().as_str() {\r\n            \"text\" => Ok(OutputType::Text),\r\n            \"json\" => Ok(OutputType::Json),\r\n            other => Err(CrawlerError::Parse(format!(\r\n                \"Unknown output type '{}'. Supported values are: {}\",\r\n                other,\r\n                Self::available_text_types().join(\", \")\r\n            ))),\r\n        }\r\n    }\r\n\r\n    pub fn available_text_types() -> Vec<&'static str> {\r\n        vec![\"text\", \"json\"]\r\n    }\r\n\r\n    pub fn as_str(&self) -> &'static str {\r\n        match self {\r\n            OutputType::Text => \"text\",\r\n            OutputType::Json => \"json\",\r\n            OutputType::Multi => \"multi\",\r\n        }\r\n    }\r\n}\r\n\r\nimpl fmt::Display for OutputType {\r\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\r\n        f.write_str(self.as_str())\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/output/text_output.rs",
    "content": "// SiteOne Crawler - TextOutput (console output)\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n\nuse std::collections::HashMap;\nuse std::io::Write;\n\nuse crate::components::summary::summary::Summary;\nuse crate::components::super_table::SuperTable;\nuse crate::extra_column::ExtraColumn;\nuse crate::output::output::{BasicStats, CrawlerInfo, Output};\nuse crate::output::output_type::OutputType;\nuse crate::scoring::ci_gate::CiGateResult;\nuse crate::scoring::quality_score::QualityScores;\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\npub struct TextOutput {\n    version: String,\n    print_to_output: bool,\n    extra_columns_from_analysis_width: usize,\n    extra_columns_width: usize,\n\n    terminal_width: usize,\n    compact_mode: bool,\n    progress_bar_width: usize,\n\n    /// Extra columns from analysis that will be added to the table\n    extra_columns_from_analysis: Vec<ExtraColumn>,\n\n    /// Extra columns from options (user-specified)\n    extra_columns: Vec<ExtraColumn>,\n\n    output_text: String,\n\n    origin_host: String,\n\n    // Options that control output behavior\n    hide_progress_bar: bool,\n    show_scheme_and_host: bool,\n    do_not_truncate_url: bool,\n    add_random_query_params: bool,\n    url_column_size: Option<usize>,\n    show_inline_criticals: bool,\n    show_inline_warnings: bool,\n    hide_columns: Vec<String>,\n    workers: usize,\n    memory_limit: String,\n    disable_animation: bool,\n\n    /// Cached computed URL column size\n    cached_url_column_size: Option<usize>,\n}\n\nimpl TextOutput {\n    #[allow(clippy::too_many_arguments)]\n    pub fn new(\n        crawler_info: CrawlerInfo,\n        extra_columns: Vec<ExtraColumn>,\n        hide_progress_bar: bool,\n        show_scheme_and_host: bool,\n        do_not_truncate_url: bool,\n        add_random_query_params: bool,\n        url_column_size: Option<usize>,\n        show_inline_criticals: bool,\n        show_inline_warnings: bool,\n        
hide_columns: Vec<String>,\n        workers: usize,\n        memory_limit: String,\n        print_to_output: bool,\n        disable_animation: bool,\n    ) -> Self {\n        let terminal_width = utils::get_console_width().min(345);\n        let compact_mode = terminal_width < 140;\n\n        let mut extra_columns_width: usize = 0;\n        for extra_column in &extra_columns {\n            extra_columns_width += extra_column.get_length() + 3; // 3 = 2 spaces + 1 pipe\n        }\n\n        let progress_bar_width = if hide_progress_bar {\n            0\n        } else if compact_mode {\n            8\n        } else {\n            26\n        };\n\n        let origin_host = extract_host(&crawler_info.url);\n\n        Self {\n            version: crawler_info.version.clone(),\n            print_to_output,\n            extra_columns_from_analysis_width: 0,\n            extra_columns_width,\n            terminal_width,\n            compact_mode,\n            progress_bar_width,\n            extra_columns_from_analysis: Vec::new(),\n            extra_columns,\n            output_text: String::new(),\n            origin_host,\n            hide_progress_bar,\n            show_scheme_and_host,\n            do_not_truncate_url,\n            add_random_query_params,\n            url_column_size,\n            show_inline_criticals,\n            show_inline_warnings,\n            hide_columns,\n            workers,\n            memory_limit,\n            disable_animation,\n            cached_url_column_size: None,\n        }\n    }\n\n    fn is_column_hidden(&self, name: &str) -> bool {\n        self.hide_columns.iter().any(|c| c == name)\n    }\n\n    /// Width of hidden columns (to reclaim for URL column sizing).\n    fn hidden_columns_width(&self) -> usize {\n        let mut w = 0;\n        if self.is_column_hidden(\"type\") {\n            w += 11;\n        } // \"| Type     \"\n        if self.is_column_hidden(\"time\") {\n            w += 9;\n        } // \"| Time   \"\n  
      if self.is_column_hidden(\"size\") {\n            w += 9;\n        } // \"| Size   \"\n        if self.is_column_hidden(\"cache\") {\n            w += 9;\n        } // \"| Cache  \"\n        w\n    }\n\n    fn add_to_output(&mut self, output: &str) {\n        if self.print_to_output {\n            print!(\"{}\", output);\n            // Flush stdout to ensure immediate display\n            let _ = std::io::stdout().flush();\n        }\n        self.output_text.push_str(output);\n    }\n\n    pub fn get_output_text(&self) -> &str {\n        &self.output_text\n    }\n\n    fn get_url_column_size(&mut self) -> usize {\n        if let Some(cached) = self.cached_url_column_size {\n            return cached;\n        }\n\n        let size = if let Some(url_col_size) = self.url_column_size {\n            url_col_size.min(184)\n        } else {\n            let status_type_time_size_cache_width: usize = 49usize.saturating_sub(self.hidden_columns_width());\n            let free_reserve: usize = 5;\n\n            let url_column_size = self\n                .terminal_width\n                .saturating_sub(self.progress_bar_width)\n                .saturating_sub(status_type_time_size_cache_width)\n                .saturating_sub(self.extra_columns_width)\n                .saturating_sub(self.extra_columns_from_analysis_width)\n                .saturating_sub(free_reserve);\n\n            url_column_size.clamp(20, 184)\n        };\n\n        self.cached_url_column_size = Some(size);\n        size\n    }\n\n    /// Generate polynomial delays for banner animation.\n    fn get_polynomial_delays(total_time: f64, iterations: usize, power: u32) -> Vec<f64> {\n        let mut delays = Vec::with_capacity(iterations);\n        let mut total_poly_sum: f64 = 0.0;\n\n        for i in 1..=iterations {\n            total_poly_sum += (i as f64).powi(power as i32);\n        }\n\n        for i in 1..=iterations {\n            delays.push(((i as f64).powi(power as i32) / total_poly_sum) * 
total_time);\n        }\n\n        delays\n    }\n}\n\nimpl Output for TextOutput {\n    fn add_banner(&mut self) {\n        // ASCII art banner - generated by https://www.asciiart.eu/image-to-ascii :-)\n        let mut banner = String::from(\"\\n\");\n        banner.push_str(\" ####                ####             #####        \\n\");\n        banner.push_str(\" ####                ####           #######        \\n\");\n        banner.push_str(\" ####      ###       ####         #########        \\n\");\n        banner.push_str(\" ####     ######     ####       ###### ####        \\n\");\n        banner.push_str(\"  ######################       #####   ####        \\n\");\n        banner.push_str(\"    #######    #######       #####     ####        \\n\");\n        banner.push_str(\"    #######    #######         #       ####        \\n\");\n        banner.push_str(\"  ######################               ####        \\n\");\n        banner.push_str(\" ####     ######     ####              ####        \\n\");\n        banner.push_str(\" ####       ##       ####              ####        \\n\");\n        banner.push_str(\" ####                ####       ################## \\n\");\n        banner.push_str(\" ####                ####       ################## \\n\");\n        banner.push('\\n');\n        banner.push_str(&\"=\".repeat(50));\n        banner.push('\\n');\n\n        let texts = [\n            format!(\"SiteOne Crawler, v{}\", self.version),\n            \"Author: jan.reges@siteone.cz\".to_string(),\n        ];\n\n        for text in &texts {\n            banner.push_str(&format!(\"# {:<46} #\\n\", text));\n        }\n        banner.push_str(&\"=\".repeat(50));\n\n        // Loading the rocket on the ramp and show banner with fancy polynomial delays\n        let lines: Vec<&str> = banner.split('\\n').collect();\n        if self.disable_animation {\n            for line in &lines {\n                self.add_to_output(&format!(\"{}\\n\", 
utils::get_color_text(line, \"yellow\", false)));\n            }\n            self.add_to_output(\"\\n\\n\");\n        } else {\n            let delays = Self::get_polynomial_delays(1.2, lines.len(), 2);\n            for (counter, line) in lines.iter().enumerate() {\n                self.add_to_output(&format!(\"{}\\n\", utils::get_color_text(line, \"yellow\", false)));\n\n                // Add delay between lines\n                if counter < delays.len() {\n                    let usleep_time = std::time::Duration::from_micros((delays[counter] * 1_000_000.0) as u64);\n                    std::thread::sleep(usleep_time);\n                }\n            }\n\n            // The rocket takes off smoothly :)\n            std::thread::sleep(std::time::Duration::from_millis(300));\n            self.add_to_output(\"\\n\");\n            std::thread::sleep(std::time::Duration::from_millis(150));\n            self.add_to_output(\"\\n\");\n        }\n\n        if self.compact_mode {\n            self.add_to_output(&utils::get_color_text(\n                &format!(\n                    \"Detected terminal width {} < 140 chars - compact mode activated.\\n\\n\",\n                    self.terminal_width\n                ),\n                \"yellow\",\n                false,\n            ));\n        }\n    }\n\n    fn add_used_options(&mut self) {\n        // Intentionally left empty\n    }\n\n    fn set_extra_columns_from_analysis(&mut self, extra_columns: Vec<ExtraColumn>) {\n        self.extra_columns_from_analysis_width = 0;\n        for extra_column in &extra_columns {\n            self.extra_columns_from_analysis_width += extra_column.get_length() + 3;\n            // 3 = 2 spaces + 1 pipe\n        }\n        self.extra_columns_from_analysis = extra_columns;\n        // Reset cached URL column size since widths changed\n        self.cached_url_column_size = None;\n    }\n\n    fn add_table_header(&mut self) {\n        let url_col_size = self.get_url_column_size();\n      
  let mut header = format!(\"{:<width$} | Status\", \"URL\", width = url_col_size);\n        if !self.is_column_hidden(\"type\") {\n            header.push_str(\" | Type    \");\n        }\n        if !self.is_column_hidden(\"time\") {\n            header.push_str(\" | Time  \");\n        }\n        if !self.is_column_hidden(\"size\") {\n            header.push_str(\" | Size  \");\n        }\n        if !self.is_column_hidden(\"cache\") {\n            header.push_str(\" | Cache \");\n        }\n\n        if !self.hide_progress_bar {\n            let progress_label = if self.compact_mode {\n                \"Progress\"\n            } else {\n                \"Progress report\"\n            };\n            header = format!(\n                \"{:<width$}| {}\",\n                progress_label,\n                header,\n                width = self.progress_bar_width\n            );\n        }\n\n        for extra_column in &self.extra_columns_from_analysis {\n            header.push_str(&format!(\n                \" | {:<width$}\",\n                extra_column.name,\n                width = extra_column.get_length().max(4)\n            ));\n        }\n\n        for extra_column in &self.extra_columns {\n            header.push_str(&format!(\n                \" | {:<width$}\",\n                extra_column.name,\n                width = extra_column.get_length().max(4)\n            ));\n        }\n        header.push('\\n');\n\n        let header_len = header.len();\n        self.add_to_output(&format!(\n            \"{}{}\\n\",\n            utils::get_color_text(&header, \"gray\", false),\n            \"-\".repeat(header_len)\n        ));\n    }\n\n    fn add_table_row(\n        &mut self,\n        response_headers: &HashMap<String, String>,\n        url: &str,\n        status: i32,\n        elapsed_time: f64,\n        size: i64,\n        content_type: i32,\n        extra_parsed_content: &HashMap<String, String>,\n        progress_status: &str,\n        
cache_type_flags: i32,\n        cache_lifetime: Option<i32>,\n    ) {\n        let is_external_url = !url.contains(&format!(\"://{}\", self.origin_host));\n\n        let url_for_table = if !self.show_scheme_and_host && !is_external_url {\n            // Strip scheme and host from URL\n            strip_scheme_and_host(url)\n        } else {\n            url.to_string()\n        };\n\n        let url_col_size = self.get_url_column_size();\n\n        let colored_status = utils::get_colored_status_code(status, 6);\n\n        let content_type_name = ContentTypeId::from_i32(content_type)\n            .map(|ct| ct.name())\n            .unwrap_or(\"Other\");\n        let content_type_padded = format!(\"{:<8}\", content_type_name);\n\n        let colored_elapsed_time = utils::get_colored_request_time(elapsed_time, 6);\n\n        let colored_size = if size > 1024 * 1024 {\n            utils::get_color_text(&format!(\"{:<6}\", utils::get_formatted_size(size, 0)), \"red\", false)\n        } else {\n            format!(\"{:<6}\", utils::get_formatted_size(size, 0))\n        };\n\n        let content_type_header = response_headers.get(\"content-type\").map(|s| s.as_str()).unwrap_or(\"\");\n        let is_asset = utils::is_asset_by_content_type(content_type_header);\n        let colored_cache = get_colored_cache_info(cache_type_flags, cache_lifetime, is_asset);\n\n        // Process extra columns from analysis\n        let mut extra_headers_content = String::new();\n        let mut extra_new_line = String::new();\n        let extra_new_line_prefix = \"  \";\n\n        for extra_column in &self.extra_columns_from_analysis {\n            let value = extra_parsed_content\n                .get(&extra_column.name)\n                .map(|s| s.as_str())\n                .unwrap_or(\"\");\n\n            // For analysis results, we use the value as-is (colored output already applied).\n            // Manual padding is needed because ANSI color codes would be counted by 
format!(\"{:<width$}\").\n            let truncated = extra_column.get_truncated_value(Some(value)).unwrap_or_default();\n            let target_width = extra_column.get_length().max(4);\n            let visible_len = utils::remove_ansi_colors(&truncated).chars().count();\n            let padding = target_width.saturating_sub(visible_len);\n            extra_headers_content.push_str(&format!(\" | {}{}\", truncated, \" \".repeat(padding)));\n\n            // Show inline criticals/warnings if configured\n            if self.show_inline_criticals && value.contains(\"[CRITICAL]\") {\n                extra_new_line.push_str(&format!(\"{}\\u{26D4} {}\\n\", extra_new_line_prefix, value));\n            }\n            if self.show_inline_warnings && value.contains(\"[WARNING]\") {\n                extra_new_line.push_str(&format!(\"{}\\u{26A0}\\u{FE0F} {}\\n\", extra_new_line_prefix, value));\n            }\n        }\n\n        // Process extra columns from options\n        for extra_column in &self.extra_columns {\n            let mut value = String::new();\n            let header_name = &extra_column.name;\n\n            if let Some(v) = extra_parsed_content.get(header_name) {\n                value = v.trim().to_string();\n            } else if let Some(v) = response_headers.get(&header_name.to_lowercase()) {\n                value = v.trim().to_string();\n            }\n\n            let truncated = extra_column.get_truncated_value(Some(&value)).unwrap_or_default();\n            let target_width = extra_column.get_length().max(4);\n            let visible_len = utils::remove_ansi_colors(&truncated).chars().count();\n            let padding = target_width.saturating_sub(visible_len);\n            extra_headers_content.push_str(&format!(\" | {}{}\", truncated, \" \".repeat(padding)));\n        }\n\n        let mut url_display = url_for_table.clone();\n\n        if self.add_random_query_params {\n            url_display.push_str(&utils::get_color_text(\"+%random-query%\", 
\"gray\", false));\n        }\n\n        if !self.do_not_truncate_url {\n            url_display = utils::truncate_in_two_thirds(&url_display, url_col_size, \"\\u{2026}\", None);\n        }\n\n        // Progress content\n        let progress_content = if !self.hide_progress_bar {\n            let parts: Vec<&str> = progress_status.splitn(2, '/').collect();\n            let done: usize = parts.first().and_then(|s| s.parse().ok()).unwrap_or(0);\n            let total: usize = parts.get(1).and_then(|s| s.parse().ok()).unwrap_or(1);\n\n            if self.compact_mode {\n                format!(\"{:<7} |\", progress_status)\n            } else {\n                let progress_to_stderr =\n                    format!(\"{:<7} | {}\", progress_status, utils::get_progress_bar(done, total, 10));\n                format!(\"{:<17}\", progress_to_stderr)\n            }\n        } else {\n            String::new()\n        };\n\n        // Manual ANSI-aware padding for url_display (truncation may add colored \"…\")\n        let url_visible_len = utils::remove_ansi_colors(&url_display).chars().count();\n        let url_padding = url_col_size.saturating_sub(url_visible_len);\n        let url_padded = format!(\"{}{}\", url_display, \" \".repeat(url_padding));\n\n        let mut output = format!(\"{} {} | {}\", progress_content, url_padded, colored_status);\n        if !self.is_column_hidden(\"type\") {\n            output.push_str(&format!(\" | {}\", content_type_padded));\n        }\n        if !self.is_column_hidden(\"time\") {\n            output.push_str(&format!(\" | {}\", colored_elapsed_time));\n        }\n        if !self.is_column_hidden(\"size\") {\n            output.push_str(&format!(\" | {}\", colored_size));\n        }\n        if !self.is_column_hidden(\"cache\") {\n            output.push_str(&format!(\" | {}\", colored_cache));\n        }\n        output.push_str(&format!(\"{}\\n\", extra_headers_content));\n\n        if !extra_new_line.is_empty() {\n            
let combined = format!(\"{}{}\\n\", output, extra_new_line.trim_end());\n            self.add_to_output(&combined);\n        } else {\n            self.add_to_output(&output);\n        }\n    }\n\n    fn add_super_table(&mut self, table: &SuperTable) {\n        self.add_to_output(\"\\n\");\n        self.add_to_output(&table.get_console_output());\n    }\n\n    fn add_total_stats(&mut self, stats: &BasicStats) {\n        self.add_to_output(\"\\n\");\n        self.add_to_output(&\"=\".repeat(self.terminal_width));\n        self.add_to_output(\"\\n\");\n\n        let peak_memory = utils::get_peak_memory_usage();\n        let peak_memory_str = if peak_memory > 0 {\n            format!(\n                \" (max used {})\",\n                utils::get_color_text(&utils::get_formatted_size(peak_memory, 0), \"cyan\", false,)\n            )\n        } else {\n            String::new()\n        };\n        let result_header = format!(\n            \"Total execution time {} using {} workers and {} memory limit{}\\n\",\n            utils::get_color_text(\n                &utils::get_formatted_duration(stats.total_execution_time),\n                \"cyan\",\n                false,\n            ),\n            utils::get_color_text(&self.workers.to_string(), \"cyan\", false),\n            utils::get_color_text(&self.memory_limit, \"cyan\", false),\n            peak_memory_str,\n        );\n        self.add_to_output(&result_header);\n\n        let reqs_per_sec = if stats.total_execution_time > 0.0 {\n            (stats.total_urls as f64 / stats.total_execution_time) as i64\n        } else {\n            0\n        };\n        let bytes_per_sec = if stats.total_execution_time > 0.0 {\n            (stats.total_size as f64 / stats.total_execution_time) as i64\n        } else {\n            0\n        };\n\n        self.add_to_output(&format!(\n            \"Total of {} visited URLs with a total size of {} and power of {} with download speed {}\\n\",\n            
utils::get_color_text(&stats.total_urls.to_string(), \"cyan\", false),\n            utils::get_color_text(&stats.total_size_formatted, \"cyan\", false),\n            utils::get_color_text(&format!(\"{} reqs/s\", reqs_per_sec), \"magenta\", false),\n            utils::get_color_text(\n                &format!(\"{}/s\", utils::get_formatted_size(bytes_per_sec, 0)),\n                \"magenta\",\n                false,\n            ),\n        ));\n\n        self.add_to_output(&format!(\n            \"Response times: AVG {} MIN {} MAX {} TOTAL {}\\n\",\n            utils::get_color_text(\n                &utils::get_formatted_duration(stats.total_requests_times_avg),\n                \"magenta\",\n                false,\n            ),\n            utils::get_color_text(\n                &utils::get_formatted_duration(stats.total_requests_times_min),\n                \"green\",\n                false,\n            ),\n            utils::get_color_text(\n                &utils::get_formatted_duration(stats.total_requests_times_max),\n                \"red\",\n                false,\n            ),\n            utils::get_color_text(\n                &utils::get_formatted_duration(stats.total_requests_times),\n                \"cyan\",\n                false,\n            ),\n        ));\n\n        self.add_to_output(&\"=\".repeat(self.terminal_width));\n        self.add_to_output(\"\\n\");\n    }\n\n    fn add_notice(&mut self, text: &str) {\n        self.add_to_output(&format!(\"{}\\n\", utils::get_color_text(text, \"blue\", false)));\n    }\n\n    fn add_error(&mut self, text: &str) {\n        self.add_to_output(&format!(\"{}\\n\", utils::get_color_text(text, \"red\", false)));\n    }\n\n    fn add_quality_scores(&mut self, scores: &QualityScores) {\n        // Content: \"  \" + name(16) + bar(25) + \"  \" + score(7) + \"  \" + label(9) + \"  \" = 65\n        let inner = 65;\n\n        let mut out = String::new();\n        out.push('\\n');\n\n        // Top border\n  
      out.push_str(&format!(\"\\u{2554}{}\\u{2557}\\n\", \"\\u{2550}\".repeat(inner)));\n\n        // Title\n        let title = \"WEBSITE QUALITY SCORE\";\n        let pad = (inner as isize - title.len() as isize) / 2;\n        let pad = pad.max(0) as usize;\n        out.push_str(&format!(\n            \"\\u{2551}{}{:<width$}\\u{2551}\\n\",\n            \" \".repeat(pad),\n            title,\n            width = inner - pad,\n        ));\n\n        // Separator\n        out.push_str(&format!(\"\\u{2560}{}\\u{2563}\\n\", \"\\u{2550}\".repeat(inner)));\n\n        // Overall score bar\n        out.push_str(&format_score_line(&scores.overall, inner, true));\n\n        // Separator\n        out.push_str(&format!(\"\\u{2560}{}\\u{2563}\\n\", \"\\u{2550}\".repeat(inner)));\n\n        // Category scores\n        for cat in &scores.categories {\n            out.push_str(&format_score_line(cat, inner, false));\n        }\n\n        // Bottom border\n        out.push_str(&format!(\"\\u{255A}{}\\u{255D}\\n\", \"\\u{2550}\".repeat(inner)));\n\n        self.add_to_output(&out);\n    }\n\n    fn add_ci_gate_result(&mut self, result: &CiGateResult) {\n        let inner = 62;\n        let mut out = String::new();\n        out.push('\\n');\n\n        let border_color = if result.passed { \"green\" } else { \"red\" };\n\n        // Top border\n        out.push_str(&utils::get_color_text(\n            &format!(\"\\u{2554}{}\\u{2557}\", \"\\u{2550}\".repeat(inner)),\n            border_color,\n            false,\n        ));\n        out.push('\\n');\n\n        // Title\n        let title = \"CI/CD QUALITY GATE\";\n        let pad = (inner as isize - title.len() as isize) / 2;\n        let pad = pad.max(0) as usize;\n        let title_line = format!(\"{}{:<width$}\", \" \".repeat(pad), title, width = inner - pad,);\n        out.push_str(&utils::get_color_text(\"\\u{2551}\", border_color, false));\n        out.push_str(&title_line);\n        
out.push_str(&utils::get_color_text(\"\\u{2551}\", border_color, false));\n        out.push('\\n');\n\n        // Separator\n        out.push_str(&utils::get_color_text(\n            &format!(\"\\u{2560}{}\\u{2563}\", \"\\u{2550}\".repeat(inner)),\n            border_color,\n            false,\n        ));\n        out.push('\\n');\n\n        // Check lines\n        for check in &result.checks {\n            let (tag, tag_color) = if check.passed {\n                (\"[PASS]\", \"green\")\n            } else {\n                (\"[FAIL]\", \"red\")\n            };\n\n            let detail = if check.passed {\n                format!(\n                    \"{}: {} {} {}\",\n                    check.metric,\n                    format_num(check.actual),\n                    check.operator,\n                    format_num(check.threshold)\n                )\n            } else if check.operator == \">=\" {\n                format!(\n                    \"{}: {} < {} (min: {})\",\n                    check.metric,\n                    format_num(check.actual),\n                    format_num(check.threshold),\n                    format_num(check.threshold)\n                )\n            } else {\n                format!(\n                    \"{}: {} > {} (max: {})\",\n                    check.metric,\n                    format_num(check.actual),\n                    format_num(check.threshold),\n                    format_num(check.threshold)\n                )\n            };\n\n            let content = format!(\"  {} {}\", tag, detail);\n            let visible_len = content.chars().count();\n            let padding = inner.saturating_sub(visible_len);\n\n            let colored_tag = utils::get_color_text(tag, tag_color, false);\n            let line_content = format!(\"  {} {}{}\", colored_tag, detail, \" \".repeat(padding));\n\n            out.push_str(&utils::get_color_text(\"\\u{2551}\", border_color, false));\n            out.push_str(&line_content);\n  
          out.push_str(&utils::get_color_text(\"\\u{2551}\", border_color, false));\n            out.push('\\n');\n        }\n\n        // Result separator\n        out.push_str(&utils::get_color_text(\n            &format!(\"\\u{2560}{}\\u{2563}\", \"\\u{2550}\".repeat(inner)),\n            border_color,\n            false,\n        ));\n        out.push('\\n');\n\n        // Result line\n        let failed_count = result.checks.iter().filter(|c| !c.passed).count();\n        let total_count = result.checks.len();\n        let result_text = if result.passed {\n            format!(\n                \"RESULT: PASS ({} of {} checks passed) \\u{2014} exit code 0\",\n                total_count, total_count\n            )\n        } else {\n            format!(\n                \"RESULT: FAIL ({} of {} checks failed) \\u{2014} exit code 10\",\n                failed_count, total_count\n            )\n        };\n        let result_content = format!(\"  {}\", result_text);\n        let visible_len = result_content.chars().count();\n        let padding = inner.saturating_sub(visible_len);\n\n        out.push_str(&utils::get_color_text(\"\\u{2551}\", border_color, false));\n        out.push_str(&utils::get_color_text(\n            &format!(\"{}{}\", result_content, \" \".repeat(padding)),\n            border_color,\n            false,\n        ));\n        out.push_str(&utils::get_color_text(\"\\u{2551}\", border_color, false));\n        out.push('\\n');\n\n        // Bottom border\n        out.push_str(&utils::get_color_text(\n            &format!(\"\\u{255A}{}\\u{255D}\", \"\\u{2550}\".repeat(inner)),\n            border_color,\n            false,\n        ));\n        out.push('\\n');\n\n        self.add_to_output(&out);\n    }\n\n    fn add_summary(&mut self, summary: &mut Summary) {\n        self.add_to_output(\"\\n\");\n        self.add_to_output(&summary.get_as_console_text());\n    }\n\n    fn get_type(&self) -> OutputType {\n        OutputType::Text\n    }\n\n    
fn end(&mut self) {\n        self.add_to_output(\"\\n\");\n    }\n\n    fn get_output_text(&self) -> Option<String> {\n        Some(self.output_text.clone())\n    }\n}\n\n// ---- Helper functions ----\n\n/// Extract the host part from a URL.\nfn extract_host(url: &str) -> String {\n    if let Ok(parsed) = url::Url::parse(url) {\n        parsed.host_str().unwrap_or(\"\").to_string()\n    } else {\n        String::new()\n    }\n}\n\n/// Strip scheme and host from a URL, leaving only the path (and query).\nfn strip_scheme_and_host(url: &str) -> String {\n    if let Ok(parsed) = url::Url::parse(url) {\n        let path = parsed.path();\n        if let Some(query) = parsed.query() {\n            format!(\"{}?{}\", path, query)\n        } else {\n            path.to_string()\n        }\n    } else {\n        url.to_string()\n    }\n}\n\n// Cache type flag constants\nconst CACHE_TYPE_HAS_NO_STORE: i32 = 2048;\nconst CACHE_TYPE_HAS_ETAG: i32 = 4;\nconst CACHE_TYPE_HAS_LAST_MODIFIED: i32 = 8;\n\n/// Get colored cache info string.\nfn get_colored_cache_info(cache_type_flags: i32, cache_lifetime: Option<i32>, is_asset: bool) -> String {\n    let critical_color = \"red\";\n    let warning_color = \"yellow\";\n    let notice_color = \"magenta\";\n    let neutral_color = \"gray\";\n    let ok_color = \"green\";\n\n    let str_pad_to = 6;\n\n    if let Some(lifetime) = cache_lifetime {\n        let color = if is_asset {\n            if lifetime <= 0 {\n                critical_color\n            } else if lifetime < 7200 {\n                warning_color\n            } else if lifetime < 86400 {\n                notice_color\n            } else {\n                ok_color\n            }\n        } else {\n            neutral_color\n        };\n        utils::get_color_text(\n            &format!(\n                \"{:<width$}\",\n                utils::get_formatted_cache_lifetime(lifetime as i64),\n                width = str_pad_to\n            ),\n            color,\n           
 false,\n        )\n    } else if cache_type_flags & CACHE_TYPE_HAS_NO_STORE != 0 {\n        let color = if is_asset { critical_color } else { notice_color };\n        utils::get_color_text(&format!(\"{:<width$}\", \"0s\", width = str_pad_to), color, false)\n    } else if cache_type_flags & CACHE_TYPE_HAS_ETAG != 0 {\n        let color = if is_asset { warning_color } else { notice_color };\n        utils::get_color_text(&format!(\"{:<width$}\", \"etag\", width = str_pad_to), color, false)\n    } else if cache_type_flags & CACHE_TYPE_HAS_LAST_MODIFIED != 0 {\n        let color = if is_asset { warning_color } else { notice_color };\n        utils::get_color_text(&format!(\"{:<width$}\", \"lm\", width = str_pad_to), color, false)\n    } else {\n        let color = if is_asset { critical_color } else { notice_color };\n        utils::get_color_text(&format!(\"{:<width$}\", \"none\", width = str_pad_to), color, false)\n    }\n}\n\n/// Format a number for CI gate display: integers without decimals, floats with one decimal.\nfn format_num(v: f64) -> String {\n    if v == v.floor() && v.abs() < 1e15 {\n        format!(\"{}\", v as i64)\n    } else {\n        format!(\"{:.1}\", v)\n    }\n}\n\n/// Format a single score line for the quality score box.\nfn format_score_line(\n    cat: &crate::scoring::quality_score::CategoryScore,\n    inner_width: usize,\n    _is_overall: bool,\n) -> String {\n    let bar_width = 25;\n    let filled = ((cat.score / 10.0) * bar_width as f64).round() as usize;\n    let empty = bar_width - filled;\n    let bar = format!(\"{}{}\", \"\\u{2588}\".repeat(filled), \"\\u{2591}\".repeat(empty),);\n\n    let score_str = format!(\"{:>7}\", format!(\"{:.1}/10\", cat.score));\n    let label_padded = format!(\"{:<16}\", cat.name);\n    let label_str = format!(\"{:<9}\", cat.label);\n    let content = format!(\"  {}{}  {}  {}\", label_padded, bar, score_str, label_str);\n\n    // Calculate visible width using char count (Unicode block chars are 1 display 
char each)\n    let visible_width = content.chars().count();\n    let padding = inner_width.saturating_sub(visible_width);\n\n    // Colorize the entire content\n    let colored = utils::get_color_text(&content, cat.console_color(), false);\n\n    format!(\"\\u{2551}{}{}\\u{2551}\\n\", colored, \" \".repeat(padding))\n}\n"
  },
  {
    "path": "src/result/basic_stats.rs",
    "content": "// SiteOne Crawler - BasicStats\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::BTreeMap;\nuse std::time::Instant;\n\nuse serde::{Deserialize, Serialize};\n\nuse crate::result::visited_url::VisitedUrl;\nuse crate::utils;\n\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct BasicStats {\n    pub total_execution_time: f64,\n    pub total_urls: usize,\n    pub total_size: i64,\n    pub total_size_formatted: String,\n    pub total_requests_times: f64,\n    pub total_requests_times_avg: f64,\n    pub total_requests_times_min: f64,\n    pub total_requests_times_max: f64,\n    pub count_by_status: BTreeMap<i32, usize>,\n    pub count_by_content_type: BTreeMap<i32, usize>,\n}\n\nimpl BasicStats {\n    #[allow(clippy::too_many_arguments)]\n    pub fn new(\n        total_execution_time: f64,\n        total_urls: usize,\n        total_size: i64,\n        total_size_formatted: String,\n        total_requests_times: f64,\n        total_requests_times_avg: f64,\n        total_requests_times_min: f64,\n        total_requests_times_max: f64,\n        count_by_status: BTreeMap<i32, usize>,\n        count_by_content_type: BTreeMap<i32, usize>,\n    ) -> Self {\n        Self {\n            total_execution_time,\n            total_urls,\n            total_size,\n            total_size_formatted,\n            total_requests_times,\n            total_requests_times_avg,\n            total_requests_times_min,\n            total_requests_times_max,\n            count_by_status,\n            count_by_content_type,\n        }\n    }\n\n    pub fn from_visited_urls(visited_urls: &[&VisitedUrl], start_time: Instant) -> Self {\n        let total_urls = visited_urls.len();\n        let mut total_size: i64 = 0;\n        let mut total_time: f64 = 0.0;\n        let mut min_time: Option<f64> = None;\n        let mut max_time: Option<f64> = None;\n        let mut count_by_status: BTreeMap<i32, usize> = BTreeMap::new();\n        let mut 
count_by_content_type: BTreeMap<i32, usize> = BTreeMap::new();\n\n        for url in visited_urls {\n            total_time += url.request_time;\n            total_size += url.size.unwrap_or(0);\n            *count_by_status.entry(url.status_code).or_insert(0) += 1;\n            *count_by_content_type.entry(url.content_type as i32).or_insert(0) += 1;\n            min_time = Some(match min_time {\n                Some(current) => current.min(url.request_time),\n                None => url.request_time,\n            });\n            max_time = Some(match max_time {\n                Some(current) => current.max(url.request_time),\n                None => url.request_time,\n            });\n        }\n\n        let total_execution_time = (start_time.elapsed().as_secs_f64() * 1000.0).round() / 1000.0;\n        let total_requests_times = (total_time * 1000.0).round() / 1000.0;\n        let total_requests_times_avg = if total_urls > 0 {\n            (total_time / total_urls as f64 * 1000.0).round() / 1000.0\n        } else {\n            0.0\n        };\n        let total_requests_times_min = (min_time.unwrap_or(0.0) * 1000.0).round() / 1000.0;\n        let total_requests_times_max = (max_time.unwrap_or(0.0) * 1000.0).round() / 1000.0;\n\n        Self {\n            total_execution_time,\n            total_urls,\n            total_size,\n            total_size_formatted: utils::get_formatted_size(total_size, 0),\n            total_requests_times,\n            total_requests_times_avg,\n            total_requests_times_min,\n            total_requests_times_max,\n            count_by_status,\n            count_by_content_type,\n        }\n    }\n\n    pub fn get_as_html(&self) -> String {\n        let mut html = String::from(\"<table class=\\\"table table-bordered table-striped table-hover\\\">\");\n        html.push_str(\"<tr><th colspan=\\\"2\\\">Basic stats</th></tr>\");\n        html.push_str(&format!(\n            \"<tr><td>Total execution 
time</td><td>{}</td></tr>\",\n            utils::get_formatted_duration(self.total_execution_time)\n        ));\n        html.push_str(&format!(\"<tr><td>Total URLs</td><td>{}</td></tr>\", self.total_urls));\n        html.push_str(&format!(\n            \"<tr><td>Total size</td><td>{}</td></tr>\",\n            self.total_size_formatted\n        ));\n        html.push_str(&format!(\n            \"<tr><td>Requests - total time</td><td>{}</td></tr>\",\n            utils::get_formatted_duration(self.total_requests_times)\n        ));\n        html.push_str(&format!(\n            \"<tr><td>Requests - avg time</td><td>{}</td></tr>\",\n            utils::get_formatted_duration(self.total_requests_times_avg)\n        ));\n        html.push_str(&format!(\n            \"<tr><td>Requests - min time</td><td>{}</td></tr>\",\n            utils::get_formatted_duration(self.total_requests_times_min)\n        ));\n        html.push_str(&format!(\n            \"<tr><td>Requests - max time</td><td>{}</td></tr>\",\n            utils::get_formatted_duration(self.total_requests_times_max)\n        ));\n        html.push_str(\"<tr><td>Requests by status</td><td>\");\n        for (status_code, count) in &self.count_by_status {\n            let colored = utils::get_colored_status_code(*status_code, 0);\n            let colored_html = utils::convert_bash_colors_in_text_to_html(&colored);\n            html.push_str(&format!(\"{}: {}<br>\", colored_html, count));\n        }\n        html.push_str(\"</td></tr>\");\n        html.push_str(\"</table>\");\n\n        html\n    }\n}\n"
  },
  {
    "path": "src/result/manager_stats.rs",
    "content": "// SiteOne Crawler - ManagerStats\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\nuse std::time::Instant;\n\nuse crate::components::super_table::SuperTable;\nuse crate::components::super_table_column::SuperTableColumn;\nuse crate::utils;\n\n#[derive(Debug, Default)]\npub struct ManagerStats {\n    /// Total exec times of analyzer methods\n    exec_times: HashMap<String, f64>,\n\n    /// Total exec counts of analyzer methods\n    exec_counts: HashMap<String, usize>,\n}\n\nimpl ManagerStats {\n    pub fn new() -> Self {\n        Self {\n            exec_times: HashMap::new(),\n            exec_counts: HashMap::new(),\n        }\n    }\n\n    /// Measure and increment exec time and count of analyzer method\n    pub fn measure_exec_time(&mut self, class: &str, method: &str, start_time: Instant) {\n        let elapsed = start_time.elapsed().as_secs_f64();\n        let key = format!(\"{}::{}\", class, method);\n\n        *self.exec_times.entry(key.clone()).or_insert(0.0) += elapsed;\n        *self.exec_counts.entry(key).or_insert(0) += 1;\n    }\n\n    pub fn get_super_table(\n        &self,\n        apl_code: &str,\n        title: &str,\n        empty_table_message: &str,\n        external_times: Option<&HashMap<String, f64>>,\n        external_counts: Option<&HashMap<String, usize>>,\n    ) -> SuperTable {\n        let mut data: Vec<HashMap<String, String>> = Vec::new();\n\n        // Internal stats\n        for (class_and_method, exec_time) in &self.exec_times {\n            let short_name = class_and_method\n                .rsplit(\"::\")\n                .next()\n                .map(|method| {\n                    let class_part = class_and_method.split(\"::\").next().unwrap_or(class_and_method);\n                    let short_class = class_part.rsplit('/').next().unwrap_or(class_part);\n                    let short_class = short_class.rsplit('\\\\').next().unwrap_or(short_class);\n                    
format!(\"{}::{}\", short_class, method)\n                })\n                .unwrap_or_else(|| class_and_method.clone());\n\n            let mut row = HashMap::new();\n            row.insert(\"classAndMethod\".to_string(), short_name);\n            row.insert(\"execTime\".to_string(), format!(\"{}\", exec_time));\n            row.insert(\n                \"execTimeFormatted\".to_string(),\n                utils::get_formatted_duration(*exec_time),\n            );\n            row.insert(\n                \"execCount\".to_string(),\n                format!(\"{}\", self.exec_counts.get(class_and_method).copied().unwrap_or(0)),\n            );\n            data.push(row);\n        }\n\n        // External stats (if any)\n        if let Some(ext_times) = external_times {\n            for (class_and_method, exec_time) in ext_times {\n                let short_name = class_and_method\n                    .rsplit(\"::\")\n                    .next()\n                    .map(|method| {\n                        let class_part = class_and_method.split(\"::\").next().unwrap_or(class_and_method);\n                        let short_class = class_part.rsplit('/').next().unwrap_or(class_part);\n                        let short_class = short_class.rsplit('\\\\').next().unwrap_or(short_class);\n                        format!(\"{}::{}\", short_class, method)\n                    })\n                    .unwrap_or_else(|| class_and_method.clone());\n\n                let mut row = HashMap::new();\n                row.insert(\"classAndMethod\".to_string(), short_name);\n                row.insert(\"execTime\".to_string(), format!(\"{}\", exec_time));\n                row.insert(\n                    \"execTimeFormatted\".to_string(),\n                    utils::get_formatted_duration(*exec_time),\n                );\n                row.insert(\n                    \"execCount\".to_string(),\n                    format!(\n                        \"{}\",\n                        
external_counts\n                            .and_then(|c| c.get(class_and_method))\n                            .copied()\n                            .unwrap_or(0)\n                    ),\n                );\n                data.push(row);\n            }\n        }\n\n        let columns = vec![\n            SuperTableColumn::new(\n                \"classAndMethod\".to_string(),\n                \"Class::method\".to_string(),\n                -1, // AUTO_WIDTH\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"execTime\".to_string(),\n                \"Exec time\".to_string(),\n                9,\n                Some(Box::new(|value: &str, _render_into: &str| {\n                    if let Ok(v) = value.parse::<f64>() {\n                        utils::get_colored_request_time(v, 9)\n                    } else {\n                        value.to_string()\n                    }\n                })),\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n            SuperTableColumn::new(\n                \"execCount\".to_string(),\n                \"Exec count\".to_string(),\n                -1, // AUTO_WIDTH\n                None,\n                None,\n                false,\n                false,\n                false,\n                true,\n                None,\n            ),\n        ];\n\n        let mut super_table = SuperTable::new(\n            apl_code.to_string(),\n            title.to_string(),\n            empty_table_message.to_string(),\n            columns,\n            false,\n            Some(\"execTime\".to_string()),\n            \"DESC\".to_string(),\n            None,\n            None,\n            None,\n        );\n\n        
super_table.set_data(data);\n        super_table\n    }\n\n    pub fn get_exec_times(&self) -> &HashMap<String, f64> {\n        &self.exec_times\n    }\n\n    pub fn get_exec_counts(&self) -> &HashMap<String, usize> {\n        &self.exec_counts\n    }\n}\n"
  },
  {
    "path": "src/result/mod.rs",
    "content": "pub mod basic_stats;\r\npub mod manager_stats;\r\npub mod status;\r\npub mod storage;\r\npub mod visited_url;\r\n"
  },
  {
    "path": "src/result/status.rs",
    "content": "// SiteOne Crawler - Status (central crawl state)\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\nuse std::sync::{Mutex, RwLock};\nuse std::time::Instant;\n\nuse indexmap::IndexMap;\n\nuse crate::analysis::result::url_analysis_result::UrlAnalysisResult;\nuse crate::components::summary::item::Item;\nuse crate::components::summary::item_status::ItemStatus;\nuse crate::components::summary::summary::Summary;\nuse crate::components::super_table::SuperTable;\nuse crate::info::Info;\nuse crate::result::basic_stats::BasicStats;\nuse crate::result::storage::storage::Storage;\nuse crate::result::visited_url::VisitedUrl;\nuse crate::types::{ContentTypeId, SkippedReason};\n\n/// Central state for the crawl result.\n/// Must be Send + Sync for concurrent access from multiple workers.\npub struct Status {\n    /// Content storage (memory or file) - used only if store_content is true\n    storage: Box<dyn Storage>,\n\n    /// Store content of visited URLs (HTML, CSS, JS, images, ...) 
to storage\n    store_content: bool,\n\n    /// Crawl start time\n    start_time: Instant,\n\n    /// Basic stats/metrics about visited URLs (lazily computed)\n    basic_stats: RwLock<Option<BasicStats>>,\n\n    /// Overall summary of the crawl\n    summary: Mutex<Summary>,\n\n    /// SuperTables that are at the beginning of the page\n    super_tables_at_beginning: Mutex<Vec<SuperTable>>,\n\n    /// SuperTables that are at the end of the page\n    super_tables_at_end: Mutex<Vec<SuperTable>>,\n\n    /// Crawler info\n    crawler_info: RwLock<Info>,\n\n    /// Visited URLs, keyed by uq_id (IndexMap preserves crawl/insertion order)\n    visited_urls: Mutex<IndexMap<String, VisitedUrl>>,\n\n    /// Analysis results per visited URL uq_id\n    visited_url_to_analysis_result: Mutex<HashMap<String, Vec<UrlAnalysisResultEntry>>>,\n\n    /// Robots.txt content - key is \"scheme://host:port\"\n    robots_txt_content: RwLock<HashMap<String, String>>,\n\n    /// Skipped URLs (transferred from crawler after crawling)\n    skipped_urls: Mutex<Vec<SkippedUrlEntry>>,\n}\n\n/// Entry for a skipped URL stored in Status\n#[derive(Debug, Clone)]\npub struct SkippedUrlEntry {\n    pub url: String,\n    pub reason: SkippedReason,\n    pub source_uq_id: String,\n    pub source_attr: i32,\n}\n\n/// Per-URL analysis result entry stored in Status\n#[derive(Debug, Clone)]\npub struct UrlAnalysisResultEntry {\n    pub analysis_name: String,\n    pub result: UrlAnalysisResult,\n}\n\n// SAFETY: Status uses internal synchronization primitives (Mutex, RwLock)\n// for all mutable state, making it safe to share across threads.\nunsafe impl Send for Status {}\nunsafe impl Sync for Status {}\n\nimpl Status {\n    pub fn new(storage: Box<dyn Storage>, store_content: bool, crawler_info: Info, start_time: Instant) -> Self {\n        Self {\n            storage,\n            store_content,\n            start_time,\n            basic_stats: RwLock::new(None),\n            summary: 
Mutex::new(Summary::new()),\n            super_tables_at_beginning: Mutex::new(Vec::new()),\n            super_tables_at_end: Mutex::new(Vec::new()),\n            crawler_info: RwLock::new(crawler_info),\n            visited_urls: Mutex::new(IndexMap::new()),\n            visited_url_to_analysis_result: Mutex::new(HashMap::new()),\n            robots_txt_content: RwLock::new(HashMap::new()),\n            skipped_urls: Mutex::new(Vec::new()),\n        }\n    }\n\n    pub fn add_visited_url(\n        &mut self,\n        visited_url: VisitedUrl,\n        body: Option<&[u8]>,\n        headers: Option<&HashMap<String, String>>,\n    ) {\n        let uq_id = visited_url.uq_id.clone();\n        let content_type = visited_url.content_type;\n\n        if let Ok(mut urls) = self.visited_urls.lock() {\n            urls.insert(uq_id.clone(), visited_url);\n        }\n\n        if self.store_content {\n            if let Some(body_bytes) = body {\n                let content = if content_type == ContentTypeId::Html {\n                    // Trim whitespace for HTML (text-safe operation)\n                    let text = String::from_utf8_lossy(body_bytes);\n                    text.trim().as_bytes().to_vec()\n                } else {\n                    body_bytes.to_vec()\n                };\n                // Ignore storage errors - they are non-fatal\n                let _ = self.storage.save(&uq_id, &content);\n            }\n\n            if let Some(hdrs) = headers {\n                // Serialize headers as JSON for storage\n                if let Ok(serialized) = serde_json::to_string(hdrs) {\n                    let _ = self.storage.save(&format!(\"{}.headers\", uq_id), serialized.as_bytes());\n                }\n            }\n        }\n\n        // Invalidate cached basic stats\n        if let Ok(mut stats) = self.basic_stats.write() {\n            *stats = None;\n        }\n    }\n\n    pub fn add_summary_item_by_ranges(\n        &self,\n        apl_code: &str,\n    
    value: f64,\n        ranges: &[(f64, f64)],\n        text_per_range: &[&str],\n    ) {\n        let mut status = ItemStatus::Info;\n        let mut text = format!(\"{} out of range ({})\", apl_code, value);\n\n        for (range_id, range) in ranges.iter().enumerate() {\n            if value >= range.0 && value <= range.1 {\n                if let Ok(s) = ItemStatus::from_range_id(range_id as i32) {\n                    status = s;\n                }\n                if let Some(tmpl) = text_per_range.get(range_id) {\n                    text = tmpl.replace(\"{}\", &format!(\"{}\", value));\n                }\n                break;\n            }\n        }\n\n        if let Ok(mut summary) = self.summary.lock() {\n            summary.add_item(Item::new(apl_code.to_string(), text, status));\n        }\n    }\n\n    pub fn add_ok_to_summary(&self, apl_code: &str, text: &str) {\n        if let Ok(mut summary) = self.summary.lock() {\n            summary.add_item(Item::new(apl_code.to_string(), text.to_string(), ItemStatus::Ok));\n        }\n    }\n\n    pub fn add_notice_to_summary(&self, apl_code: &str, text: &str) {\n        if let Ok(mut summary) = self.summary.lock() {\n            summary.add_item(Item::new(apl_code.to_string(), text.to_string(), ItemStatus::Notice));\n        }\n    }\n\n    pub fn add_info_to_summary(&self, apl_code: &str, text: &str) {\n        if let Ok(mut summary) = self.summary.lock() {\n            summary.add_item(Item::new(apl_code.to_string(), text.to_string(), ItemStatus::Info));\n        }\n    }\n\n    pub fn add_warning_to_summary(&self, apl_code: &str, text: &str) {\n        if let Ok(mut summary) = self.summary.lock() {\n            summary.add_item(Item::new(apl_code.to_string(), text.to_string(), ItemStatus::Warning));\n        }\n    }\n\n    pub fn add_critical_to_summary(&self, apl_code: &str, text: &str) {\n        if let Ok(mut summary) = self.summary.lock() {\n            
summary.add_item(Item::new(apl_code.to_string(), text.to_string(), ItemStatus::Critical));\n        }\n    }\n\n    pub fn get_summary(&self) -> Summary {\n        self.summary.lock().map(|s| s.clone()).unwrap_or_default()\n    }\n\n    pub fn with_summary<F, R>(&self, f: F) -> Option<R>\n    where\n        F: FnOnce(&mut Summary) -> R,\n    {\n        self.summary.lock().ok().map(|mut s| f(&mut s))\n    }\n\n    /// Get stored body as raw bytes (preserves binary data for images, fonts, etc.)\n    pub fn get_url_body(&self, uq_id: &str) -> Option<Vec<u8>> {\n        if !self.store_content {\n            return None;\n        }\n        self.storage.load(uq_id).ok().filter(|b| !b.is_empty())\n    }\n\n    /// Get stored body as text (lossy UTF-8 conversion). Use for HTML/CSS/JS processing.\n    pub fn get_url_body_text(&self, uq_id: &str) -> Option<String> {\n        self.get_url_body(uq_id)\n            .map(|b| String::from_utf8_lossy(&b).into_owned())\n    }\n\n    pub fn get_url_headers(&self, uq_id: &str) -> Option<HashMap<String, String>> {\n        let key = format!(\"{}.headers\", uq_id);\n        let data = self.storage.load(&key).ok()?;\n        if data.is_empty() {\n            return None;\n        }\n        serde_json::from_slice(&data).ok()\n    }\n\n    pub fn get_visited_urls(&self) -> Vec<VisitedUrl> {\n        self.visited_urls\n            .lock()\n            .map(|urls| urls.values().cloned().collect())\n            .unwrap_or_default()\n    }\n\n    pub fn with_visited_urls<F, R>(&self, f: F) -> Option<R>\n    where\n        F: FnOnce(&IndexMap<String, VisitedUrl>) -> R,\n    {\n        self.visited_urls.lock().ok().map(|urls| f(&urls))\n    }\n\n    pub fn get_crawler_info(&self) -> Info {\n        self.crawler_info.read().map(|info| info.clone()).unwrap_or_else(|_| {\n            Info::new(\n                String::new(),\n                String::new(),\n                String::new(),\n                String::new(),\n                
String::new(),\n                String::new(),\n                String::new(),\n            )\n        })\n    }\n\n    pub fn get_storage(&self) -> &dyn Storage {\n        self.storage.as_ref()\n    }\n\n    pub fn set_final_user_agent(&self, value: &str) {\n        if let Ok(mut info) = self.crawler_info.write() {\n            info.set_final_user_agent(value.to_string());\n        }\n    }\n\n    pub fn get_basic_stats(&self) -> BasicStats {\n        // Check if we already have cached stats\n        if let Ok(stats_guard) = self.basic_stats.read()\n            && let Some(ref stats) = *stats_guard\n        {\n            return stats.clone();\n        }\n\n        // Compute stats\n        let stats = match self.visited_urls.lock() {\n            Ok(urls) => {\n                let url_refs: Vec<&VisitedUrl> = urls.values().collect();\n                BasicStats::from_visited_urls(&url_refs, self.start_time)\n            }\n            _ => BasicStats::from_visited_urls(&[], self.start_time),\n        };\n\n        // Cache the result\n        if let Ok(mut stats_guard) = self.basic_stats.write() {\n            *stats_guard = Some(stats.clone());\n        }\n\n        stats\n    }\n\n    pub fn add_super_table_at_beginning(&self, super_table: SuperTable) {\n        if let Ok(mut tables) = self.super_tables_at_beginning.lock() {\n            tables.push(super_table);\n        }\n    }\n\n    pub fn add_super_table_at_end(&self, super_table: SuperTable) {\n        if let Ok(mut tables) = self.super_tables_at_end.lock() {\n            tables.push(super_table);\n        }\n    }\n\n    pub fn with_super_tables_at_beginning<F, R>(&self, f: F) -> Option<R>\n    where\n        F: FnOnce(&[SuperTable]) -> R,\n    {\n        self.super_tables_at_beginning.lock().ok().map(|tables| f(&tables))\n    }\n\n    pub fn with_super_tables_at_beginning_mut<F, R>(&self, f: F) -> Option<R>\n    where\n        F: FnOnce(&mut [SuperTable]) -> R,\n    {\n        
self.super_tables_at_beginning\n            .lock()\n            .ok()\n            .map(|mut tables| f(&mut tables))\n    }\n\n    pub fn with_super_tables_at_end<F, R>(&self, f: F) -> Option<R>\n    where\n        F: FnOnce(&[SuperTable]) -> R,\n    {\n        self.super_tables_at_end.lock().ok().map(|tables| f(&tables))\n    }\n\n    pub fn with_super_tables_at_end_mut<F, R>(&self, f: F) -> Option<R>\n    where\n        F: FnOnce(&mut [SuperTable]) -> R,\n    {\n        self.super_tables_at_end.lock().ok().map(|mut tables| f(&mut tables))\n    }\n\n    /// Set host_to_strip_from_urls and initial_url on a SuperTable based on crawler info.\n    /// Used so that URLs matching the initial domain are displayed without protocol+domain.\n    pub fn configure_super_table_url_stripping(&self, table: &mut SuperTable) {\n        let info = self.get_crawler_info();\n        if !info.initial_url.is_empty()\n            && let Ok(parsed) = url::Url::parse(&info.initial_url)\n        {\n            table.set_host_to_strip_from_urls(\n                parsed.host_str().map(|h| h.to_string()),\n                Some(parsed.scheme().to_string()),\n            );\n            table.set_initial_url(Some(info.initial_url.clone()));\n        }\n    }\n\n    pub fn get_super_table_by_apl_code(&self, apl_code: &str) -> bool {\n        let found_beginning = self\n            .super_tables_at_beginning\n            .lock()\n            .ok()\n            .map(|tables| tables.iter().any(|t| t.apl_code == apl_code))\n            .unwrap_or(false);\n\n        if found_beginning {\n            return true;\n        }\n\n        self.super_tables_at_end\n            .lock()\n            .ok()\n            .map(|tables| tables.iter().any(|t| t.apl_code == apl_code))\n            .unwrap_or(false)\n    }\n\n    pub fn get_url_by_uq_id(&self, uq_id: &str) -> Option<String> {\n        self.visited_urls\n            .lock()\n            .ok()\n            .and_then(|urls| urls.get(uq_id).map(|v| 
v.url.clone()))\n    }\n\n    pub fn get_origin_header_value_by_source_uq_id(&self, source_uq_id: &str) -> Option<String> {\n        self.visited_urls.lock().ok().and_then(|urls| {\n            urls.get(source_uq_id).and_then(|visited_url| {\n                url::Url::parse(&visited_url.url).ok().map(|parsed| {\n                    let scheme = parsed.scheme();\n                    let host = parsed.host_str().unwrap_or(\"\");\n                    let port = parsed.port();\n                    if let Some(p) = port {\n                        format!(\"{}://{}:{}\", scheme, host, p)\n                    } else {\n                        format!(\"{}://{}\", scheme, host)\n                    }\n                })\n            })\n        })\n    }\n\n    pub fn add_url_analysis_result(&self, visited_url_uq_id: &str, result: UrlAnalysisResultEntry) {\n        if let Ok(mut map) = self.visited_url_to_analysis_result.lock() {\n            map.entry(visited_url_uq_id.to_string()).or_default().push(result);\n        }\n    }\n\n    pub fn get_url_analysis_results(&self, visited_url_uq_id: &str) -> Vec<UrlAnalysisResultEntry> {\n        self.visited_url_to_analysis_result\n            .lock()\n            .ok()\n            .and_then(|map| map.get(visited_url_uq_id).cloned())\n            .unwrap_or_default()\n    }\n\n    pub fn add_skipped_url(&mut self, url: String, reason: SkippedReason, source_uq_id: String, source_attr: i32) {\n        if let Ok(mut skipped) = self.skipped_urls.lock() {\n            skipped.push(SkippedUrlEntry {\n                url,\n                reason,\n                source_uq_id,\n                source_attr,\n            });\n        }\n    }\n\n    pub fn get_skipped_urls(&self) -> Vec<SkippedUrlEntry> {\n        self.skipped_urls.lock().ok().map(|v| v.clone()).unwrap_or_default()\n    }\n\n    pub fn get_details_by_analysis_name_and_severity(&self, analysis_name: &str, severity: &str) -> Vec<String> {\n        let mut result = 
Vec::new();\n        if let Ok(map) = self.visited_url_to_analysis_result.lock() {\n            for entries in map.values() {\n                for entry in entries {\n                    let details = entry\n                        .result\n                        .get_details_of_severity_and_analysis_name(severity, analysis_name);\n                    result.extend(details);\n                }\n            }\n        }\n        result\n    }\n\n    pub fn get_visited_url_to_analysis_result(&self) -> HashMap<String, Vec<UrlAnalysisResultEntry>> {\n        self.visited_url_to_analysis_result\n            .lock()\n            .map(|map| map.clone())\n            .unwrap_or_default()\n    }\n\n    /// Get number of visited URLs with HTTP code >= 200\n    pub fn get_number_of_working_visited_urls(&self) -> usize {\n        self.visited_urls\n            .lock()\n            .map(|urls| urls.values().filter(|u| u.status_code >= 200).count())\n            .unwrap_or(0)\n    }\n\n    pub fn set_robots_txt_content(&self, scheme: &str, host: &str, port: u16, content: &str) {\n        let key = format!(\"{}://{}:{}\", scheme, host, port);\n        if let Ok(mut map) = self.robots_txt_content.write() {\n            map.insert(key, content.to_string());\n        }\n    }\n\n    pub fn get_robots_txt_content(&self, scheme: &str, host: &str, port: u16) -> Option<String> {\n        let key = format!(\"{}://{}:{}\", scheme, host, port);\n        self.robots_txt_content\n            .read()\n            .ok()\n            .and_then(|map| map.get(&key).cloned())\n    }\n}\n\nimpl std::fmt::Debug for Status {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"Status\")\n            .field(\"store_content\", &self.store_content)\n            .field(\n                \"visited_urls_count\",\n                &self.visited_urls.lock().map(|u| u.len()).unwrap_or(0),\n            )\n            .finish()\n    }\n}\n"
  },
  {
    "path": "src/result/storage/file_storage.rs",
    "content": "// SiteOne Crawler - FileStorage\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::fs;\nuse std::io::Write;\nuse std::path::{Path, PathBuf};\n\nuse regex::Regex;\n\nuse crate::error::{CrawlerError, CrawlerResult};\nuse crate::result::storage::storage::Storage;\n\npub struct FileStorage {\n    cache_dir: PathBuf,\n    compress: bool,\n}\n\nimpl FileStorage {\n    pub fn new(tmp_dir: &str, compress: bool, origin_url_domain: &str) -> CrawlerResult<Self> {\n        // Sanitize domain name for use as directory name\n        let sanitized_domain = match Regex::new(r\"[^a-zA-Z0-9.\\-_]\") {\n            Ok(re) => re.replace_all(&origin_url_domain.to_lowercase(), \"-\").to_string(),\n            _ => origin_url_domain.to_lowercase(),\n        };\n\n        let cache_dir = PathBuf::from(tmp_dir).join(sanitized_domain);\n\n        if !cache_dir.exists() {\n            fs::create_dir_all(&cache_dir).map_err(|e| {\n                CrawlerError::Io(std::io::Error::other(format!(\n                    \"Directory '{}' was not created: {}\",\n                    cache_dir.display(),\n                    e\n                )))\n            })?;\n        }\n\n        Ok(Self { cache_dir, compress })\n    }\n\n    fn get_file_extension(&self) -> &str {\n        if self.compress { \"cache.gz\" } else { \"cache\" }\n    }\n\n    fn get_file_path(&self, uq_id: &str) -> PathBuf {\n        debug_assert!(\n            uq_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_'),\n            \"uq_id '{}' contains unsafe characters\",\n            uq_id\n        );\n        let subdir = if uq_id.len() >= 2 { &uq_id[..2] } else { uq_id };\n        self.cache_dir\n            .join(subdir)\n            .join(format!(\"{}.{}\", uq_id, self.get_file_extension()))\n    }\n\n    fn create_directory_if_needed(&self, path: &Path) -> CrawlerResult<()> {\n        if !path.exists() {\n            fs::create_dir_all(path).map_err(|e| {\n                
CrawlerError::Io(std::io::Error::other(format!(\n                    \"Directory '{}' was not created. Please check permissions: {}\",\n                    path.display(),\n                    e\n                )))\n            })?;\n        }\n        Ok(())\n    }\n}\n\nimpl Storage for FileStorage {\n    fn save(&mut self, uq_id: &str, content: &[u8]) -> CrawlerResult<()> {\n        let data = if self.compress {\n            let mut encoder = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());\n            encoder.write_all(content).map_err(CrawlerError::Io)?;\n            encoder.finish().map_err(CrawlerError::Io)?\n        } else {\n            content.to_vec()\n        };\n\n        let file_path = self.get_file_path(uq_id);\n        if let Some(parent) = file_path.parent() {\n            self.create_directory_if_needed(parent)?;\n        }\n\n        fs::write(&file_path, &data).map_err(CrawlerError::Io)\n    }\n\n    fn load(&self, uq_id: &str) -> CrawlerResult<Vec<u8>> {\n        let file_path = self.get_file_path(uq_id);\n\n        if !file_path.exists() {\n            return Ok(Vec::new());\n        }\n\n        let data = fs::read(&file_path).map_err(CrawlerError::Io)?;\n\n        if self.compress {\n            let mut decoder = flate2::read::GzDecoder::new(&data[..]);\n            let mut decompressed = Vec::new();\n            std::io::Read::read_to_end(&mut decoder, &mut decompressed).map_err(CrawlerError::Io)?;\n            Ok(decompressed)\n        } else {\n            Ok(data)\n        }\n    }\n\n    fn delete(&mut self, uq_id: &str) -> CrawlerResult<()> {\n        let file_path = self.get_file_path(uq_id);\n        if file_path.exists() {\n            fs::remove_file(&file_path).map_err(CrawlerError::Io)?;\n        }\n        Ok(())\n    }\n\n    fn delete_all(&mut self) -> CrawlerResult<()> {\n        if self.cache_dir.exists() {\n            // Remove all files recursively within cache_dir, then recreate\n            
fs::remove_dir_all(&self.cache_dir).map_err(CrawlerError::Io)?;\n            fs::create_dir_all(&self.cache_dir).map_err(CrawlerError::Io)?;\n        }\n        Ok(())\n    }\n}\n\nimpl Drop for FileStorage {\n    fn drop(&mut self) {\n        // Clean up cache directory on drop\n        let _ = self.delete_all();\n    }\n}\n\nimpl std::fmt::Debug for FileStorage {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        f.debug_struct(\"FileStorage\")\n            .field(\"cache_dir\", &self.cache_dir)\n            .field(\"compress\", &self.compress)\n            .finish()\n    }\n}\n"
  },
  {
    "path": "src/result/storage/memory_storage.rs",
    "content": "// SiteOne Crawler - MemoryStorage\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\nuse std::io::Write;\n\nuse crate::error::{CrawlerError, CrawlerResult};\nuse crate::result::storage::storage::Storage;\n\npub struct MemoryStorage {\n    storage: HashMap<String, Vec<u8>>,\n    compress: bool,\n}\n\nimpl MemoryStorage {\n    pub fn new(compress: bool) -> Self {\n        Self {\n            storage: HashMap::new(),\n            compress,\n        }\n    }\n}\n\nimpl Storage for MemoryStorage {\n    fn save(&mut self, uq_id: &str, content: &[u8]) -> CrawlerResult<()> {\n        let data = if self.compress {\n            let mut encoder = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default());\n            encoder.write_all(content).map_err(CrawlerError::Io)?;\n            encoder.finish().map_err(CrawlerError::Io)?\n        } else {\n            content.to_vec()\n        };\n\n        self.storage.insert(uq_id.to_string(), data);\n        Ok(())\n    }\n\n    fn load(&self, uq_id: &str) -> CrawlerResult<Vec<u8>> {\n        match self.storage.get(uq_id) {\n            Some(data) if !data.is_empty() => {\n                if self.compress {\n                    let mut decoder = flate2::read::GzDecoder::new(&data[..]);\n                    let mut decompressed = Vec::new();\n                    std::io::Read::read_to_end(&mut decoder, &mut decompressed).map_err(CrawlerError::Io)?;\n                    Ok(decompressed)\n                } else {\n                    Ok(data.clone())\n                }\n            }\n            _ => Ok(Vec::new()),\n        }\n    }\n\n    fn delete(&mut self, uq_id: &str) -> CrawlerResult<()> {\n        self.storage.remove(uq_id);\n        Ok(())\n    }\n\n    fn delete_all(&mut self) -> CrawlerResult<()> {\n        self.storage.clear();\n        Ok(())\n    }\n}\n\nimpl std::fmt::Debug for MemoryStorage {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result 
{\n        f.debug_struct(\"MemoryStorage\")\n            .field(\"entries\", &self.storage.len())\n            .field(\"compress\", &self.compress)\n            .finish()\n    }\n}\n"
  },
  {
    "path": "src/result/storage/mod.rs",
    "content": "pub mod file_storage;\r\npub mod memory_storage;\r\n#[allow(clippy::module_inception)]\r\npub mod storage;\r\npub mod storage_type;\r\n"
  },
  {
    "path": "src/result/storage/storage.rs",
    "content": "// SiteOne Crawler - Storage trait\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse crate::error::CrawlerResult;\r\n\r\npub trait Storage: Send + Sync {\r\n    fn save(&mut self, uq_id: &str, content: &[u8]) -> CrawlerResult<()>;\r\n\r\n    fn load(&self, uq_id: &str) -> CrawlerResult<Vec<u8>>;\r\n\r\n    fn delete(&mut self, uq_id: &str) -> CrawlerResult<()>;\r\n\r\n    fn delete_all(&mut self) -> CrawlerResult<()>;\r\n}\r\n"
  },
  {
    "path": "src/result/storage/storage_type.rs",
    "content": "// SiteOne Crawler - StorageType\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse serde::{Deserialize, Serialize};\r\n\r\nuse crate::error::CrawlerError;\r\n\r\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]\r\n#[serde(rename_all = \"lowercase\")]\r\npub enum StorageType {\r\n    Memory,\r\n    File,\r\n}\r\n\r\nimpl StorageType {\r\n    pub fn from_text(text: &str) -> Result<Self, CrawlerError> {\r\n        match text.trim().to_lowercase().as_str() {\r\n            \"memory\" => Ok(StorageType::Memory),\r\n            \"file\" => Ok(StorageType::File),\r\n            other => Err(CrawlerError::Parse(format!(\r\n                \"Unknown storage type '{}'. Supported values are: {}\",\r\n                other,\r\n                Self::available_text_types().join(\", \")\r\n            ))),\r\n        }\r\n    }\r\n\r\n    pub fn available_text_types() -> Vec<&'static str> {\r\n        vec![\"memory\", \"file\"]\r\n    }\r\n\r\n    pub fn as_str(&self) -> &'static str {\r\n        match self {\r\n            StorageType::Memory => \"memory\",\r\n            StorageType::File => \"file\",\r\n        }\r\n    }\r\n}\r\n\r\nimpl std::fmt::Display for StorageType {\r\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\r\n        f.write_str(self.as_str())\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/result/visited_url.rs",
    "content": "// SiteOne Crawler - VisitedUrl\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::collections::HashMap;\n\nuse regex::Regex;\nuse serde::{Deserialize, Serialize};\n\nuse crate::types::ContentTypeId;\nuse crate::utils;\n\n// Error status codes (negative values)\npub const ERROR_CONNECTION_FAIL: i32 = -1;\npub const ERROR_TIMEOUT: i32 = -2;\npub const ERROR_SERVER_RESET: i32 = -3;\npub const ERROR_SEND_ERROR: i32 = -4;\npub const ERROR_SKIPPED: i32 = -6;\n\n// Cache type flags (bitwise OR)\npub const CACHE_TYPE_HAS_CACHE_CONTROL: u32 = 1;\npub const CACHE_TYPE_HAS_EXPIRES: u32 = 2;\npub const CACHE_TYPE_HAS_ETAG: u32 = 4;\npub const CACHE_TYPE_HAS_LAST_MODIFIED: u32 = 8;\npub const CACHE_TYPE_HAS_MAX_AGE: u32 = 16;\npub const CACHE_TYPE_HAS_S_MAX_AGE: u32 = 32;\npub const CACHE_TYPE_HAS_STALE_WHILE_REVALIDATE: u32 = 64;\npub const CACHE_TYPE_HAS_STALE_IF_ERROR: u32 = 128;\npub const CACHE_TYPE_HAS_PUBLIC: u32 = 256;\npub const CACHE_TYPE_HAS_PRIVATE: u32 = 512;\npub const CACHE_TYPE_HAS_NO_CACHE: u32 = 1024;\npub const CACHE_TYPE_HAS_NO_STORE: u32 = 2048;\npub const CACHE_TYPE_HAS_MUST_REVALIDATE: u32 = 4096;\npub const CACHE_TYPE_HAS_PROXY_REVALIDATE: u32 = 8192;\npub const CACHE_TYPE_HAS_IMMUTABLE: u32 = 16384;\npub const CACHE_TYPE_NO_CACHE_HEADERS: u32 = 32768;\npub const CACHE_TYPE_NOT_AVAILABLE: u32 = 65536;\n\n// Source attribute constants\npub const SOURCE_INIT_URL: i32 = 5;\npub const SOURCE_A_HREF: i32 = 10;\npub const SOURCE_IMG_SRC: i32 = 20;\npub const SOURCE_IMG_SRCSET: i32 = 21;\npub const SOURCE_INPUT_SRC: i32 = 22;\npub const SOURCE_SOURCE_SRC: i32 = 23;\npub const SOURCE_VIDEO_SRC: i32 = 24;\npub const SOURCE_AUDIO_SRC: i32 = 25;\npub const SOURCE_SCRIPT_SRC: i32 = 30;\npub const SOURCE_INLINE_SCRIPT_SRC: i32 = 40;\npub const SOURCE_LINK_HREF: i32 = 50;\npub const SOURCE_CSS_URL: i32 = 60;\npub const SOURCE_JS_URL: i32 = 70;\npub const SOURCE_REDIRECT: i32 = 80;\npub const SOURCE_SITEMAP: i32 = 90;\n\n#[derive(Debug, Clone, 
Serialize, Deserialize)]\npub struct VisitedUrl {\n    /// Unique ID hash of this URL\n    pub uq_id: String,\n\n    /// Unique ID hash of the source URL where this URL was found\n    pub source_uq_id: String,\n\n    /// Source attribute where this URL was found (see SOURCE_* constants)\n    pub source_attr: i32,\n\n    /// Full URL with scheme, domain, path and query\n    pub url: String,\n\n    /// HTTP status code of the request (negative values are errors, see ERROR_* constants)\n    pub status_code: i32,\n\n    /// Request time in seconds\n    pub request_time: f64,\n\n    /// Request time formatted as \"32 ms\" or \"7.4 s\"\n    pub request_time_formatted: String,\n\n    /// Size of the response in bytes\n    pub size: Option<i64>,\n\n    /// Size of the response formatted as \"1.23 MB\"\n    pub size_formatted: Option<String>,\n\n    /// Content-Encoding header value (br, gzip, ...)\n    pub content_encoding: Option<String>,\n\n    /// Content type ID\n    pub content_type: ContentTypeId,\n\n    /// Content type header value (text/html, application/json, ...)\n    pub content_type_header: Option<String>,\n\n    /// Extra data from the response required by --extra-columns\n    pub extras: Option<HashMap<String, String>>,\n\n    /// Is this URL external (not from the same domain as the initial URL)\n    pub is_external: bool,\n\n    /// Is this URL allowed for crawling (based on --allowed-domain-for-crawling)\n    pub is_allowed_for_crawling: bool,\n\n    /// Cache type flags of the response (bitwise OR). 
See CACHE_TYPE_* constants\n    pub cache_type_flags: u32,\n\n    /// How long the response is allowed to be cached in seconds\n    pub cache_lifetime: Option<i64>,\n}\n\nimpl VisitedUrl {\n    #[allow(clippy::too_many_arguments)]\n    pub fn new(\n        uq_id: String,\n        source_uq_id: String,\n        source_attr: i32,\n        url: String,\n        status_code: i32,\n        request_time: f64,\n        size: Option<i64>,\n        content_type: ContentTypeId,\n        content_type_header: Option<String>,\n        content_encoding: Option<String>,\n        extras: Option<HashMap<String, String>>,\n        is_external: bool,\n        is_allowed_for_crawling: bool,\n        cache_type_flags: u32,\n        cache_lifetime: Option<i64>,\n    ) -> Self {\n        let request_time_formatted = utils::get_formatted_duration(request_time);\n        let size_formatted = size.map(|s| utils::get_formatted_size(s, 0));\n\n        Self {\n            uq_id,\n            source_uq_id,\n            source_attr,\n            url,\n            status_code,\n            request_time,\n            request_time_formatted,\n            size,\n            size_formatted,\n            content_encoding,\n            content_type,\n            content_type_header,\n            extras,\n            is_external,\n            is_allowed_for_crawling,\n            cache_type_flags,\n            cache_lifetime,\n        }\n    }\n\n    pub fn is_https(&self) -> bool {\n        self.url.starts_with(\"https://\")\n    }\n\n    pub fn is_static_file(&self) -> bool {\n        matches!(\n            self.content_type,\n            ContentTypeId::Image\n                | ContentTypeId::Script\n                | ContentTypeId::Stylesheet\n                | ContentTypeId::Video\n                | ContentTypeId::Audio\n                | ContentTypeId::Document\n                | ContentTypeId::Font\n                | ContentTypeId::Json\n                | ContentTypeId::Xml\n        )\n    }\n\n   
 pub fn is_image(&self) -> bool {\n        self.content_type == ContentTypeId::Image\n    }\n\n    pub fn is_video(&self) -> bool {\n        self.content_type == ContentTypeId::Video\n    }\n\n    pub fn get_source_description(&self, source_url: Option<&str>) -> String {\n        let source = source_url.unwrap_or(\"unknown\");\n        match self.source_attr {\n            SOURCE_INIT_URL => \"Initial URL\".to_string(),\n            SOURCE_A_HREF => format!(\"<a href> on {}\", source),\n            SOURCE_IMG_SRC => format!(\"<img src> on {}\", source),\n            SOURCE_IMG_SRCSET => format!(\"<img srcset> on {}\", source),\n            SOURCE_INPUT_SRC => format!(\"<input src> on {}\", source),\n            SOURCE_SOURCE_SRC => format!(\"<source src> on {}\", source),\n            SOURCE_VIDEO_SRC => format!(\"<video src> on {}\", source),\n            SOURCE_AUDIO_SRC => format!(\"<audio src> on {}\", source),\n            SOURCE_SCRIPT_SRC => format!(\"<script src> on {}\", source),\n            SOURCE_INLINE_SCRIPT_SRC => format!(\"<script> on {}\", source),\n            SOURCE_LINK_HREF => format!(\"<link href> on {}\", source),\n            SOURCE_CSS_URL => format!(\"CSS url() on {}\", source),\n            SOURCE_JS_URL => format!(\"JS url on {}\", source),\n            SOURCE_REDIRECT => format!(\"Redirect from {}\", source),\n            SOURCE_SITEMAP => format!(\"URL in sitemap {}\", source),\n            _ => \"Unknown source\".to_string(),\n        }\n    }\n\n    pub fn get_source_short_name(&self) -> &'static str {\n        match self.source_attr {\n            SOURCE_INIT_URL => \"Initial URL\",\n            SOURCE_A_HREF => \"<a href>\",\n            SOURCE_IMG_SRC => \"<img src>\",\n            SOURCE_IMG_SRCSET => \"<img srcset>\",\n            SOURCE_INPUT_SRC => \"<input src>\",\n            SOURCE_SOURCE_SRC => \"<source src>\",\n            SOURCE_VIDEO_SRC => \"<video src>\",\n            SOURCE_AUDIO_SRC => \"<audio src>\",\n            
SOURCE_SCRIPT_SRC => \"<script src>\",\n            SOURCE_INLINE_SCRIPT_SRC => \"inline <script src>\",\n            SOURCE_LINK_HREF => \"<link href>\",\n            SOURCE_CSS_URL => \"css url()\",\n            SOURCE_JS_URL => \"js url\",\n            SOURCE_REDIRECT => \"redirect\",\n            SOURCE_SITEMAP => \"sitemap\",\n            _ => \"unknown\",\n        }\n    }\n\n    pub fn looks_like_static_file_by_url(&self) -> bool {\n        use once_cell::sync::Lazy;\n        static RE_STATIC_FILE: Lazy<Regex> = Lazy::new(|| {\n            Regex::new(\n                r\"(?i)\\.(jpg|jpeg|png|gif|webp|svg|ico|js|css|txt|woff2|woff|ttf|eot|mp4|webm|ogg|mp3|wav|flac|pdf|doc|docx|xls|xlsx|ppt|pptx|zip|rar|gz|bz2|7z|xml|json)\",\n            ).unwrap()\n        });\n        RE_STATIC_FILE.is_match(&self.url)\n    }\n\n    pub fn has_error_status_code(&self) -> bool {\n        self.status_code < 0\n    }\n\n    pub fn get_scheme(&self) -> Option<String> {\n        url::Url::parse(&self.url).ok().map(|u| u.scheme().to_string())\n    }\n\n    pub fn get_host(&self) -> Option<String> {\n        url::Url::parse(&self.url)\n            .ok()\n            .and_then(|u| u.host_str().map(|h| h.to_string()))\n    }\n\n    pub fn get_port(&self) -> u16 {\n        if let Ok(parsed) = url::Url::parse(&self.url) {\n            parsed.port().unwrap_or_else(|| if self.is_https() { 443 } else { 80 })\n        } else if self.is_https() {\n            443\n        } else {\n            80\n        }\n    }\n\n    pub fn get_cache_type_label(&self) -> String {\n        let mut labels = Vec::new();\n\n        // Cache-Control or Expires (if Cache-Control is not defined)\n        if self.cache_type_flags & CACHE_TYPE_HAS_CACHE_CONTROL != 0 {\n            labels.push(\"Cache-Control\");\n        } else if self.cache_type_flags & CACHE_TYPE_HAS_EXPIRES != 0 {\n            labels.push(\"Expires\");\n        }\n\n        // ETag and Last-Modified\n        if self.cache_type_flags & 
CACHE_TYPE_HAS_ETAG != 0 {\n            labels.push(\"ETag\");\n        }\n        if self.cache_type_flags & CACHE_TYPE_HAS_LAST_MODIFIED != 0 {\n            labels.push(\"Last-Modified\");\n        }\n\n        if labels.is_empty() {\n            \"No cache headers\".to_string()\n        } else {\n            labels.join(\" + \")\n        }\n    }\n}\n"
  },
  {
    "path": "src/scoring/ci_gate.rs",
    "content": "// SiteOne Crawler - CI/CD Quality Gate\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Evaluates crawler results against configurable thresholds.\n// Returns exit code 10 when any check fails.\n\nuse serde::Serialize;\n\nuse crate::components::summary::item_status::ItemStatus;\nuse crate::components::summary::summary::Summary;\nuse crate::options::core_options::CoreOptions;\nuse crate::output::output::BasicStats;\nuse crate::scoring::quality_score::QualityScores;\nuse crate::types::ContentTypeId;\n\n#[derive(Debug, Clone, Serialize)]\n#[serde(rename_all = \"camelCase\")]\npub struct CiCheck {\n    pub metric: String,\n    pub operator: String,\n    pub threshold: f64,\n    pub actual: f64,\n    pub passed: bool,\n}\n\n#[derive(Debug, Clone, Serialize)]\n#[serde(rename_all = \"camelCase\")]\npub struct CiGateResult {\n    pub passed: bool,\n    pub exit_code: i32,\n    pub checks: Vec<CiCheck>,\n}\n\npub fn evaluate(options: &CoreOptions, scores: &QualityScores, stats: &BasicStats, summary: &Summary) -> CiGateResult {\n    let mut checks = Vec::new();\n\n    // If no pages were successfully crawled, fail immediately.\n    // URLs with negative status codes (-1 connection error, -2 timeout, etc.) 
don't count.\n    let has_successful_response = stats.count_by_status.keys().any(|&code| code > 0);\n    if stats.total_urls == 0 || !has_successful_response {\n        checks.push(CiCheck {\n            metric: \"Pages crawled\".to_string(),\n            operator: \">\".to_string(),\n            threshold: 0.0,\n            actual: 0.0,\n            passed: false,\n        });\n        return CiGateResult {\n            passed: false,\n            exit_code: 10,\n            checks,\n        };\n    }\n\n    // Overall score\n    checks.push(check_min(\"Overall score\", scores.overall.score, options.ci_min_score));\n\n    // Category scores\n    if let Some(threshold) = options.ci_min_performance {\n        let actual = find_category_score(scores, \"performance\");\n        checks.push(check_min(\"Performance score\", actual, threshold));\n    }\n    if let Some(threshold) = options.ci_min_seo {\n        let actual = find_category_score(scores, \"seo\");\n        checks.push(check_min(\"SEO score\", actual, threshold));\n    }\n    if let Some(threshold) = options.ci_min_security {\n        let actual = find_category_score(scores, \"security\");\n        checks.push(check_min(\"Security score\", actual, threshold));\n    }\n    if let Some(threshold) = options.ci_min_accessibility {\n        let actual = find_category_score(scores, \"accessibility\");\n        checks.push(check_min(\"Accessibility score\", actual, threshold));\n    }\n    if let Some(threshold) = options.ci_min_best_practices {\n        let actual = find_category_score(scores, \"best-practices\");\n        checks.push(check_min(\"Best Practices score\", actual, threshold));\n    }\n\n    // 404 errors\n    let count_404 = stats.count_by_status.get(&404).copied().unwrap_or(0) as f64;\n    checks.push(check_max(\"404 errors\", count_404, options.ci_max_404 as f64));\n\n    // 5xx errors\n    let count_5xx: usize = stats\n        .count_by_status\n        .iter()\n        .filter(|&(&code, _)| 
(500..600).contains(&code))\n        .map(|(_, &count)| count)\n        .sum();\n    checks.push(check_max(\"5xx errors\", count_5xx as f64, options.ci_max_5xx as f64));\n\n    // Critical findings\n    let criticals = summary.get_count_by_item_status(ItemStatus::Critical) as f64;\n    checks.push(check_max(\n        \"Critical findings\",\n        criticals,\n        options.ci_max_criticals as f64,\n    ));\n\n    // Warning findings (optional)\n    if let Some(max_warnings) = options.ci_max_warnings {\n        let warnings = summary.get_count_by_item_status(ItemStatus::Warning) as f64;\n        checks.push(check_max(\"Warning findings\", warnings, max_warnings as f64));\n    }\n\n    // Average response time (optional)\n    if let Some(max_avg) = options.ci_max_avg_response {\n        checks.push(check_max(\n            \"Avg response time (s)\",\n            stats.total_requests_times_avg,\n            max_avg,\n        ));\n    }\n\n    // Minimum content type counts\n    let pages = count_content_types(stats, &[ContentTypeId::Html]);\n    checks.push(check_min(\"HTML pages\", pages as f64, options.ci_min_pages as f64));\n\n    let assets = count_content_types(\n        stats,\n        &[\n            ContentTypeId::Script,\n            ContentTypeId::Stylesheet,\n            ContentTypeId::Image,\n            ContentTypeId::Font,\n        ],\n    );\n    checks.push(check_min(\n        \"Assets (JS/CSS/img/font)\",\n        assets as f64,\n        options.ci_min_assets as f64,\n    ));\n\n    if options.ci_min_documents > 0 {\n        let documents = count_content_types(stats, &[ContentTypeId::Document]);\n        checks.push(check_min(\n            \"Documents\",\n            documents as f64,\n            options.ci_min_documents as f64,\n        ));\n    }\n\n    let passed = checks.iter().all(|c| c.passed);\n    CiGateResult {\n        passed,\n        exit_code: if passed { 0 } else { 10 },\n        checks,\n    }\n}\n\nfn check_min(metric: &str, actual: 
f64, threshold: f64) -> CiCheck {\n    CiCheck {\n        metric: metric.to_string(),\n        operator: \">=\".to_string(),\n        threshold,\n        actual,\n        passed: actual >= threshold,\n    }\n}\n\nfn check_max(metric: &str, actual: f64, threshold: f64) -> CiCheck {\n    CiCheck {\n        metric: metric.to_string(),\n        operator: \"<=\".to_string(),\n        threshold,\n        actual,\n        passed: actual <= threshold,\n    }\n}\n\nfn count_content_types(stats: &BasicStats, types: &[ContentTypeId]) -> usize {\n    types\n        .iter()\n        .map(|t| stats.count_by_content_type.get(&(*t as i32)).copied().unwrap_or(0))\n        .sum()\n}\n\nfn find_category_score(scores: &QualityScores, code: &str) -> f64 {\n    scores\n        .categories\n        .iter()\n        .find(|c| c.code == code)\n        .map(|c| c.score)\n        .unwrap_or(0.0)\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n    use crate::components::summary::item::Item;\n    use crate::scoring::quality_score::{CategoryScore, score_label};\n    use std::collections::BTreeMap;\n\n    fn make_options() -> CoreOptions {\n        CoreOptions {\n            url: \"https://test.com\".to_string(),\n            single_page: false,\n            max_depth: 0,\n            device: crate::types::DeviceType::Desktop,\n            user_agent: None,\n            timeout: 5,\n            proxy: None,\n            http_auth: None,\n            accept_invalid_certs: false,\n            timezone: None,\n            show_version_only: false,\n            show_help_only: false,\n            output_type: crate::types::OutputType::Text,\n            url_column_size: None,\n            show_inline_criticals: false,\n            show_inline_warnings: false,\n            rows_limit: 200,\n            extra_columns: Vec::new(),\n            extra_columns_names_only: Vec::new(),\n            show_scheme_and_host: false,\n            do_not_truncate_url: false,\n            hide_progress_bar: 
false,\n            hide_columns: Vec::new(),\n            no_color: false,\n            force_color: false,\n            console_width: None,\n            disable_all_assets: false,\n            disable_javascript: false,\n            disable_styles: false,\n            disable_fonts: false,\n            disable_images: false,\n            disable_files: false,\n            remove_all_anchor_listeners: false,\n            workers: 3,\n            max_reqs_per_sec: 10.0,\n            memory_limit: \"2048M\".to_string(),\n            resolve: Vec::new(),\n            websocket_server: None,\n            ignore_robots_txt: false,\n            allowed_domains_for_external_files: Vec::new(),\n            allowed_domains_for_crawling: Vec::new(),\n            single_foreign_page: false,\n            result_storage: crate::options::core_options::StorageType::Memory,\n            result_storage_dir: \"tmp/result-storage\".to_string(),\n            result_storage_compression: false,\n            accept_encoding: \"gzip, deflate, br\".to_string(),\n            max_queue_length: 9000,\n            max_visited_urls: 10000,\n            max_url_length: 2083,\n            max_skipped_urls: 10000,\n            max_non200_responses_per_basename: 5,\n            include_regex: Vec::new(),\n            ignore_regex: Vec::new(),\n            regex_filtering_only_for_pages: false,\n            analyzer_filter_regex: None,\n            add_random_query_params: false,\n            remove_query_params: false,\n            keep_query_params: Vec::new(),\n            transform_url: Vec::new(),\n            force_relative_urls: false,\n            output_html_report: None,\n            html_report_options: None,\n            output_json_file: None,\n            output_text_file: None,\n            add_host_to_output_file: false,\n            add_timestamp_to_output_file: false,\n            sitemap_xml_file: None,\n            sitemap_txt_file: None,\n            sitemap_base_priority: 
0.5,\n            sitemap_priority_increase: 0.1,\n            offline_export_dir: None,\n            offline_export_store_only_url_regex: Vec::new(),\n            offline_export_remove_unwanted_code: true,\n            offline_export_no_auto_redirect_html: false,\n            offline_export_preserve_url_structure: false,\n            offline_export_preserve_urls: false,\n            replace_content: Vec::new(),\n            replace_query_string: Vec::new(),\n            offline_export_lowercase: false,\n            ignore_store_file_error: false,\n            disable_astro_inline_modules: false,\n            markdown_export_dir: None,\n            markdown_export_single_file: None,\n            markdown_move_content_before_h1_to_end: false,\n            markdown_disable_images: false,\n            markdown_disable_files: false,\n            markdown_remove_links_and_images_from_single_file: false,\n            markdown_exclude_selector: Vec::new(),\n            markdown_replace_content: Vec::new(),\n            markdown_replace_query_string: Vec::new(),\n            markdown_export_store_only_url_regex: Vec::new(),\n            markdown_ignore_store_file_error: false,\n            mail_to: Vec::new(),\n            mail_from: \"test@test.com\".to_string(),\n            mail_from_name: \"Test\".to_string(),\n            mail_subject_template: \"Test\".to_string(),\n            mail_smtp_host: \"localhost\".to_string(),\n            mail_smtp_port: 25,\n            mail_smtp_user: None,\n            mail_smtp_pass: None,\n            upload_enabled: false,\n            upload_to: String::new(),\n            upload_retention: \"30d\".to_string(),\n            upload_password: None,\n            upload_timeout: 3600,\n            http_cache_dir: None,\n            http_cache_compression: false,\n            http_cache_ttl: None,\n            debug: false,\n            debug_log_file: None,\n            debug_url_regex: Vec::new(),\n            fastest_top_limit: 20,\n  
          fastest_max_time: 1.0,\n            max_heading_level: 3,\n            slowest_top_limit: 20,\n            slowest_min_time: 0.01,\n            slowest_max_time: 3.0,\n            serve_markdown_dir: None,\n            serve_offline_dir: None,\n            serve_port: 8321,\n            serve_bind_address: \"127.0.0.1\".to_string(),\n            html_to_markdown_file: None,\n            html_to_markdown_output: None,\n            ci: true,\n            ci_min_score: 5.0,\n            ci_min_performance: Some(5.0),\n            ci_min_seo: Some(5.0),\n            ci_min_security: Some(5.0),\n            ci_min_accessibility: Some(3.0),\n            ci_min_best_practices: Some(5.0),\n            ci_max_404: 0,\n            ci_max_5xx: 0,\n            ci_max_criticals: 0,\n            ci_max_warnings: None,\n            ci_max_avg_response: None,\n            ci_min_pages: 0,\n            ci_min_assets: 0,\n            ci_min_documents: 0,\n        }\n    }\n\n    fn make_scores(overall: f64) -> QualityScores {\n        let cats = vec![\n            (\"Performance\", \"performance\", 0.20),\n            (\"SEO\", \"seo\", 0.20),\n            (\"Security\", \"security\", 0.25),\n            (\"Accessibility\", \"accessibility\", 0.20),\n            (\"Best Practices\", \"best-practices\", 0.15),\n        ];\n        QualityScores {\n            overall: CategoryScore {\n                name: \"Overall\".to_string(),\n                code: \"overall\".to_string(),\n                score: overall,\n                label: score_label(overall).to_string(),\n                weight: 1.0,\n                deductions: Vec::new(),\n            },\n            categories: cats\n                .into_iter()\n                .map(|(name, code, weight)| CategoryScore {\n                    name: name.to_string(),\n                    code: code.to_string(),\n                    score: overall,\n                    label: score_label(overall).to_string(),\n                 
   weight,\n                    deductions: Vec::new(),\n                })\n                .collect(),\n        }\n    }\n\n    fn make_stats(total_urls: usize) -> BasicStats {\n        let mut count_by_status = BTreeMap::new();\n        if total_urls > 0 {\n            count_by_status.insert(200, total_urls);\n        }\n        BasicStats {\n            total_urls,\n            count_by_status,\n            ..Default::default()\n        }\n    }\n\n    fn make_stats_with_status(total_urls: usize, status_counts: &[(i32, usize)]) -> BasicStats {\n        let mut count_by_status = BTreeMap::new();\n        for &(code, count) in status_counts {\n            count_by_status.insert(code, count);\n        }\n        BasicStats {\n            total_urls,\n            count_by_status,\n            ..Default::default()\n        }\n    }\n\n    #[test]\n    fn all_checks_pass() {\n        let options = make_options();\n        let scores = make_scores(8.0);\n        let stats = make_stats(100);\n        let summary = Summary::new();\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(result.passed);\n        assert_eq!(result.exit_code, 0);\n    }\n\n    #[test]\n    fn fail_low_overall_score() {\n        let options = make_options();\n        let scores = make_scores(3.0);\n        let stats = make_stats(100);\n        let summary = Summary::new();\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(!result.passed);\n        assert_eq!(result.exit_code, 10);\n    }\n\n    #[test]\n    fn fail_404_count() {\n        let options = make_options();\n        let scores = make_scores(8.0);\n        let stats = make_stats_with_status(100, &[(404, 3)]);\n        let summary = Summary::new();\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(!result.passed);\n    }\n\n    #[test]\n    fn fail_5xx_count() {\n        let options = make_options();\n        let scores = 
make_scores(8.0);\n        let stats = make_stats_with_status(100, &[(500, 2)]);\n        let summary = Summary::new();\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(!result.passed);\n    }\n\n    #[test]\n    fn fail_criticals() {\n        let options = make_options();\n        let scores = make_scores(8.0);\n        let stats = make_stats(100);\n        let mut summary = Summary::new();\n        summary.add_item(Item::new(\n            \"test\".to_string(),\n            \"Test critical\".to_string(),\n            ItemStatus::Critical,\n        ));\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(!result.passed);\n    }\n\n    #[test]\n    fn optional_warnings() {\n        let mut options = make_options();\n        options.ci_max_warnings = Some(0);\n        let scores = make_scores(8.0);\n        let stats = make_stats(100);\n        let mut summary = Summary::new();\n        summary.add_item(Item::new(\n            \"test\".to_string(),\n            \"Test warning\".to_string(),\n            ItemStatus::Warning,\n        ));\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(!result.passed);\n    }\n\n    #[test]\n    fn optional_avg_response() {\n        let mut options = make_options();\n        options.ci_max_avg_response = Some(0.5);\n        let scores = make_scores(8.0);\n        let mut stats = make_stats(100);\n        stats.total_requests_times_avg = 1.0;\n        let summary = Summary::new();\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(!result.passed);\n    }\n\n    #[test]\n    fn zero_urls_immediate_fail() {\n        let options = make_options();\n        let scores = make_scores(10.0);\n        let stats = make_stats(0);\n        let summary = Summary::new();\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(!result.passed);\n        assert_eq!(result.exit_code, 
10);\n    }\n\n    #[test]\n    fn only_negative_status_codes_immediate_fail() {\n        let options = make_options();\n        let scores = make_scores(10.0);\n        // 1 URL visited but only with negative status (e.g. timeout = -2)\n        let stats = make_stats_with_status(1, &[(-2, 1)]);\n        let summary = Summary::new();\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(!result.passed);\n        assert_eq!(result.exit_code, 10);\n    }\n\n    #[test]\n    fn category_threshold() {\n        let mut options = make_options();\n        options.ci_min_performance = Some(8.0);\n        let mut scores = make_scores(9.0);\n        // Set performance score to 6.0 while keeping overall high\n        scores.categories[0].score = 6.0;\n        let stats = make_stats(100);\n        let summary = Summary::new();\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(!result.passed);\n    }\n\n    #[test]\n    fn fail_min_pages() {\n        let mut options = make_options();\n        options.ci_min_pages = 5;\n        let scores = make_scores(8.0);\n        let mut stats = make_stats(100);\n        // Only 3 HTML pages\n        stats.count_by_content_type.insert(ContentTypeId::Html as i32, 3);\n        let summary = Summary::new();\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(!result.passed);\n        assert!(result.checks.iter().any(|c| c.metric == \"HTML pages\" && !c.passed));\n    }\n\n    #[test]\n    fn pass_min_pages() {\n        let mut options = make_options();\n        options.ci_min_pages = 5;\n        let scores = make_scores(8.0);\n        let mut stats = make_stats(100);\n        stats.count_by_content_type.insert(ContentTypeId::Html as i32, 10);\n        let summary = Summary::new();\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(result.checks.iter().any(|c| c.metric == \"HTML pages\" && c.passed));\n    
}\n\n    #[test]\n    fn fail_min_assets() {\n        let mut options = make_options();\n        options.ci_min_assets = 5;\n        let scores = make_scores(8.0);\n        let mut stats = make_stats(100);\n        stats.count_by_content_type.insert(ContentTypeId::Script as i32, 1);\n        stats.count_by_content_type.insert(ContentTypeId::Stylesheet as i32, 1);\n        // Total assets = 2, below threshold 5\n        let summary = Summary::new();\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(!result.passed);\n        assert!(result.checks.iter().any(|c| c.metric.contains(\"Assets\") && !c.passed));\n    }\n\n    #[test]\n    fn documents_check_skipped_when_zero() {\n        let options = make_options(); // ci_min_documents = 0\n        let scores = make_scores(8.0);\n        let stats = make_stats(100);\n        let summary = Summary::new();\n        let result = evaluate(&options, &scores, &stats, &summary);\n        // Documents check should not appear at all\n        assert!(!result.checks.iter().any(|c| c.metric == \"Documents\"));\n    }\n\n    #[test]\n    fn fail_min_documents() {\n        let mut options = make_options();\n        options.ci_min_documents = 2;\n        let scores = make_scores(8.0);\n        let mut stats = make_stats(100);\n        stats.count_by_content_type.insert(ContentTypeId::Document as i32, 1);\n        let summary = Summary::new();\n        let result = evaluate(&options, &scores, &stats, &summary);\n        assert!(!result.passed);\n        assert!(result.checks.iter().any(|c| c.metric == \"Documents\" && !c.passed));\n    }\n}\n"
  },
  {
    "path": "src/scoring/mod.rs",
    "content": "// SiteOne Crawler - Quality Scoring module\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\npub mod ci_gate;\r\npub mod quality_score;\r\npub mod scorer;\r\n"
  },
  {
    "path": "src/scoring/quality_score.rs",
    "content": "// SiteOne Crawler - Quality Score data model\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse serde::Serialize;\r\n\r\n#[derive(Debug, Clone, Serialize)]\r\n#[serde(rename_all = \"camelCase\")]\r\npub struct QualityScores {\r\n    pub overall: CategoryScore,\r\n    pub categories: Vec<CategoryScore>,\r\n}\r\n\r\n#[derive(Debug, Clone, Serialize)]\r\n#[serde(rename_all = \"camelCase\")]\r\npub struct CategoryScore {\r\n    pub name: String,\r\n    pub code: String,\r\n    pub score: f64,\r\n    pub label: String,\r\n    pub weight: f64,\r\n    pub deductions: Vec<Deduction>,\r\n}\r\n\r\n#[derive(Debug, Clone, Serialize)]\r\n#[serde(rename_all = \"camelCase\")]\r\npub struct Deduction {\r\n    pub reason: String,\r\n    pub points: f64,\r\n}\r\n\r\nimpl CategoryScore {\r\n    pub fn color_hex(&self) -> &'static str {\r\n        match self.score {\r\n            s if s >= 9.0 => \"#22c55e\",\r\n            s if s >= 7.0 => \"#3b82f6\",\r\n            s if s >= 5.0 => \"#eab308\",\r\n            s if s >= 3.0 => \"#a855f7\",\r\n            _ => \"#ef4444\",\r\n        }\r\n    }\r\n\r\n    pub fn console_color(&self) -> &'static str {\r\n        match self.score {\r\n            s if s >= 9.0 => \"green\",\r\n            s if s >= 7.0 => \"blue\",\r\n            s if s >= 5.0 => \"yellow\",\r\n            s if s >= 3.0 => \"magenta\",\r\n            _ => \"red\",\r\n        }\r\n    }\r\n}\r\n\r\npub fn score_label(score: f64) -> &'static str {\r\n    match score {\r\n        s if s >= 9.0 => \"Excellent\",\r\n        s if s >= 7.0 => \"Good\",\r\n        s if s >= 5.0 => \"Fair\",\r\n        s if s >= 3.0 => \"Poor\",\r\n        _ => \"Critical\",\r\n    }\r\n}\r\n\r\n#[cfg(test)]\r\nmod tests {\r\n    use super::*;\r\n\r\n    fn make_score(score: f64) -> CategoryScore {\r\n        CategoryScore {\r\n            name: \"Test\".to_string(),\r\n            code: \"test\".to_string(),\r\n            score,\r\n            label: 
score_label(score).to_string(),\r\n            weight: 1.0,\r\n            deductions: Vec::new(),\r\n        }\r\n    }\r\n\r\n    #[test]\r\n    fn score_label_values() {\r\n        assert_eq!(score_label(0.0), \"Critical\");\r\n        assert_eq!(score_label(3.0), \"Poor\");\r\n        assert_eq!(score_label(5.0), \"Fair\");\r\n        assert_eq!(score_label(7.0), \"Good\");\r\n        assert_eq!(score_label(9.0), \"Excellent\");\r\n    }\r\n\r\n    #[test]\r\n    fn score_label_boundaries() {\r\n        assert_eq!(score_label(2.99), \"Critical\");\r\n        assert_eq!(score_label(4.99), \"Poor\");\r\n        assert_eq!(score_label(6.99), \"Fair\");\r\n        assert_eq!(score_label(8.99), \"Good\");\r\n    }\r\n\r\n    #[test]\r\n    fn color_hex_green_for_excellent() {\r\n        assert_eq!(make_score(9.5).color_hex(), \"#22c55e\");\r\n    }\r\n\r\n    #[test]\r\n    fn color_hex_purple_for_poor() {\r\n        assert_eq!(make_score(4.0).color_hex(), \"#a855f7\");\r\n    }\r\n\r\n    #[test]\r\n    fn color_hex_red_for_critical() {\r\n        assert_eq!(make_score(1.0).color_hex(), \"#ef4444\");\r\n    }\r\n\r\n    #[test]\r\n    fn color_hex_boundaries() {\r\n        assert_eq!(make_score(8.99).color_hex(), \"#3b82f6\");\r\n        assert_eq!(make_score(6.99).color_hex(), \"#eab308\");\r\n    }\r\n\r\n    #[test]\r\n    fn console_color_values() {\r\n        assert_eq!(make_score(9.5).console_color(), \"green\");\r\n        assert_eq!(make_score(7.5).console_color(), \"blue\");\r\n        assert_eq!(make_score(5.5).console_color(), \"yellow\");\r\n        assert_eq!(make_score(3.5).console_color(), \"magenta\");\r\n        assert_eq!(make_score(1.0).console_color(), \"red\");\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/scoring/scorer.rs",
    "content": "// SiteOne Crawler - Quality Scorer\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Computes quality scores (0.0-10.0) across 5 categories based on\n// data already collected by existing analyzers.\n\nuse regex::Regex;\n\nuse crate::components::summary::item_status::ItemStatus;\nuse crate::components::summary::summary::Summary;\nuse crate::output::output::BasicStats;\nuse crate::scoring::quality_score::{CategoryScore, Deduction, QualityScores, score_label};\n\n/// Maximum total deduction from \"per URL\" rules within a single category.\nconst MAX_PER_URL_DEDUCTION: f64 = 5.0;\n\n/// Maximum deduction from a single per-URL deduction type (prevents one issue from eating entire budget).\nconst MAX_PER_TYPE_DEDUCTION: f64 = 2.5;\n\n/// Calculate quality scores from analysis results.\npub fn calculate_scores(summary: &Summary, basic_stats: &BasicStats) -> QualityScores {\n    let categories = vec![\n        score_performance(summary, basic_stats),\n        score_seo(summary, basic_stats),\n        score_security(summary),\n        score_accessibility(summary),\n        score_best_practices(summary),\n    ];\n\n    let overall_score = categories.iter().map(|c| c.score * c.weight).sum::<f64>();\n    let overall_score = round1(overall_score);\n\n    let overall = CategoryScore {\n        name: \"Overall\".to_string(),\n        code: \"overall\".to_string(),\n        score: overall_score,\n        label: score_label(overall_score).to_string(),\n        weight: 1.0,\n        deductions: Vec::new(),\n    };\n\n    QualityScores { overall, categories }\n}\n\n// ---- Category scorers ----\n\nfn score_performance(summary: &Summary, stats: &BasicStats) -> CategoryScore {\n    let mut deductions = Vec::new();\n    let mut per_url_total = 0.0;\n\n    // Average response time\n    if stats.total_requests_times_avg > 1.0 {\n        deductions.push(Deduction {\n            reason: format!(\n                \"Average response time {:.0}ms > 1000ms\",\n                
stats.total_requests_times_avg * 1000.0\n            ),\n            points: 1.0,\n        });\n    } else if stats.total_requests_times_avg > 0.5 {\n        deductions.push(Deduction {\n            reason: format!(\n                \"Average response time {:.0}ms > 500ms\",\n                stats.total_requests_times_avg * 1000.0\n            ),\n            points: 0.5,\n        });\n    }\n\n    // Slowest single response (from BasicStats — covers all resource types)\n    if stats.total_requests_times_max > 5.0 {\n        deductions.push(Deduction {\n            reason: format!(\"Slowest response {:.1}s > 5.0s\", stats.total_requests_times_max),\n            points: 1.0,\n        });\n    } else if stats.total_requests_times_max > 3.0 {\n        deductions.push(Deduction {\n            reason: format!(\"Slowest response {:.1}s > 3.0s\", stats.total_requests_times_max),\n            points: 0.5,\n        });\n    }\n\n    // Slow URLs count (from slowest analyzer summary)\n    if is_not_ok(summary, \"slowUrls\") {\n        let count = get_item_count(summary, \"slowUrls\").unwrap_or(1);\n        if count > 0 {\n            let pts = (count as f64 * 0.3).min(MAX_PER_URL_DEDUCTION);\n            per_url_total += pts;\n            deductions.push(Deduction {\n                reason: format!(\"{} slow URL(s) detected\", count),\n                points: round1(pts),\n            });\n        }\n    }\n\n    build_category(\"Performance\", \"performance\", 0.20, deductions, per_url_total)\n}\n\nfn score_seo(summary: &Summary, stats: &BasicStats) -> CategoryScore {\n    let mut deductions = Vec::new();\n    let mut per_url_total = 0.0;\n\n    // Missing H1\n    per_url_deduct(\n        summary,\n        \"pages-without-h1\",\n        0.3,\n        \"page(s) without <h1>\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    // Multiple H1\n    per_url_deduct(\n        summary,\n        \"pages-with-multiple-h1\",\n        0.2,\n        \"page(s) with 
multiple <h1>\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    // Title uniqueness issues\n    if is_not_ok(summary, \"title-uniqueness\") {\n        let count = get_item_count_for_code(summary, \"title-uniqueness\").unwrap_or(1);\n        let pts = (count as f64 * 0.3).min(MAX_PER_TYPE_DEDUCTION);\n        let remaining = MAX_PER_URL_DEDUCTION - per_url_total;\n        let pts = pts.min(remaining).max(0.0);\n        per_url_total += pts;\n        deductions.push(Deduction {\n            reason: \"Non-unique page titles detected\".to_string(),\n            points: round1(pts),\n        });\n    }\n\n    // Meta description uniqueness\n    if is_not_ok(summary, \"meta-description-uniqueness\") {\n        let count = get_item_count_for_code(summary, \"meta-description-uniqueness\").unwrap_or(1);\n        let pts = (count as f64 * 0.3).min(MAX_PER_TYPE_DEDUCTION);\n        let remaining = MAX_PER_URL_DEDUCTION - per_url_total;\n        let pts = pts.min(remaining).max(0.0);\n        per_url_total += pts;\n        deductions.push(Deduction {\n            reason: \"Non-unique meta descriptions detected\".to_string(),\n            points: round1(pts),\n        });\n    }\n\n    // 404 pages — use status code count from BasicStats for accuracy\n    let count_404 = stats.count_by_status.get(&404).copied().unwrap_or(0);\n    if count_404 > 0 {\n        let pts = match count_404 {\n            1 => 0.5,\n            2..=5 => 1.0,\n            6..=20 => 1.5,\n            _ => 2.0,\n        };\n        deductions.push(Deduction {\n            reason: format!(\"{} page(s) returned 404\", count_404),\n            points: pts,\n        });\n    }\n\n    // Redirects\n    if is_not_ok(summary, \"redirects\") {\n        let count = get_item_count(summary, \"redirects\").unwrap_or(1);\n        if count > 0 {\n            let pts = (count as f64 * 0.15).min(MAX_PER_TYPE_DEDUCTION);\n            let remaining = MAX_PER_URL_DEDUCTION - per_url_total;\n           
 let pts = pts.min(remaining).max(0.0);\n            per_url_total += pts;\n            deductions.push(Deduction {\n                reason: format!(\"{} redirect(s) found\", count),\n                points: round1(pts),\n            });\n        }\n    }\n\n    build_category(\"SEO\", \"seo\", 0.20, deductions, per_url_total)\n}\n\nfn score_security(summary: &Summary) -> CategoryScore {\n    let mut deductions = Vec::new();\n\n    // SSL certificate issues\n    for code in &[\n        \"ssl-certificate-connect\",\n        \"ssl-certificate-missing\",\n        \"ssl-certificate-parse\",\n        \"ssl-certificate-valid\",\n    ] {\n        if is_critical(summary, code) {\n            deductions.push(Deduction {\n                reason: \"SSL/TLS certificate issue\".to_string(),\n                points: 3.0,\n            });\n            break;\n        }\n    }\n\n    // SSL certificate validity period\n    if is_critical(summary, \"ssl-certificate-valid-to\") {\n        deductions.push(Deduction {\n            reason: \"SSL certificate expired or expiring soon\".to_string(),\n            points: 0.5,\n        });\n    }\n\n    // Unsafe SSL protocols\n    if is_critical(summary, \"ssl-protocol-unsafe\") || is_warning(summary, \"ssl-protocol-unsafe\") {\n        deductions.push(Deduction {\n            reason: \"Insecure TLS protocol versions supported\".to_string(),\n            points: 1.0,\n        });\n    }\n\n    // Security headers — graduated scale based on affected page count\n    if is_critical(summary, \"security\") {\n        let count = get_item_count(summary, \"security\").unwrap_or(1);\n        let pts = match count {\n            0 => 0.0,\n            1 => 1.0,\n            2 => 1.5,\n            3 => 2.0,\n            4..=10 => 2.5,\n            11..=50 => 3.0,\n            _ => 3.5,\n        };\n        deductions.push(Deduction {\n            reason: format!(\"{} page(s) with critical security findings\", count),\n            points: pts,\n      
  });\n    } else if is_warning_or_above(summary, \"security\") {\n        let count = get_item_count(summary, \"security\").unwrap_or(1);\n        let pts = match count {\n            0 => 0.0,\n            1 => 0.5,\n            2 => 0.75,\n            3 => 1.0,\n            4..=10 => 1.25,\n            _ => 1.5,\n        };\n        deductions.push(Deduction {\n            reason: format!(\"{} page(s) with security warnings\", count),\n            points: pts,\n        });\n    }\n\n    build_category(\"Security\", \"security\", 0.25, deductions, 0.0)\n}\n\nfn score_accessibility(summary: &Summary) -> CategoryScore {\n    let mut deductions = Vec::new();\n    let mut per_url_total = 0.0;\n\n    // Missing lang attribute (flat deduction — affects entire site)\n    if is_not_ok(summary, \"pages-without-lang\") {\n        let count = get_item_count(summary, \"pages-without-lang\").unwrap_or(1);\n        let pts = if count > 0 { 1.5 } else { 0.0 };\n        deductions.push(Deduction {\n            reason: format!(\"{} page(s) without lang attribute\", count),\n            points: pts,\n        });\n    }\n\n    // Missing image alt attributes\n    per_url_deduct(\n        summary,\n        \"pages-without-image-alt-attributes\",\n        0.5,\n        \"page(s) without image alt attributes\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    // Missing form labels\n    per_url_deduct(\n        summary,\n        \"pages-without-form-labels\",\n        0.5,\n        \"page(s) without form labels\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    // Skipped heading levels (accessibility concern, not SEO)\n    per_url_deduct(\n        summary,\n        \"pages-with-skipped-heading-levels\",\n        0.1,\n        \"page(s) with skipped heading levels\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    // Missing ARIA labels\n    per_url_deduct(\n        summary,\n        \"pages-without-aria-labels\",\n      
  0.3,\n        \"page(s) without aria labels\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    // Missing roles (lower weight — semantic HTML provides implicit roles)\n    per_url_deduct(\n        summary,\n        \"pages-without-roles\",\n        0.15,\n        \"page(s) without role attributes\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    // Invalid HTML\n    per_url_deduct(\n        summary,\n        \"pages-with-invalid-html\",\n        0.3,\n        \"page(s) with invalid HTML\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    build_category(\"Accessibility\", \"accessibility\", 0.20, deductions, per_url_total)\n}\n\nfn score_best_practices(summary: &Summary) -> CategoryScore {\n    let mut deductions = Vec::new();\n    let mut per_url_total = 0.0;\n\n    // Duplicate SVGs\n    per_url_deduct(\n        summary,\n        \"pages-with-duplicated-svgs\",\n        0.3,\n        \"page(s) with duplicated inline SVGs\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    // Large SVGs\n    per_url_deduct(\n        summary,\n        \"pages-with-large-svgs\",\n        0.2,\n        \"page(s) with large inline SVGs\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    // Invalid SVGs\n    per_url_deduct(\n        summary,\n        \"pages-with-invalid-svgs\",\n        0.2,\n        \"page(s) with invalid inline SVGs\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    // Missing quotes\n    per_url_deduct(\n        summary,\n        \"pages-with-missing-quotes\",\n        0.2,\n        \"page(s) with missing quotes\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    // Deep DOM\n    per_url_deduct(\n        summary,\n        \"pages-with-deep-dom\",\n        0.5,\n        \"page(s) with deep DOM\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    // Non-clickable phone numbers\n    
per_url_deduct(\n        summary,\n        \"pages-with-non-clickable-phone-numbers\",\n        0.3,\n        \"page(s) with non-clickable phone numbers\",\n        &mut deductions,\n        &mut per_url_total,\n    );\n\n    // Brotli support\n    if is_not_ok(summary, \"brotli-support\") {\n        deductions.push(Deduction {\n            reason: \"No Brotli compression support\".to_string(),\n            points: 0.5,\n        });\n    }\n\n    // WebP support\n    if is_not_ok(summary, \"webp-support\") {\n        deductions.push(Deduction {\n            reason: \"No WebP image support\".to_string(),\n            points: 0.3,\n        });\n    }\n\n    build_category(\"Best Practices\", \"best-practices\", 0.15, deductions, per_url_total)\n}\n\n// ---- Helpers ----\n\nfn build_category(\n    name: &str,\n    code: &str,\n    weight: f64,\n    deductions: Vec<Deduction>,\n    _per_url_total: f64,\n) -> CategoryScore {\n    let fixed_total: f64 = deductions.iter().map(|d| d.points).sum();\n    let score = round1((10.0 - fixed_total).clamp(0.0, 10.0));\n\n    CategoryScore {\n        name: name.to_string(),\n        code: code.to_string(),\n        score,\n        label: score_label(score).to_string(),\n        weight,\n        deductions,\n    }\n}\n\n/// Apply a per-URL deduction with per-type sub-cap and total cap.\nfn per_url_deduct(\n    summary: &Summary,\n    apl_code: &str,\n    points_per_url: f64,\n    description: &str,\n    deductions: &mut Vec<Deduction>,\n    per_url_total: &mut f64,\n) {\n    if is_not_ok(summary, apl_code) {\n        let count = get_item_count(summary, apl_code).unwrap_or(1);\n        if count > 0 {\n            let remaining = MAX_PER_URL_DEDUCTION - *per_url_total;\n            if remaining <= 0.0 {\n                return;\n            }\n            // Apply per-type sub-cap, then total cap\n            let pts = (count as f64 * points_per_url)\n                .min(MAX_PER_TYPE_DEDUCTION)\n                .min(remaining);\n     
       *per_url_total += pts;\n            deductions.push(Deduction {\n                reason: format!(\"{} {}\", count, description),\n                points: round1(pts),\n            });\n        }\n    }\n}\n\n/// Check if a summary item is not OK (Warning, Critical, or Notice).\nfn is_not_ok(summary: &Summary, apl_code: &str) -> bool {\n    summary\n        .get_items()\n        .iter()\n        .any(|item| item.apl_code == apl_code && !matches!(item.status, ItemStatus::Ok | ItemStatus::Info))\n}\n\n/// Check if a summary item is Critical.\nfn is_critical(summary: &Summary, apl_code: &str) -> bool {\n    summary\n        .get_items()\n        .iter()\n        .any(|item| item.apl_code == apl_code && item.status == ItemStatus::Critical)\n}\n\n/// Check if a summary item is Warning or above.\nfn is_warning_or_above(summary: &Summary, apl_code: &str) -> bool {\n    summary\n        .get_items()\n        .iter()\n        .any(|item| item.apl_code == apl_code && matches!(item.status, ItemStatus::Warning | ItemStatus::Critical))\n}\n\n/// Check if a summary item is Warning.\nfn is_warning(summary: &Summary, apl_code: &str) -> bool {\n    summary\n        .get_items()\n        .iter()\n        .any(|item| item.apl_code == apl_code && item.status == ItemStatus::Warning)\n}\n\n/// Extract a count (first number found) from a non-OK summary item's text.\nfn get_item_count(summary: &Summary, apl_code: &str) -> Option<usize> {\n    let item = summary\n        .get_items()\n        .iter()\n        .find(|i| i.apl_code == apl_code && !matches!(i.status, ItemStatus::Ok | ItemStatus::Info))?;\n    extract_first_number(&item.text)\n}\n\n/// Get count for items that may have multiple entries with the same apl_code (e.g. 
title-uniqueness).\nfn get_item_count_for_code(summary: &Summary, apl_code: &str) -> Option<usize> {\n    let count = summary\n        .get_items()\n        .iter()\n        .filter(|i| i.apl_code == apl_code && !matches!(i.status, ItemStatus::Ok | ItemStatus::Info))\n        .count();\n    if count > 0 { Some(count) } else { None }\n}\n\n/// Extract the first number from a string (e.g., \"Security - 89 pages(s) with...\" -> 89).\nfn extract_first_number(text: &str) -> Option<usize> {\n    number_regex().find(text).and_then(|m| m.as_str().parse().ok())\n}\n\nfn number_regex() -> &'static Regex {\n    use std::sync::OnceLock;\n    static RE: OnceLock<Regex> = OnceLock::new();\n    RE.get_or_init(|| Regex::new(r\"\\d+\").unwrap())\n}\n\nfn round1(v: f64) -> f64 {\n    (v * 10.0).round() / 10.0\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n    use crate::components::summary::item::Item;\n    use crate::scoring::quality_score::score_label;\n\n    fn make_empty_summary() -> Summary {\n        Summary::new()\n    }\n\n    fn make_summary_with_items(items: Vec<(&str, ItemStatus)>) -> Summary {\n        let mut s = Summary::new();\n        for (code, status) in items {\n            s.add_item(Item::new(code.to_string(), \"1 test issue\".to_string(), status));\n        }\n        s\n    }\n\n    fn make_basic_stats() -> BasicStats {\n        BasicStats {\n            total_urls: 100,\n            total_requests_times_avg: 0.3,\n            ..Default::default()\n        }\n    }\n\n    #[test]\n    fn perfect_score_for_clean_site() {\n        let summary = make_empty_summary();\n        let stats = make_basic_stats();\n        let scores = calculate_scores(&summary, &stats);\n        assert_eq!(scores.overall.score, 10.0);\n    }\n\n    #[test]\n    fn score_label_thresholds() {\n        assert_eq!(score_label(9.5), \"Excellent\");\n        assert_eq!(score_label(8.0), \"Good\");\n        assert_eq!(score_label(5.5), \"Fair\");\n        assert_eq!(score_label(3.5), 
\"Poor\");\n        assert_eq!(score_label(1.0), \"Critical\");\n    }\n\n    #[test]\n    fn slow_response_deduction() {\n        let summary = make_empty_summary();\n        let mut stats = make_basic_stats();\n        stats.total_requests_times_avg = 1.5;\n        let scores = calculate_scores(&summary, &stats);\n        let perf = scores.categories.iter().find(|c| c.code == \"performance\").unwrap();\n        assert!(perf.score < 10.0);\n    }\n\n    #[test]\n    fn categories_have_correct_weights() {\n        let summary = make_empty_summary();\n        let stats = make_basic_stats();\n        let scores = calculate_scores(&summary, &stats);\n        let total_weight: f64 = scores.categories.iter().map(|c| c.weight).sum();\n        assert!((total_weight - 1.0).abs() < 0.001);\n    }\n\n    #[test]\n    fn overall_is_weighted_average() {\n        let summary = make_empty_summary();\n        let stats = make_basic_stats();\n        let scores = calculate_scores(&summary, &stats);\n        let expected: f64 = scores.categories.iter().map(|c| c.score * c.weight).sum();\n        let expected = round1(expected);\n        assert!((scores.overall.score - expected).abs() < 0.01);\n    }\n\n    #[test]\n    fn errors_404_deduct_from_seo() {\n        let summary = make_empty_summary();\n        let mut stats = make_basic_stats();\n        stats.count_by_status.insert(404, 5);\n        let scores = calculate_scores(&summary, &stats);\n        let seo = scores.categories.iter().find(|c| c.code == \"seo\").unwrap();\n        assert!(seo.score < 10.0);\n    }\n\n    #[test]\n    fn warnings_reduce_score() {\n        let summary = make_summary_with_items(vec![\n            (\"pages-without-h1\", ItemStatus::Warning),\n            (\"pages-without-lang\", ItemStatus::Warning),\n        ]);\n        let stats = make_basic_stats();\n        let scores = calculate_scores(&summary, &stats);\n        assert!(scores.overall.score < 10.0);\n    }\n}\n"
  },
  {
    "path": "src/server.rs",
    "content": "// SiteOne Crawler - Built-in HTTP server for serving exports\n// (c) Jan Reges <jan.reges@siteone.cz>\n//\n// Two modes:\n// - Markdown: reads .md files, renders them as styled HTML with table/accordion support\n// - Offline: serves static HTML files with Content-Security-Policy restricting to same origin\n\nuse std::path::{Path, PathBuf};\n\nuse tokio::io::{AsyncReadExt, AsyncWriteExt};\nuse tokio::net::{TcpListener, TcpStream};\n\nuse crate::utils;\nuse crate::version;\n\n/// Server mode\npub enum ServeMode {\n    Markdown,\n    Offline,\n}\n\n/// Run the HTTP server for serving exported content.\npub async fn run(root_dir: PathBuf, mode: ServeMode, port: u16, bind_address: &str) {\n    if !root_dir.is_dir() {\n        eprintln!(\n            \"{}\",\n            utils::get_color_text(\n                &format!(\"ERROR: Directory '{}' does not exist.\", root_dir.display()),\n                \"red\",\n                false,\n            )\n        );\n        std::process::exit(101);\n    }\n\n    let mode_name = match mode {\n        ServeMode::Markdown => \"Markdown\",\n        ServeMode::Offline => \"Offline HTML\",\n    };\n\n    let addr = format!(\"{}:{}\", bind_address, port);\n    let listener = match TcpListener::bind(&addr).await {\n        Ok(l) => l,\n        Err(e) => {\n            eprintln!(\n                \"{}\",\n                utils::get_color_text(\n                    &format!(\"ERROR: Cannot bind to {}:{}: {}\", bind_address, port, e),\n                    \"red\",\n                    false,\n                )\n            );\n            std::process::exit(1);\n        }\n    };\n\n    let display_host = if bind_address == \"0.0.0.0\" || bind_address == \"127.0.0.1\" {\n        \"localhost\"\n    } else {\n        bind_address\n    };\n\n    println!();\n    println!(\n        \"{}\",\n        utils::get_color_text(\n            &format!(\"SiteOne Crawler v{} - {} Server\", version::CODE, mode_name),\n            
\"yellow\",\n            false,\n        )\n    );\n    println!(\n        \"  {}\",\n        utils::get_color_text(&format!(\"Serving from: {}\", root_dir.display()), \"gray\", false,)\n    );\n    println!(\n        \"  {}\",\n        utils::get_color_text(&format!(\"URL: http://{}:{}\", display_host, port), \"cyan\", false,)\n    );\n    if bind_address == \"0.0.0.0\" {\n        println!(\n            \"  {}\",\n            utils::get_color_text(\"Listening on all network interfaces\", \"yellow\", false,)\n        );\n    }\n    println!(\"  {}\", utils::get_color_text(\"Press Ctrl+C to stop\", \"gray\", false,));\n    println!();\n\n    let is_markdown = matches!(mode, ServeMode::Markdown);\n\n    loop {\n        match listener.accept().await {\n            Ok((stream, _)) => {\n                let root = root_dir.clone();\n                tokio::spawn(async move {\n                    if let Err(e) = handle_connection(stream, &root, is_markdown).await {\n                        eprintln!(\"Connection error: {}\", e);\n                    }\n                });\n            }\n            Err(e) => eprintln!(\"Accept error: {}\", e),\n        }\n    }\n}\n\nasync fn handle_connection(mut stream: TcpStream, root_dir: &Path, is_markdown: bool) -> std::io::Result<()> {\n    let mut buf = vec![0u8; 8192];\n    let n = match tokio::time::timeout(std::time::Duration::from_secs(30), stream.read(&mut buf)).await {\n        Ok(result) => result?,\n        Err(_) => return Ok(()), // read timeout — close silently\n    };\n    if n == 0 {\n        return Ok(());\n    }\n\n    let request = String::from_utf8_lossy(&buf[..n]);\n\n    let first_line = match request.lines().next() {\n        Some(line) => line,\n        None => {\n            stream\n                .write_all(&build_response(400, \"text/plain\", b\"Bad Request\", &[]))\n                .await?;\n            return Ok(());\n        }\n    };\n\n    let parts: Vec<&str> = 
first_line.split_whitespace().collect();\n    if parts.len() < 2 || (parts[0] != \"GET\" && parts[0] != \"HEAD\") {\n        stream\n            .write_all(&build_response(\n                405,\n                \"text/plain\",\n                b\"Method Not Allowed\",\n                &[(\"Allow\", \"GET, HEAD\")],\n            ))\n            .await?;\n        return Ok(());\n    }\n\n    let is_head = parts[0] == \"HEAD\";\n\n    let raw_path = parts[1];\n\n    // Decode percent-encoding, strip query string and fragment\n    let decoded = percent_encoding::percent_decode_str(raw_path)\n        .decode_utf8_lossy()\n        .to_string();\n    let clean_path = decoded\n        .split('?')\n        .next()\n        .unwrap_or(&decoded)\n        .split('#')\n        .next()\n        .unwrap_or(&decoded);\n\n    // Security: prevent path traversal (check segments, not substring)\n    let normalized = clean_path.replace('\\\\', \"/\");\n    if normalized.split('/').any(|seg| seg == \"..\") {\n        stream\n            .write_all(&build_response(403, \"text/plain\", b\"Forbidden\", &[]))\n            .await?;\n        return Ok(());\n    }\n\n    let relative_path = normalized.trim_start_matches('/');\n\n    let mut response = if is_markdown {\n        serve_markdown_request(root_dir, relative_path)\n    } else {\n        serve_offline_request(root_dir, relative_path)\n    };\n\n    // For HEAD requests, send only headers (Content-Length stays correct)\n    if is_head && let Some(pos) = find_header_end(&response) {\n        response.truncate(pos);\n    }\n\n    let status = extract_status(&response);\n    let method = parts[0];\n    let status_color = if status < 300 {\n        \"green\"\n    } else if status < 400 {\n        \"cyan\"\n    } else {\n        \"red\"\n    };\n    println!(\n        \"  {} {} {}\",\n        utils::get_color_text(&format!(\"{}\", status), status_color, false),\n        method,\n        raw_path,\n    );\n\n    
stream.write_all(&response).await?;\n    Ok(())\n}\n\nfn find_header_end(response: &[u8]) -> Option<usize> {\n    response.windows(4).position(|w| w == b\"\\r\\n\\r\\n\").map(|p| p + 4)\n}\n\nfn extract_status(response: &[u8]) -> u16 {\n    let header = String::from_utf8_lossy(&response[..std::cmp::min(30, response.len())]);\n    header\n        .split_whitespace()\n        .nth(1)\n        .and_then(|s| s.parse().ok())\n        .unwrap_or(0)\n}\n\n// ---- Markdown serving ----\n\nfn serve_markdown_request(root_dir: &Path, relative_path: &str) -> Vec<u8> {\n    let csp = (\"Content-Security-Policy\", \"default-src 'self' 'unsafe-inline' data:\");\n\n    match resolve_markdown_path(root_dir, relative_path) {\n        Some(path) if !is_within_root(root_dir, &path) => build_response(403, \"text/plain\", b\"Forbidden\", &[]),\n        Some(path) if path.extension().is_some_and(|ext| ext == \"md\") => match std::fs::read_to_string(&path) {\n            Ok(content) if content.trim().is_empty() => {\n                // Empty markdown file — show directory listing instead\n                let dir_path = path.parent().unwrap_or(root_dir);\n                let url_path = relative_path\n                    .trim_end_matches('/')\n                    .trim_end_matches(\"index.md\")\n                    .trim_end_matches('/');\n                let listing = directory_listing(dir_path, url_path, true);\n                build_response(200, \"text/html; charset=utf-8\", listing.as_bytes(), &[csp])\n            }\n            Ok(content) => {\n                let html = render_markdown_to_html(&content, relative_path);\n                build_response(200, \"text/html; charset=utf-8\", html.as_bytes(), &[csp])\n            }\n            Err(_) => build_404_response(true),\n        },\n        Some(path) => serve_static_file(&path, &[csp]),\n        None => {\n            let dir_path = root_dir.join(relative_path);\n            if dir_path.is_dir() && is_within_root(root_dir, 
&dir_path) {\n                let listing = directory_listing(&dir_path, relative_path, true);\n                build_response(200, \"text/html; charset=utf-8\", listing.as_bytes(), &[csp])\n            } else {\n                build_404_response(true)\n            }\n        }\n    }\n}\n\nfn resolve_markdown_path(root_dir: &Path, relative_path: &str) -> Option<PathBuf> {\n    if relative_path.is_empty() {\n        let index = root_dir.join(\"index.md\");\n        if index.is_file() {\n            return Some(index);\n        }\n        return None;\n    }\n\n    let full_path = root_dir.join(relative_path);\n\n    // Direct file match (static files, .md files with extension in URL)\n    if full_path.is_file() {\n        return Some(full_path);\n    }\n\n    // Try adding .md extension\n    let trimmed = relative_path.trim_end_matches('/');\n    let md_path = root_dir.join(format!(\"{}.md\", trimmed));\n    if md_path.is_file() {\n        return Some(md_path);\n    }\n\n    // Try as directory with index.md\n    let index_path = full_path.join(\"index.md\");\n    if index_path.is_file() {\n        return Some(index_path);\n    }\n\n    None\n}\n\n// ---- Offline serving ----\n\nfn serve_offline_request(root_dir: &Path, relative_path: &str) -> Vec<u8> {\n    let csp = (\"Content-Security-Policy\", \"default-src 'self' 'unsafe-inline' data:\");\n\n    match resolve_offline_path(root_dir, relative_path) {\n        Some(path) if !is_within_root(root_dir, &path) => build_response(403, \"text/plain\", b\"Forbidden\", &[]),\n        Some(path) => serve_static_file(&path, &[csp]),\n        None => {\n            let dir_path = root_dir.join(relative_path);\n            if dir_path.is_dir() && is_within_root(root_dir, &dir_path) {\n                let listing = directory_listing(&dir_path, relative_path, false);\n                build_response(200, \"text/html; charset=utf-8\", listing.as_bytes(), &[csp])\n            } else {\n                build_404_response(false)\n  
          }\n        }\n    }\n}\n\nfn resolve_offline_path(root_dir: &Path, relative_path: &str) -> Option<PathBuf> {\n    if relative_path.is_empty() {\n        let index = root_dir.join(\"index.html\");\n        if index.is_file() {\n            return Some(index);\n        }\n        return None;\n    }\n\n    let full_path = root_dir.join(relative_path);\n\n    // Direct file match\n    if full_path.is_file() {\n        return Some(full_path);\n    }\n\n    // Try as directory with index.html (prefer over .html redirect files)\n    let dir_path = root_dir.join(relative_path.trim_end_matches('/'));\n    let index_path = dir_path.join(\"index.html\");\n    if index_path.is_file() {\n        return Some(index_path);\n    }\n\n    // Try with .html extension\n    let trimmed = relative_path.trim_end_matches('/');\n    let html_path = root_dir.join(format!(\"{}.html\", trimmed));\n    if html_path.is_file() {\n        return Some(html_path);\n    }\n\n    None\n}\n\n// ---- Shared utilities ----\n\nfn serve_static_file(path: &Path, extra_headers: &[(&str, &str)]) -> Vec<u8> {\n    match std::fs::read(path) {\n        Ok(content) => {\n            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(\"\");\n            let content_type = content_type_for_extension(ext);\n            build_response(200, content_type, &content, extra_headers)\n        }\n        Err(_) => build_response(500, \"text/plain\", b\"Internal Server Error\", &[]),\n    }\n}\n\n/// Verify that the resolved path stays within the root directory (symlink-safe).\nfn is_within_root(root_dir: &Path, resolved_path: &Path) -> bool {\n    let Ok(canonical_root) = std::fs::canonicalize(root_dir) else {\n        return false;\n    };\n    let Ok(canonical_path) = std::fs::canonicalize(resolved_path) else {\n        return false;\n    };\n    canonical_path.starts_with(&canonical_root)\n}\n\nfn build_response(status: u16, content_type: &str, body: &[u8], extra_headers: &[(&str, &str)]) -> 
Vec<u8> {\n    let status_text = match status {\n        200 => \"OK\",\n        301 => \"Moved Permanently\",\n        400 => \"Bad Request\",\n        403 => \"Forbidden\",\n        404 => \"Not Found\",\n        405 => \"Method Not Allowed\",\n        500 => \"Internal Server Error\",\n        _ => \"Unknown\",\n    };\n\n    let mut header = format!(\n        \"HTTP/1.1 {} {}\\r\\nContent-Type: {}\\r\\nContent-Length: {}\\r\\nX-Powered-By: siteone-crawler/{}\\r\\nX-Frame-Options: DENY\\r\\nX-Content-Type-Options: nosniff\\r\\nConnection: close\\r\\n\",\n        status,\n        status_text,\n        content_type,\n        body.len(),\n        version::CODE\n    );\n\n    for (name, value) in extra_headers {\n        header.push_str(&format!(\"{}: {}\\r\\n\", name, value));\n    }\n\n    header.push_str(\"\\r\\n\");\n\n    let mut response = header.into_bytes();\n    response.extend_from_slice(body);\n    response\n}\n\nfn build_404_response(is_markdown: bool) -> Vec<u8> {\n    let body = if is_markdown {\n        format!(\n            \"<!DOCTYPE html>\\n<html lang=\\\"en\\\">\\n<head><meta charset=\\\"utf-8\\\"><title>404 Not Found</title>\\n<style>{}</style>\\n</head>\\n<body>\\n<div class=\\\"container\\\">\\n<article class=\\\"markdown-body\\\">\\n<h1>404 - Page Not Found</h1>\\n<p>The requested page was not found.</p>\\n<p><a href=\\\"/\\\">Back to home</a></p>\\n</article>\\n</div>\\n</body>\\n</html>\",\n            MARKDOWN_CSS\n        )\n    } else {\n        \"<!DOCTYPE html>\\n<html><body><h1>404 Not Found</h1><p>The requested file was not found.</p></body></html>\"\n            .to_string()\n    };\n    build_response(404, \"text/html; charset=utf-8\", body.as_bytes(), &[])\n}\n\nfn content_type_for_extension(ext: &str) -> &'static str {\n    // Extensions come from the filesystem and are almost always lowercase.\n    // Use a small stack buffer to avoid heap allocation for the rare uppercase case.\n    let mut lower = [0u8; 8];\n    let ext_lower 
= if ext.len() <= 8 {\n        for (i, b) in ext.bytes().enumerate() {\n            lower[i] = b.to_ascii_lowercase();\n        }\n        std::str::from_utf8(&lower[..ext.len()]).unwrap_or(ext)\n    } else {\n        ext // fallback: will only match if already lowercase\n    };\n    match ext_lower {\n        \"html\" | \"htm\" => \"text/html; charset=utf-8\",\n        \"css\" => \"text/css; charset=utf-8\",\n        \"js\" | \"mjs\" => \"application/javascript; charset=utf-8\",\n        \"json\" => \"application/json; charset=utf-8\",\n        \"xml\" => \"application/xml; charset=utf-8\",\n        \"txt\" => \"text/plain; charset=utf-8\",\n        \"md\" => \"text/markdown; charset=utf-8\",\n        \"png\" => \"image/png\",\n        \"jpg\" | \"jpeg\" => \"image/jpeg\",\n        \"gif\" => \"image/gif\",\n        \"svg\" => \"image/svg+xml; charset=utf-8\",\n        \"ico\" => \"image/x-icon\",\n        \"webp\" => \"image/webp\",\n        \"avif\" => \"image/avif\",\n        \"woff\" => \"font/woff\",\n        \"woff2\" => \"font/woff2\",\n        \"ttf\" => \"font/ttf\",\n        \"otf\" => \"font/otf\",\n        \"eot\" => \"application/vnd.ms-fontobject\",\n        \"pdf\" => \"application/pdf\",\n        \"zip\" => \"application/zip\",\n        \"mp4\" => \"video/mp4\",\n        \"webm\" => \"video/webm\",\n        \"mp3\" => \"audio/mpeg\",\n        _ => \"application/octet-stream\",\n    }\n}\n\n// ---- Markdown rendering ----\n\nfn render_markdown_to_html(markdown: &str, request_path: &str) -> String {\n    use pulldown_cmark::{Options, Parser, html};\n\n    // Replace curly/smart quotes with straight quotes\n    let markdown = markdown\n        .replace(['\\u{201c}', '\\u{201d}'], \"\\\"\")\n        .replace(['\\u{2018}', '\\u{2019}'], \"'\");\n\n    // Clean up markdown artifacts from HTML→MD conversion\n    let cleaned = clean_markdown_artifacts(&markdown);\n\n    let mut options = Options::empty();\n    options.insert(Options::ENABLE_TABLES);\n    
options.insert(Options::ENABLE_STRIKETHROUGH);\n    options.insert(Options::ENABLE_TASKLISTS);\n\n    let parser = Parser::new_ext(&cleaned, options);\n    let mut html_content = String::new();\n    html::push_html(&mut html_content, parser);\n\n    // Add id attributes to h1-h4 headings for anchor linking\n    html_content = add_heading_ids(&html_content);\n\n    // Convert heading + link-only blocks (>3 links) into accordions\n    html_content = collapse_link_blocks(&html_content);\n\n    // Add link counts to existing Menu/Links accordions\n    html_content = add_accordion_link_counts(&html_content);\n\n    // Style callout blocks (Tip, Note, Caution, etc.)\n    html_content = style_callout_blocks(&html_content);\n\n    // Extract title from first heading in the cleaned markdown\n    let heading = extract_title(&cleaned);\n    let title = if heading == \"SiteOne Crawler - Markdown Viewer\" {\n        heading\n    } else {\n        format!(\"{} | SiteOne Crawler - Markdown Viewer\", heading)\n    };\n\n    // Build breadcrumb navigation\n    let breadcrumb = build_breadcrumb(request_path);\n\n    format!(\n        r#\"<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"utf-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n<meta name=\"author\" content=\"SiteOne Crawler - https://crawler.siteone.io/\">\n<title>{title}</title>\n<style>\n{css}\n</style>\n<script>\n(function(){{\n  var t=localStorage.getItem('md-theme'),w=localStorage.getItem('md-width');\n  if(t==='dark')document.documentElement.classList.add('dark');\n  if(w==='wide')document.documentElement.classList.add('wide');\n}})();\n</script>\n</head>\n<body>\n<div class=\"container\">\n<nav class=\"breadcrumb\">\n<span class=\"breadcrumb-path\">{breadcrumb}</span>\n<span class=\"toolbar\">\n<button onclick=\"toggleWidth()\" id=\"width-btn\" title=\"Toggle full width\"></button>\n<button onclick=\"toggleTheme()\" id=\"theme-btn\" title=\"Toggle dark/light 
mode\"></button>\n</span>\n</nav>\n<article class=\"markdown-body\">\n{content}\n</article>\n<footer>\n<p>Served by <a href=\"https://crawler.siteone.io/\" target=\"_blank\" rel=\"noopener\">SiteOne Crawler</a> v{version}</p>\n</footer>\n</div>\n<script>\nvar svgMoon='<svg viewBox=\"0 0 16 16\" width=\"14\" height=\"14\"><path d=\"M6 .5a7.5 7.5 0 1 0 8 12A6 6 0 0 1 6 .5z\" fill=\"currentColor\"/></svg>';\nvar svgSun='<svg viewBox=\"0 0 16 16\" width=\"14\" height=\"14\"><circle cx=\"8\" cy=\"8\" r=\"2.8\" fill=\"currentColor\"/><g stroke=\"currentColor\" stroke-width=\"1.5\" stroke-linecap=\"round\"><line x1=\"8\" y1=\".5\" x2=\"8\" y2=\"3\"/><line x1=\"8\" y1=\"13\" x2=\"8\" y2=\"15.5\"/><line x1=\".5\" y1=\"8\" x2=\"3\" y2=\"8\"/><line x1=\"13\" y1=\"8\" x2=\"15.5\" y2=\"8\"/><line x1=\"2.7\" y1=\"2.7\" x2=\"4.5\" y2=\"4.5\"/><line x1=\"11.5\" y1=\"11.5\" x2=\"13.3\" y2=\"13.3\"/><line x1=\"2.7\" y1=\"13.3\" x2=\"4.5\" y2=\"11.5\"/><line x1=\"11.5\" y1=\"4.5\" x2=\"13.3\" y2=\"2.7\"/></g></svg>';\nvar svgExpand='<svg viewBox=\"0 0 16 16\" width=\"14\" height=\"14\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"1.5\" stroke-linecap=\"round\" stroke-linejoin=\"round\"><path d=\"M1 8h14M4.5 5L1 8l3.5 3M11.5 5L15 8l-3.5 3\"/></svg>';\nvar svgContract='<svg viewBox=\"0 0 16 16\" width=\"14\" height=\"14\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"1.5\" stroke-linecap=\"round\" stroke-linejoin=\"round\"><path d=\"M1 8h5M10 8h5M3.5 5L7 8l-3.5 3M12.5 5L9 8l3.5 3\"/></svg>';\nfunction toggleTheme(){{\n  document.documentElement.classList.toggle('dark');\n  localStorage.setItem('md-theme',document.documentElement.classList.contains('dark')?'dark':'light');\n  updBtn();\n}}\nfunction toggleWidth(){{\n  document.documentElement.classList.toggle('wide');\n  localStorage.setItem('md-width',document.documentElement.classList.contains('wide')?'wide':'narrow');\n  updBtn();\n}}\nfunction updBtn(){{\n  var d=document.documentElement.classList.contains('dark');\n  
var w=document.documentElement.classList.contains('wide');\n  document.getElementById('theme-btn').innerHTML=d?svgSun:svgMoon;\n  document.getElementById('width-btn').innerHTML=w?svgContract:svgExpand;\n}}\nupdBtn();\n</script>\n</body>\n</html>\"#,\n        title = html_escape(&title),\n        css = MARKDOWN_CSS,\n        breadcrumb = breadcrumb,\n        content = html_content,\n        version = version::CODE,\n    )\n}\n\n/// Clean up common artifacts left by HTML→Markdown export.\nfn clean_markdown_artifacts(markdown: &str) -> String {\n    let lines: Vec<&str> = markdown.lines().collect();\n    let mut result: Vec<&str> = Vec::with_capacity(lines.len());\n    let mut in_code_block = false;\n\n    // Phase 1: Skip site navigation header before the first h1 heading.\n    // If there are 3+ links before the first h1, the content is likely nav/header.\n    // But preserve <details> blocks (accordion menus) from the navigation.\n    let mut content_start = 0;\n    {\n        let mut link_count = 0;\n        for idx in 0..lines.len() {\n            let t = lines[idx].trim();\n            if t.starts_with(\"```\") {\n                break; // don't look inside code blocks\n            }\n\n            // Check for setext h1 (line followed by ===)\n            if idx + 1 < lines.len() {\n                let next = lines[idx + 1].trim();\n                if !next.is_empty() && next.len() >= 3 && next.chars().all(|c| c == '=') {\n                    if link_count >= 3 {\n                        content_start = idx;\n                    }\n                    break;\n                }\n            }\n            // Check for ATX h1\n            if t.starts_with(\"# \") && !t.starts_with(\"## \") {\n                if link_count >= 3 {\n                    content_start = idx;\n                }\n                break;\n            }\n\n            // Count links in non-heading lines\n            link_count += t.matches(\"](\").count();\n        }\n    }\n\n    // Phase 
1b: Preserve navigation from the skipped section.\n    // If the section already contains <details> blocks, preserve them as-is.\n    // Otherwise, wrap the entire nav content in a <details><summary>Menu</summary> block.\n    if content_start > 0 {\n        let has_details = lines[..content_start]\n            .iter()\n            .any(|l| l.trim() == \"<details>\" || l.trim().starts_with(\"<details>\"));\n\n        if has_details {\n            // Preserve existing <details> blocks (e.g. astro Menu accordion)\n            let mut k = 0;\n            while k < content_start {\n                let t = lines[k].trim();\n                if t == \"<details>\" || t.starts_with(\"<details>\") {\n                    while k < content_start {\n                        result.push(lines[k]);\n                        k += 1;\n                        if lines[k - 1].trim() == \"</details>\" {\n                            result.push(\"\"); // blank line required so pulldown-cmark ends the HTML block\n                            break;\n                        }\n                    }\n                } else {\n                    k += 1;\n                }\n            }\n        } else {\n            // No <details> blocks — wrap nav content in an accordion.\n            // Collect non-empty, non-artifact lines as the accordion body.\n            let mut nav_lines: Vec<&str> = Vec::new();\n            for line in &lines[..content_start] {\n                let t = line.trim();\n                // Skip header artifacts\n                if t.is_empty()\n                    || t == \"-\"\n                    || t.starts_with(\"[Skip to content]\")\n                    || (t.starts_with(\"| [\") && t.ends_with(\" |\"))\n                {\n                    continue;\n                }\n                // Skip setext underlines (--- or ===)\n                if t.len() >= 3 && (t.chars().all(|c| c == '-') || t.chars().all(|c| c == '=')) {\n                    continue;\n           
     }\n                // Skip heading text that is just \"Site navigation\" or similar\n                if t == \"Site navigation\" || t == \"Navigation\" {\n                    continue;\n                }\n                nav_lines.push(line);\n            }\n            // Trim trailing non-link plain-text lines (breadcrumb labels etc.)\n            while let Some(last) = nav_lines.last() {\n                let t = last.trim();\n                if !t.contains(\"](\") && !t.starts_with(\"- \") && !t.starts_with(\"### \") {\n                    nav_lines.pop();\n                } else {\n                    break;\n                }\n            }\n            if nav_lines.len() > 3 {\n                result.push(\"<details>\");\n                result.push(\"<summary>Menu</summary>\");\n                result.push(\"\");\n                for line in &nav_lines {\n                    result.push(line);\n                }\n                result.push(\"\");\n                result.push(\"</details>\");\n                result.push(\"\"); // empty line so pulldown-cmark starts a new block\n            }\n        }\n    }\n\n    // Phase 2: Process lines from content_start, filtering artifacts\n    let mut i = content_start;\n    while i < lines.len() {\n        let trimmed = lines[i].trim();\n\n        // Track code blocks to avoid filtering inside them\n        if trimmed.starts_with(\"```\") {\n            in_code_block = !in_code_block;\n            result.push(lines[i]);\n            i += 1;\n            continue;\n        }\n        if in_code_block {\n            result.push(lines[i]);\n            i += 1;\n            continue;\n        }\n\n        // Skip empty list items (just \"-\" with no text content)\n        if trimmed == \"-\" {\n            i += 1;\n            continue;\n        }\n\n        // Skip [Section titled \"...\"](...) 
lines\n        if trimmed.starts_with(\"[Section titled \\\"\") && trimmed.ends_with(')') {\n            i += 1;\n            continue;\n        }\n\n        // Skip [Skip to content](...) variants\n        if trimmed.starts_with(\"[Skip to content]\") {\n            i += 1;\n            continue;\n        }\n\n        // Detect footer: \"[Go to ... homepage](...)\" standalone link\n        if !trimmed.starts_with(\"- \")\n            && trimmed.starts_with(\"[Go to \")\n            && trimmed.to_lowercase().contains(\"homepage\")\n            && trimmed.ends_with(')')\n        {\n            break;\n        }\n\n        // Skip \"On this page\" heading + its following <details> block\n        if (trimmed == \"On this page\" || trimmed == \"On this page:\")\n            && i + 1 < lines.len()\n            && lines[i + 1].trim().starts_with('-')\n        {\n            // Skip underline-style heading (--- below)\n            i += 1;\n            if i < lines.len() && lines[i].trim().starts_with('-') && lines[i].trim().chars().all(|c| c == '-') {\n                i += 1;\n            }\n            // Skip until next heading or end of <details> block\n            while i < lines.len() {\n                let t = lines[i].trim();\n                if t.starts_with(\"# \")\n                    || t.starts_with(\"## \")\n                    || (t.starts_with('#') && t.chars().nth(1).is_some_and(|c| c == '#' || c == ' '))\n                {\n                    break;\n                }\n                if t == \"</details>\" {\n                    i += 1;\n                    break;\n                }\n                i += 1;\n            }\n            // Skip empty lines after the block\n            while i < lines.len() && lines[i].trim().is_empty() {\n                i += 1;\n            }\n            continue;\n        }\n\n        // Skip footer artifacts: \"Learn\" heading (astro docs pattern)\n        if trimmed == \"Learn\"\n            && i + 1 < lines.len()\n  
          && (lines.get(i + 1).is_some_and(|l| l.trim().is_empty())\n                || lines.get(i + 2).is_some_and(|l| l.trim().starts_with(\"| [\")))\n        {\n            break;\n        }\n\n        result.push(lines[i]);\n        i += 1;\n    }\n\n    // Phase 3: Fix broken code fences — detect unfenced code blocks between\n    // a closing ``` and an opening ```lang (or at end of content).\n    // The HTML→MD converter sometimes misses fences for tabbed code examples.\n    let mut fixed: Vec<&str> = Vec::with_capacity(result.len());\n    let mut ri = 0;\n    let result_lines: Vec<&str> = result; // take ownership\n    let mut in_fence = false;\n    while ri < result_lines.len() {\n        let t = result_lines[ri].trim();\n        if t.starts_with(\"```\") {\n            in_fence = !in_fence;\n            fixed.push(result_lines[ri]);\n            ri += 1;\n            continue;\n        }\n        if in_fence {\n            fixed.push(result_lines[ri]);\n            ri += 1;\n            continue;\n        }\n\n        // Not inside a fence — check if this starts an unfenced code block.\n        // Heuristic: if the previous non-empty line was a closing ```,\n        // and this line looks like code (contains semicolons, braces, =>, etc.),\n        // collect all lines until the next ``` or blank-line gap.\n        let prev_is_fence_close = fixed\n            .iter()\n            .rev()\n            .find(|l| !l.trim().is_empty())\n            .is_some_and(|l| l.trim().starts_with(\"```\"));\n\n        if prev_is_fence_close && !t.is_empty() && looks_like_code(t) {\n            // Collect unfenced code lines\n            let start = ri;\n            while ri < result_lines.len() {\n                let lt = result_lines[ri].trim();\n                if lt.starts_with(\"```\") {\n                    break;\n                }\n                // Stop if we hit a markdown heading or a completely blank line\n                // followed by non-code content\n      
          if lt.is_empty() {\n                    // Look ahead: if the next non-empty line is not code-like, stop\n                    let mut peek = ri + 1;\n                    while peek < result_lines.len() && result_lines[peek].trim().is_empty() {\n                        peek += 1;\n                    }\n                    if peek < result_lines.len() && !looks_like_code(result_lines[peek].trim()) {\n                        break;\n                    }\n                }\n                if lt.starts_with(\"# \") || lt.starts_with(\"## \") || lt.starts_with(\"### \") {\n                    break;\n                }\n                ri += 1;\n            }\n            // Trim trailing empty lines from the collected block\n            let mut end = ri;\n            while end > start && result_lines[end - 1].trim().is_empty() {\n                end -= 1;\n            }\n            if end > start {\n                fixed.push(\"```\");\n                for line in &result_lines[start..end] {\n                    fixed.push(line);\n                }\n                fixed.push(\"```\");\n            }\n            // If the next line is an opening ```, it will be handled normally\n            continue;\n        }\n\n        fixed.push(result_lines[ri]);\n        ri += 1;\n    }\n\n    fixed.join(\"\\n\")\n}\n\n/// Heuristic: does this line look like source code?\n/// Requires at least 2 code indicators to reduce false positives on prose.\nfn looks_like_code(line: &str) -> bool {\n    let mut score = 0;\n    // Strong indicators (score 2) — very unlikely in prose\n    if line.contains(\"=> {\") || line.contains(\"=> (\") {\n        score += 2;\n    }\n    if line.contains(\"};\") {\n        score += 2;\n    }\n    if line.ends_with(';') {\n        score += 2;\n    }\n    if line.starts_with(\"//\") {\n        score += 2;\n    }\n    if line.starts_with(\"if (\") || line.starts_with(\"if (!\") {\n        score += 2;\n    }\n    if line.contains(\"?.\") {\n     
   score += 2;\n    } // optional chaining\n    if line.contains(\"===\") || line.contains(\"!==\") {\n        score += 2;\n    }\n    // Moderate indicators (score 1)\n    if line.contains(\"export \") {\n        score += 1;\n    }\n    if line.contains(\"const \") {\n        score += 1;\n    }\n    if line.contains(\"return \") {\n        score += 1;\n    }\n    if line.contains(\"await \") {\n        score += 1;\n    }\n    if line.contains(\"async \") {\n        score += 1;\n    }\n    if line.contains(\"function \") {\n        score += 1;\n    }\n    if line.ends_with('{') || line.ends_with('}') {\n        score += 1;\n    }\n    score >= 2\n}\n\n/// Convert standalone callout paragraphs (Tip, Note, Caution, etc.) into styled divs.\nfn style_callout_blocks(html: &str) -> String {\n    let callout_patterns: [(&str, &str); 6] = [\n        (\"<p>Tip</p>\", \"Tip\"),\n        (\"<p>Note</p>\", \"Note\"),\n        (\"<p>Caution</p>\", \"Caution\"),\n        (\"<p>Warning</p>\", \"Warning\"),\n        (\"<p>Important</p>\", \"Important\"),\n        (\"<p>Quick start</p>\", \"Quick start\"),\n    ];\n    let mut result = String::with_capacity(html.len() + 512);\n    let lines: Vec<&str> = html.lines().collect();\n    let mut i = 0;\n\n    while i < lines.len() {\n        let trimmed = lines[i].trim();\n\n        // Match <p>Label</p> followed by <p>content</p>\n        let mut matched_label = None;\n        for (pattern, label) in &callout_patterns {\n            if trimmed == *pattern {\n                matched_label = Some(*label);\n                break;\n            }\n        }\n\n        if let Some(label) = matched_label {\n            let icon = match label {\n                \"Tip\" => \"💡\",\n                \"Note\" | \"Important\" => \"📝\",\n                \"Caution\" | \"Warning\" => \"⚠️\",\n                \"Quick start\" => \"🚀\",\n                _ => \"ℹ️\",\n            };\n            let css_class = match label {\n                \"Caution\" | 
\"Warning\" => \"callout callout-warning\",\n                \"Tip\" | \"Quick start\" => \"callout callout-tip\",\n                _ => \"callout callout-note\",\n            };\n            // Collect following paragraphs as callout content\n            result.push_str(&format!(\n                \"<div class=\\\"{}\\\">\\n<p class=\\\"callout-title\\\">{} {}</p>\\n\",\n                css_class, icon, label\n            ));\n            i += 1;\n            // Include subsequent content paragraphs until next heading or another callout\n            while i < lines.len() {\n                let next = lines[i].trim();\n                if next.is_empty() {\n                    i += 1;\n                    continue;\n                }\n                // Stop at headings, details, or another callout label\n                if next.starts_with(\"<h\") || next.starts_with(\"<details\") || next.starts_with(\"<div class=\\\"callout\") {\n                    break;\n                }\n                // Include this line in the callout\n                result.push_str(lines[i]);\n                result.push('\\n');\n                i += 1;\n                // Only include the first content element\n                break;\n            }\n            result.push_str(\"</div>\\n\");\n        } else {\n            result.push_str(lines[i]);\n            result.push('\\n');\n            i += 1;\n        }\n    }\n\n    result\n}\n\nfn extract_title(markdown: &str) -> String {\n    let lines: Vec<&str> = markdown.lines().collect();\n    let mut in_code_block = false;\n    for (i, line) in lines.iter().enumerate() {\n        let trimmed = line.trim();\n        if trimmed.starts_with(\"```\") {\n            in_code_block = !in_code_block;\n            continue;\n        }\n        if in_code_block {\n            continue;\n        }\n        // ATX h1: # Heading\n        if let Some(heading) = trimmed.strip_prefix(\"# \") {\n            return heading.trim().to_string();\n        
}\n        // Setext h1: text followed by === on the next line\n        if !trimmed.is_empty()\n            && i + 1 < lines.len()\n            && lines[i + 1].trim().len() >= 3\n            && lines[i + 1].trim().chars().all(|c| c == '=')\n        {\n            return trimmed.to_string();\n        }\n    }\n    \"SiteOne Crawler - Markdown Viewer\".to_string()\n}\n\nfn add_heading_ids(html: &str) -> String {\n    let closing_tags: [&str; 4] = [\"</h1>\", \"</h2>\", \"</h3>\", \"</h4>\"];\n    let mut result = String::with_capacity(html.len() + 256);\n    let mut used_slugs: std::collections::HashMap<String, usize> = std::collections::HashMap::new();\n    let mut search_from = 0;\n\n    while search_from < html.len() {\n        // Find next '<h' pattern\n        let remaining = &html[search_from..];\n        let next_h = remaining.find(\"<h\").and_then(|pos| {\n            let abs = search_from + pos;\n            let bytes = html.as_bytes();\n            if abs + 3 < bytes.len() && bytes[abs + 2] >= b'1' && bytes[abs + 2] <= b'4' && bytes[abs + 3] == b'>' {\n                Some((abs, (bytes[abs + 2] - b'1') as usize))\n            } else {\n                None\n            }\n        });\n\n        match next_h {\n            Some((tag_start, level_idx)) => {\n                let tag_end = tag_start + 4; // past '>'\n                let closing_tag = closing_tags[level_idx];\n                if let Some(close_rel) = html[tag_end..].find(closing_tag) {\n                    let inner_html = &html[tag_end..tag_end + close_rel];\n                    let plain_text = strip_html_tags(inner_html);\n                    let base_slug = slugify(&plain_text);\n\n                    // Copy everything before this heading\n                    result.push_str(&html[search_from..tag_start]);\n\n                    if !base_slug.is_empty() {\n                        // Deduplicate slug\n                        let slug = match used_slugs.get(&base_slug) {\n                     
       Some(&count) => {\n                                let deduped = format!(\"{}-{}\", base_slug, count);\n                                *used_slugs.get_mut(&base_slug).unwrap() = count + 1;\n                                deduped\n                            }\n                            None => {\n                                used_slugs.insert(base_slug.clone(), 1);\n                                base_slug\n                            }\n                        };\n                        let escaped_slug = html_escape(&slug);\n                        result.push_str(&format!(\n                            \"<h{0} id=\\\"{1}\\\"><a href=\\\"#{1}\\\" class=\\\"heading-link\\\">\",\n                            level_idx + 1,\n                            escaped_slug\n                        ));\n                        result.push_str(inner_html);\n                        result.push_str(\"</a>\");\n                        result.push_str(closing_tag);\n                    } else {\n                        result.push_str(&html[tag_start..tag_end]);\n                        result.push_str(inner_html);\n                        result.push_str(closing_tag);\n                    }\n                    search_from = tag_end + close_rel + closing_tag.len();\n                } else {\n                    // No closing tag found — emit the opening tag and continue\n                    result.push_str(&html[search_from..tag_end]);\n                    search_from = tag_end;\n                }\n            }\n            None => {\n                // No more headings — copy rest and done\n                result.push_str(&html[search_from..]);\n                break;\n            }\n        }\n    }\n\n    result\n}\n\n/// Detect heading level from an HTML line like `<h2 ...>` or `<h2>`.\nfn detect_heading_level(line: &str) -> Option<u8> {\n    let t = line.trim();\n    if t.starts_with(\"<h\") && t.len() > 3 {\n        let ch = t.as_bytes()[2];\n        if 
(b'1'..=b'6').contains(&ch) && (t.as_bytes()[3] == b'>' || t.as_bytes()[3] == b' ') {\n            return Some(ch - b'0');\n        }\n    }\n    None\n}\n\n/// When a heading is followed only by link-only content (paragraphs or lists)\n/// and there are more than 3 links, collapse them into an accordion.\n/// Also handles sub-headings + link blocks grouped under a parent heading.\nfn collapse_link_blocks(html: &str) -> String {\n    let lines: Vec<&str> = html.lines().collect();\n    let mut result = String::with_capacity(html.len() + 512);\n    let mut i = 0;\n    let mut details_depth = 0; // track nesting inside <details> blocks\n\n    while i < lines.len() {\n        let trimmed = lines[i].trim();\n\n        // Track <details> nesting — don't create accordions inside existing ones\n        if trimmed.starts_with(\"<details\") {\n            details_depth += 1;\n        }\n        if trimmed == \"</details>\" && details_depth > 0 {\n            details_depth -= 1;\n        }\n\n        let heading_level = detect_heading_level(trimmed);\n\n        if details_depth > 0 {\n            // Inside an existing <details> block — pass through without collapsing\n            result.push_str(lines[i]);\n            result.push('\\n');\n            i += 1;\n            continue;\n        }\n\n        if let Some(level) = heading_level {\n            let heading_line = lines[i];\n            let closing = format!(\"</h{}>\", level);\n            let heading_text = if let Some(start) = heading_line.find('>') {\n                let after = &heading_line[start + 1..];\n                if let Some(end) = after.find(&closing) {\n                    strip_html_tags(&after[..end]).trim().to_string()\n                } else {\n                    String::new()\n                }\n            } else {\n                String::new()\n            };\n\n            // Scan forward: collect link-only content blocks\n            // Allowed: empty lines, link-only <p>, link-only <ul>, 
sub-headings\n            let mut j = i + 1;\n            let mut link_count = 0;\n            let mut content_indices: Vec<(usize, usize)> = Vec::new(); // (start, end) ranges\n            let mut all_link_only = true;\n\n            while j < lines.len() {\n                let next = lines[j].trim();\n\n                if next.is_empty() {\n                    content_indices.push((j, j + 1));\n                    j += 1;\n                    continue;\n                }\n\n                // Stop at same-or-higher-level heading\n                if let Some(next_level) = detect_heading_level(next) {\n                    if next_level <= level {\n                        break;\n                    }\n                    // Sub-heading within this section — include it\n                    content_indices.push((j, j + 1));\n                    j += 1;\n                    continue;\n                }\n\n                // Link-only paragraph\n                if is_link_only_paragraph(next) {\n                    link_count += next.matches(\"<a \").count();\n                    content_indices.push((j, j + 1));\n                    j += 1;\n                    continue;\n                }\n\n                // Link-only <ul> block\n                if next == \"<ul>\" {\n                    let ul_start = j;\n                    let mut ul_links = 0;\n                    let mut ul_ok = true;\n                    let mut k = j + 1;\n                    while k < lines.len() {\n                        let ul_line = lines[k].trim();\n                        if ul_line == \"</ul>\" {\n                            k += 1;\n                            break;\n                        }\n                        if ul_line.starts_with(\"<li>\") && ul_line.contains(\"<a \") {\n                            ul_links += 1;\n                        } else if ul_line.starts_with(\"<li>\") && ul_line != \"<li></li>\" {\n                            // Non-link list item with content\n  
                          let inner_text = strip_html_tags(ul_line);\n                            if !inner_text.trim().is_empty() {\n                                ul_ok = false;\n                            }\n                        }\n                        k += 1;\n                    }\n\n                    if ul_ok && ul_links > 0 {\n                        link_count += ul_links;\n                        content_indices.push((ul_start, k));\n                        j = k;\n                        continue;\n                    }\n\n                    all_link_only = false;\n                    break;\n                }\n\n                // Any other content → stop\n                all_link_only = false;\n                break;\n            }\n\n            if all_link_only && link_count > 3 && !heading_text.is_empty() {\n                result.push_str(&format!(\n                    \"<details>\\n<summary>{} ({} links)</summary>\\n\",\n                    html_escape(&heading_text),\n                    link_count\n                ));\n                for (start, end) in &content_indices {\n                    for line in lines.iter().take(*end).skip(*start) {\n                        result.push_str(line);\n                        result.push('\\n');\n                    }\n                }\n                result.push_str(\"</details>\\n\");\n                i = j;\n            } else {\n                result.push_str(heading_line);\n                result.push('\\n');\n                i += 1;\n            }\n        } else {\n            result.push_str(lines[i]);\n            result.push('\\n');\n            i += 1;\n        }\n    }\n\n    result\n}\n\n/// Check if an HTML line is a `<p>` containing only `<a>` links.\nfn is_link_only_paragraph(line: &str) -> bool {\n    let trimmed = line.trim();\n    if !trimmed.starts_with(\"<p>\") || !trimmed.ends_with(\"</p>\") {\n        return false;\n    }\n    let inner = &trimmed[3..trimmed.len() - 
4];\n    if !inner.contains(\"<a \") {\n        return false;\n    }\n    let mut remaining = inner.to_string();\n    while let Some(start) = remaining.find(\"<a \") {\n        if let Some(end) = remaining[start..].find(\"</a>\") {\n            remaining = format!(\"{}{}\", &remaining[..start], &remaining[start + end + 4..]);\n        } else {\n            break;\n        }\n    }\n    remaining.trim().is_empty()\n}\n\n/// Find `<details>` blocks whose `<summary>` is \"Menu\" or \"Links\" and append link count.\nfn add_accordion_link_counts(html: &str) -> String {\n    let mut result = String::with_capacity(html.len() + 256);\n    let mut search_from = 0;\n\n    while let Some(details_start) = html[search_from..].find(\"<details>\") {\n        let abs_start = search_from + details_start;\n        // Copy everything before this <details>\n        result.push_str(&html[search_from..abs_start]);\n\n        // Find matching </details> (respecting nesting)\n        let after_tag = abs_start + \"<details>\".len();\n        let details_end = {\n            let mut depth = 1;\n            let mut scan = after_tag;\n            loop {\n                let next_open = html[scan..].find(\"<details>\");\n                let next_close = html[scan..].find(\"</details>\");\n                match next_close {\n                    Some(close_rel) => {\n                        if let Some(open_rel) = next_open\n                            && open_rel < close_rel\n                        {\n                            depth += 1;\n                            scan += open_rel + \"<details>\".len();\n                        } else {\n                            depth -= 1;\n                            scan += close_rel + \"</details>\".len();\n                            if depth == 0 {\n                                break;\n                            }\n                        }\n                    }\n                    None => break,\n                }\n            }\n          
  if depth == 0 { Some(scan) } else { None }\n        };\n        if let Some(details_end) = details_end {\n            let block = &html[abs_start..details_end];\n\n            // Check if summary is \"Menu\" or \"Links\"\n            if let Some(summary_start) = block.find(\"<summary>\")\n                && let Some(summary_end) = block.find(\"</summary>\")\n            {\n                let summary_text = &block[summary_start + \"<summary>\".len()..summary_end];\n                let trimmed_summary = summary_text.trim();\n\n                if trimmed_summary == \"Menu\" || trimmed_summary == \"Links\" {\n                    // Count <a> links inside the block\n                    let link_count = block.matches(\"<a \").count() + block.matches(\"<a\\n\").count();\n                    if link_count > 0 {\n                        // Rebuild block with count in summary\n                        let new_summary = format!(\"<summary>{} ({} links)</summary>\", trimmed_summary, link_count);\n                        let before_summary = &block[..summary_start];\n                        let after_summary = &block[summary_end + \"</summary>\".len()..];\n                        result.push_str(before_summary);\n                        result.push_str(&new_summary);\n                        result.push_str(after_summary);\n                        search_from = details_end;\n                        continue;\n                    }\n                }\n            }\n\n            // Not a Menu/Links accordion — emit as-is\n            result.push_str(block);\n            search_from = details_end;\n        } else {\n            // No closing </details> — emit rest as-is\n            result.push_str(&html[abs_start..]);\n            search_from = html.len();\n        }\n    }\n\n    // Copy remaining text\n    result.push_str(&html[search_from..]);\n    result\n}\n\nfn strip_html_tags(html: &str) -> String {\n    let mut result = String::with_capacity(html.len());\n    let mut 
in_tag = false;\n    for ch in html.chars() {\n        if ch == '<' {\n            in_tag = true;\n        } else if ch == '>' {\n            in_tag = false;\n        } else if !in_tag {\n            result.push(ch);\n        }\n    }\n    result\n}\n\nfn slugify(text: &str) -> String {\n    text.to_lowercase()\n        .chars()\n        .map(|c| {\n            if c.is_ascii_alphanumeric() {\n                c\n            } else if c == ' ' || c == '_' || c == '-' {\n                '-'\n            } else {\n                '\\0'\n            }\n        })\n        .filter(|c| *c != '\\0')\n        .collect::<String>()\n        .split('-')\n        .filter(|s| !s.is_empty())\n        .collect::<Vec<_>>()\n        .join(\"-\")\n}\n\nfn build_breadcrumb(request_path: &str) -> String {\n    let mut parts = vec![r#\"<a href=\"/\">Home</a>\"#.to_string()];\n\n    let clean = request_path\n        .trim_end_matches(\".md\")\n        .trim_end_matches(\"/index\")\n        .trim_end_matches('/');\n\n    if !clean.is_empty() {\n        let segments: Vec<&str> = clean.split('/').filter(|s| !s.is_empty()).collect();\n        let mut accumulated = String::new();\n        for (i, segment) in segments.iter().enumerate() {\n            accumulated.push('/');\n            accumulated.push_str(segment);\n            let display = title_case_segment(segment);\n            if i == segments.len() - 1 {\n                parts.push(format!(\"<span>{}</span>\", html_escape(&display)));\n            } else {\n                parts.push(format!(\n                    r#\"<a href=\"{}\">{}</a>\"#,\n                    html_escape(&accumulated),\n                    html_escape(&display)\n                ));\n            }\n        }\n    }\n\n    parts.join(\" / \")\n}\n\n/// Convert URL path segment to Title Case: \"marketing-sites\" → \"Marketing Sites\"\nfn title_case_segment(segment: &str) -> String {\n    segment\n        .split('-')\n        .map(|word| {\n            let mut chars = 
word.chars();\n            match chars.next() {\n                Some(c) => {\n                    let mut s = c.to_uppercase().to_string();\n                    s.extend(chars);\n                    s\n                }\n                None => String::new(),\n            }\n        })\n        .collect::<Vec<_>>()\n        .join(\" \")\n}\n\nfn html_escape(s: &str) -> String {\n    s.replace('&', \"&amp;\")\n        .replace('<', \"&lt;\")\n        .replace('>', \"&gt;\")\n        .replace('\"', \"&quot;\")\n}\n\n// ---- Directory listing ----\n\nfn directory_listing(dir_path: &Path, url_path: &str, is_markdown: bool) -> String {\n    let mut entries: Vec<(String, bool)> = Vec::new();\n\n    if let Ok(read_dir) = std::fs::read_dir(dir_path) {\n        for entry in read_dir.flatten() {\n            let name = entry.file_name().to_string_lossy().to_string();\n            let is_dir = entry.path().is_dir();\n            entries.push((name, is_dir));\n        }\n    }\n\n    // Directories first, then alphabetical\n    entries.sort_by(|a, b| match (a.1, b.1) {\n        (true, false) => std::cmp::Ordering::Less,\n        (false, true) => std::cmp::Ordering::Greater,\n        _ => a.0.to_lowercase().cmp(&b.0.to_lowercase()),\n    });\n\n    let url_base = if url_path.is_empty() {\n        String::new()\n    } else {\n        format!(\"/{}\", url_path.trim_end_matches('/'))\n    };\n\n    let mut items = String::new();\n\n    if !url_path.is_empty() {\n        items.push_str(\"<li class=\\\"dir\\\"><a href=\\\"..\\\">..</a></li>\\n\");\n    }\n\n    for (name, is_dir) in &entries {\n        let css_class = if *is_dir { \"dir\" } else { \"file\" };\n        let href = if *is_dir {\n            format!(\"{}/{}/\", url_base, name)\n        } else {\n            format!(\"{}/{}\", url_base, name)\n        };\n        let display = if *is_dir { format!(\"{}/\", name) } else { name.clone() };\n        items.push_str(&format!(\n            \"<li class=\\\"{}\\\"><a 
href=\\\"{}\\\">{}</a></li>\\n\",\n            css_class,\n            html_escape(&href),\n            html_escape(&display),\n        ));\n    }\n\n    let title = if url_path.is_empty() {\n        \"Index\".to_string()\n    } else {\n        format!(\"/{}\", url_path)\n    };\n\n    if is_markdown {\n        format!(\n            r#\"<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"utf-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n<title>{title} - Directory</title>\n<style>\n{css}\n</style>\n</head>\n<body>\n<div class=\"container\">\n<nav class=\"breadcrumb\"><a href=\"/\">Home</a></nav>\n<article class=\"markdown-body\">\n<h1>{title}</h1>\n<ul class=\"directory-listing\">\n{items}\n</ul>\n</article>\n</div>\n</body>\n</html>\"#,\n            title = html_escape(&title),\n            css = MARKDOWN_CSS,\n            items = items,\n        )\n    } else {\n        format!(\n            r#\"<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"utf-8\">\n<title>{title} - Directory</title>\n<style>\nbody {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; margin: 20px 40px; color: #24292e; }}\na {{ color: #0366d6; text-decoration: none; }}\na:hover {{ text-decoration: underline; }}\nul {{ list-style: none; padding: 0; }}\nli {{ padding: 4px 0; }}\nli.dir a::before {{ content: \"[ ] \"; font-family: monospace; }}\nli.file a::before {{ content: \"  - \"; font-family: monospace; }}\n</style>\n</head>\n<body>\n<h1>{title}</h1>\n<ul>\n{items}\n</ul>\n</body>\n</html>\"#,\n            title = html_escape(&title),\n            items = items,\n        )\n    }\n}\n\n// ---- CSS Theme ----\n\nconst MARKDOWN_CSS: &str = r##\"\n* { margin: 0; padding: 0; box-sizing: border-box; }\n\nbody {\n    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif;\n    font-size: 15px;\n    line-height: 1.6;\n    color: #1f2328;\n    background: #fff;\n    -webkit-font-smoothing: 
antialiased;\n}\n\n.container {\n    max-width: 860px;\n    margin: 0 auto;\n    padding: 16px 32px 32px;\n}\n\n/* Breadcrumb & Toolbar */\n.breadcrumb {\n    display: flex;\n    justify-content: space-between;\n    align-items: center;\n    padding: 8px 0;\n    margin-bottom: 16px;\n    border-bottom: 1px solid #d1d9e0;\n    font-size: 13px;\n    color: #656d76;\n}\n.breadcrumb a { color: #0969da; text-decoration: none; }\n.breadcrumb a:hover { text-decoration: underline; }\n.breadcrumb-path span { color: #1f2328; font-weight: 500; }\n.toolbar { display: flex; gap: 4px; }\n.toolbar button {\n    background: none; border: 1px solid #d1d9e0; border-radius: 6px;\n    cursor: pointer; font-size: 16px; width: 32px; height: 28px;\n    display: flex; align-items: center; justify-content: center;\n    color: #656d76; transition: background .15s;\n}\n.toolbar button:hover { background: #f3f4f6; }\n\n/* Headings */\n.markdown-body h1, .markdown-body h2, .markdown-body h3,\n.markdown-body h4, .markdown-body h5, .markdown-body h6 {\n    margin-top: 1.5em;\n    margin-bottom: 0.5em;\n    font-weight: 600;\n    line-height: 1.3;\n    color: #1f2328;\n}\n.markdown-body h1 { font-size: 1.85em; padding-bottom: .25em; border-bottom: 1px solid #d1d9e0; margin-top: 0; }\n.markdown-body h2 { font-size: 1.4em; padding-bottom: .2em; border-bottom: 1px solid #d1d9e0; }\n.markdown-body h3 { font-size: 1.15em; }\n.markdown-body h4 { font-size: 1em; }\n.markdown-body h5, .markdown-body h6 { font-size: .9em; color: #656d76; }\n.markdown-body h1:first-child { margin-top: 0; }\n\n/* Heading anchor links */\n.markdown-body h1[id], .markdown-body h2[id], .markdown-body h3[id], .markdown-body h4[id] {\n    position: relative;\n}\n.markdown-body .heading-link,\n.markdown-body .heading-link:hover,\n.markdown-body .heading-link:visited,\n.markdown-body .heading-link:active {\n    color: inherit;\n    text-decoration: none;\n}\n.markdown-body h1[id]:hover::before, .markdown-body 
h2[id]:hover::before,\n.markdown-body h3[id]:hover::before, .markdown-body h4[id]:hover::before {\n    content: \"#\";\n    position: absolute;\n    left: -1.2em;\n    color: #0969da;\n    font-weight: 400;\n}\n\n/* Paragraphs and text */\n.markdown-body p { margin-bottom: 12px; }\n.markdown-body > p:last-child { margin-bottom: 0; }\n\n.markdown-body a { color: #0969da; text-decoration: none; }\n.markdown-body a:hover { text-decoration: underline; color: #0550ae; }\n.markdown-body a:visited { color: #6639ba; }\n\n.markdown-body strong { font-weight: 600; }\n.markdown-body em { font-style: italic; }\n.markdown-body del { color: #656d76; }\n\n/* Inline code */\n.markdown-body code {\n    padding: .15em .35em;\n    font-size: 84%;\n    background-color: #eff1f3;\n    border-radius: 4px;\n    font-family: ui-monospace, SFMono-Regular, 'SF Mono', Menlo, Consolas, 'Liberation Mono', monospace;\n    color: #1f2328;\n}\n\n/* Code blocks */\n.markdown-body pre {\n    padding: 14px 16px;\n    overflow: auto;\n    font-size: 84%;\n    line-height: 1.5;\n    background-color: #f6f8fa;\n    border-radius: 8px;\n    border: 1px solid #d1d9e0;\n    margin-bottom: 14px;\n}\n.markdown-body pre code {\n    display: block;\n    padding: 0;\n    overflow: visible;\n    background: transparent;\n    border: 0;\n    font-size: 100%;\n    color: inherit;\n    border-radius: 0;\n}\n\n/* Tables */\n.markdown-body table {\n    border-collapse: collapse;\n    width: 100%;\n    margin-bottom: 14px;\n    display: block;\n    overflow-x: auto;\n    font-size: 14px;\n}\n.markdown-body th, .markdown-body td {\n    padding: 6px 12px;\n    border: 1px solid #d1d9e0;\n}\n.markdown-body th {\n    font-weight: 600;\n    background-color: #f6f8fa;\n    text-align: left;\n}\n.markdown-body tr:nth-child(2n) { background-color: #f6f8fa; }\n\n/* Blockquotes */\n.markdown-body blockquote {\n    padding: 4px 16px;\n    color: #656d76;\n    border-left: 3px solid #d1d9e0;\n    margin-bottom: 
12px;\n}\n.markdown-body blockquote p { margin-bottom: 4px; }\n.markdown-body blockquote p:last-child { margin-bottom: 0; }\n\n/* Lists — compact */\n.markdown-body ul, .markdown-body ol {\n    padding-left: 1.5em;\n    margin-bottom: 10px;\n}\n.markdown-body li {\n    margin: 1px 0;\n    line-height: 1.5;\n}\n.markdown-body li > p { margin-bottom: 4px; }\n.markdown-body li > ul, .markdown-body li > ol {\n    margin-top: 2px;\n    margin-bottom: 2px;\n}\n\n/* Task lists */\n.markdown-body input[type=\"checkbox\"] {\n    margin-right: .4em;\n    vertical-align: middle;\n    position: relative;\n    top: -1px;\n}\n\n/* Images */\n.markdown-body img {\n    max-width: 100%;\n    height: auto;\n    border-style: none;\n    border-radius: 6px;\n}\n\n/* Horizontal rules */\n.markdown-body hr {\n    height: 2px;\n    padding: 0;\n    margin: 20px 0;\n    background-color: #d1d9e0;\n    border: 0;\n}\n\n/* Accordions (details/summary) */\n.markdown-body details {\n    margin: 12px 0;\n    border: 1px solid #d1d9e0;\n    border-radius: 8px;\n    padding: 0;\n    overflow: hidden;\n}\n.markdown-body details summary {\n    cursor: pointer;\n    font-weight: 600;\n    font-size: 14px;\n    padding: 8px 14px;\n    background-color: #f6f8fa;\n    user-select: none;\n    list-style: none;\n    display: flex;\n    align-items: center;\n    gap: 6px;\n}\n.markdown-body details summary::before {\n    content: \"▶\";\n    font-size: 10px;\n    color: #656d76;\n    transition: transform .15s ease;\n    display: inline-block;\n    flex-shrink: 0;\n}\n.markdown-body details[open] summary::before {\n    transform: rotate(90deg);\n}\n.markdown-body details summary::-webkit-details-marker { display: none; }\n.markdown-body details[open] summary {\n    border-bottom: 1px solid #d1d9e0;\n}\n.markdown-body details summary:hover {\n    background-color: #eaeef2;\n}\n.markdown-body details > :not(summary) {\n    padding: 0 14px;\n}\n.markdown-body details > p:first-of-type {\n    margin-top: 
10px;\n}\n.markdown-body details > ul, .markdown-body details > ol {\n    padding: 8px 14px 6px 32px;\n}\n.markdown-body details > ul li, .markdown-body details > ol li {\n    font-size: 14px;\n}\n\n/* Callout boxes (Tip, Note, Caution, etc.) */\n.callout {\n    margin: 14px 0;\n    padding: 12px 16px;\n    border-radius: 8px;\n    border-left: 4px solid;\n    font-size: 14px;\n}\n.callout-note {\n    background-color: #ddf4ff;\n    border-left-color: #0969da;\n}\n.callout-tip {\n    background-color: #dafbe1;\n    border-left-color: #1a7f37;\n}\n.callout-warning {\n    background-color: #fff8c5;\n    border-left-color: #9a6700;\n}\n.callout .callout-title {\n    font-weight: 600;\n    margin-bottom: 4px;\n    font-size: 14px;\n}\n.callout p { margin-bottom: 4px; }\n.callout p:last-child { margin-bottom: 0; }\n\n/* Directory listing */\n.directory-listing { list-style: none; padding: 0 !important; }\n.directory-listing li {\n    padding: 5px 8px;\n    border-bottom: 1px solid #f0f2f4;\n}\n.directory-listing li:last-child { border-bottom: none; }\n.directory-listing li a {\n    display: block;\n    text-decoration: none;\n    color: #0969da;\n}\n.directory-listing li a:hover { text-decoration: underline; }\n.directory-listing li.dir a { font-weight: 600; color: #1f2328; }\n.directory-listing li.dir a::before { content: \"📁  \"; }\n.directory-listing li.file a::before { content: \"📄  \"; }\n\n/* Footer */\nfooter {\n    margin-top: 32px;\n    padding-top: 12px;\n    border-top: 1px solid #d1d9e0;\n    font-size: 12px;\n    color: #656d76;\n}\nfooter a { color: #0969da; text-decoration: none; }\nfooter a:hover { text-decoration: underline; }\n\n/* Selection highlight */\n::selection { background-color: #dbe9f9; }\n\n/* Smooth scroll for anchor links */\nhtml { scroll-behavior: smooth; }\n\n/* Wide mode */\nhtml.wide .container { max-width: 100%; }\n\n/* Dark mode */\nhtml.dark body { background: #0d1117; color: #e6edf3; }\nhtml.dark .breadcrumb { border-color: 
#30363d; color: #8b949e; }\nhtml.dark .breadcrumb a { color: #58a6ff; }\nhtml.dark .breadcrumb-path span { color: #e6edf3; }\nhtml.dark .toolbar button { border-color: #30363d; color: #8b949e; }\nhtml.dark .toolbar button:hover { background: #21262d; }\nhtml.dark .markdown-body { color: #e6edf3; }\nhtml.dark .markdown-body h1,\nhtml.dark .markdown-body h2,\nhtml.dark .markdown-body h3,\nhtml.dark .markdown-body h4 { color: #e6edf3; }\nhtml.dark .markdown-body h1, html.dark .markdown-body h2 { border-color: #30363d; }\nhtml.dark .markdown-body h5, html.dark .markdown-body h6 { color: #8b949e; }\nhtml.dark .markdown-body a { color: #58a6ff; }\nhtml.dark .markdown-body a:hover { color: #79c0ff; }\nhtml.dark .markdown-body a:visited { color: #bc8cff; }\nhtml.dark .markdown-body .heading-link,\nhtml.dark .markdown-body .heading-link:hover,\nhtml.dark .markdown-body .heading-link:visited { color: inherit; }\nhtml.dark .markdown-body h1[id]:hover::before,\nhtml.dark .markdown-body h2[id]:hover::before,\nhtml.dark .markdown-body h3[id]:hover::before,\nhtml.dark .markdown-body h4[id]:hover::before { color: #58a6ff; }\nhtml.dark .markdown-body code {\n    background-color: #161b22; color: #e6edf3; border-color: #30363d;\n}\nhtml.dark .markdown-body pre {\n    background-color: #161b22; border-color: #30363d;\n}\nhtml.dark .markdown-body pre code { background: transparent; }\nhtml.dark .markdown-body blockquote { border-color: #30363d; color: #8b949e; }\nhtml.dark .markdown-body table th { background-color: #161b22; border-color: #30363d; color: #e6edf3; }\nhtml.dark .markdown-body table td { border-color: #30363d; }\nhtml.dark .markdown-body tr:nth-child(2n) { background-color: #161b22; }\nhtml.dark .markdown-body hr { background-color: #30363d; }\nhtml.dark .markdown-body img { opacity: .85; }\nhtml.dark .markdown-body del { color: #8b949e; }\nhtml.dark .markdown-body details { border-color: #30363d; }\nhtml.dark .markdown-body details summary { color: #e6edf3; background: 
#161b22; }\nhtml.dark .markdown-body .callout { border-color: #30363d; background: #161b22; }\nhtml.dark .markdown-body .callout-title { color: #e6edf3; }\nhtml.dark footer { border-color: #30363d; color: #8b949e; }\nhtml.dark footer a { color: #58a6ff; }\nhtml.dark ::selection { background-color: #1f3a5f; }\n\n/* Responsive */\n@media (max-width: 768px) {\n    .container { padding: 12px 16px 24px; }\n    .markdown-body h1 { font-size: 1.5em; }\n    .markdown-body h2 { font-size: 1.25em; }\n    .markdown-body pre { font-size: 80%; padding: 10px 12px; }\n}\n\n/* Print */\n@media print {\n    .breadcrumb, footer, .toolbar { display: none; }\n    .markdown-body details { border: none; }\n    .markdown-body details > summary { display: none; }\n    .markdown-body details > * { display: block !important; }\n    .markdown-body a { color: inherit; text-decoration: underline; }\n    .markdown-body a::after { content: \" (\" attr(href) \")\"; font-size: 80%; color: #666; }\n}\n\"##;\n"
  },
  {
    "path": "src/types.rs",
    "content": "// SiteOne Crawler - Type definitions\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\nuse serde::{Deserialize, Serialize};\r\nuse std::fmt;\r\n\r\nuse crate::error::CrawlerError;\r\n\r\n// ---------------------------------------------------------------------------\r\n// DeviceType\r\n// ---------------------------------------------------------------------------\r\n\r\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]\r\n#[serde(rename_all = \"lowercase\")]\r\npub enum DeviceType {\r\n    Desktop,\r\n    Mobile,\r\n    Tablet,\r\n}\r\n\r\nimpl DeviceType {\r\n    pub fn from_text(text: &str) -> Result<Self, CrawlerError> {\r\n        match text.trim().to_lowercase().as_str() {\r\n            \"desktop\" => Ok(DeviceType::Desktop),\r\n            \"mobile\" => Ok(DeviceType::Mobile),\r\n            \"tablet\" => Ok(DeviceType::Tablet),\r\n            other => Err(CrawlerError::Config(format!(\r\n                \"Unknown device type '{}'. Supported values are: {}\",\r\n                other,\r\n                Self::available_text_types().join(\", \")\r\n            ))),\r\n        }\r\n    }\r\n\r\n    pub fn available_text_types() -> Vec<&'static str> {\r\n        vec![\"desktop\", \"mobile\", \"tablet\"]\r\n    }\r\n\r\n    pub fn as_str(&self) -> &'static str {\r\n        match self {\r\n            DeviceType::Desktop => \"desktop\",\r\n            DeviceType::Mobile => \"mobile\",\r\n            DeviceType::Tablet => \"tablet\",\r\n        }\r\n    }\r\n}\r\n\r\nimpl fmt::Display for DeviceType {\r\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\r\n        f.write_str(self.as_str())\r\n    }\r\n}\r\n\r\n// ---------------------------------------------------------------------------\r\n// AssetType\r\n// ---------------------------------------------------------------------------\r\n\r\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]\r\n#[serde(rename_all = \"lowercase\")]\r\npub 
enum AssetType {\r\n    Fonts,\r\n    Images,\r\n    Styles,\r\n    Scripts,\r\n    Files,\r\n}\r\n\r\nimpl AssetType {\r\n    pub fn from_text(text: &str) -> Result<Self, CrawlerError> {\r\n        match text.trim().to_lowercase().as_str() {\r\n            \"fonts\" => Ok(AssetType::Fonts),\r\n            \"images\" => Ok(AssetType::Images),\r\n            \"styles\" => Ok(AssetType::Styles),\r\n            \"scripts\" => Ok(AssetType::Scripts),\r\n            \"files\" => Ok(AssetType::Files),\r\n            other => Err(CrawlerError::Config(format!(\r\n                \"Unknown asset type '{}'. Supported values are: {}\",\r\n                other,\r\n                Self::available_text_types().join(\", \")\r\n            ))),\r\n        }\r\n    }\r\n\r\n    pub fn available_text_types() -> Vec<&'static str> {\r\n        vec![\"fonts\", \"images\", \"styles\", \"scripts\", \"files\"]\r\n    }\r\n\r\n    pub fn as_str(&self) -> &'static str {\r\n        match self {\r\n            AssetType::Fonts => \"fonts\",\r\n            AssetType::Images => \"images\",\r\n            AssetType::Styles => \"styles\",\r\n            AssetType::Scripts => \"scripts\",\r\n            AssetType::Files => \"files\",\r\n        }\r\n    }\r\n}\r\n\r\nimpl fmt::Display for AssetType {\r\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\r\n        f.write_str(self.as_str())\r\n    }\r\n}\r\n\r\n// ---------------------------------------------------------------------------\r\n// ContentTypeId\r\n// ---------------------------------------------------------------------------\r\n\r\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]\r\n#[repr(i32)]\r\npub enum ContentTypeId {\r\n    Html = 1,\r\n    Script = 2,\r\n    Stylesheet = 3,\r\n    Image = 4,\r\n    Video = 5,\r\n    Font = 6,\r\n    Document = 7,\r\n    Json = 8,\r\n    Redirect = 9,\r\n    Other = 10,\r\n    Audio = 11,\r\n    Xml = 12,\r\n}\r\n\r\nimpl ContentTypeId {\r\n    pub fn 
from_i32(value: i32) -> Option<Self> {\r\n        match value {\r\n            1 => Some(ContentTypeId::Html),\r\n            2 => Some(ContentTypeId::Script),\r\n            3 => Some(ContentTypeId::Stylesheet),\r\n            4 => Some(ContentTypeId::Image),\r\n            5 => Some(ContentTypeId::Video),\r\n            6 => Some(ContentTypeId::Font),\r\n            7 => Some(ContentTypeId::Document),\r\n            8 => Some(ContentTypeId::Json),\r\n            9 => Some(ContentTypeId::Redirect),\r\n            10 => Some(ContentTypeId::Other),\r\n            11 => Some(ContentTypeId::Audio),\r\n            12 => Some(ContentTypeId::Xml),\r\n            _ => None,\r\n        }\r\n    }\r\n\r\n    pub fn name(&self) -> &'static str {\r\n        match self {\r\n            ContentTypeId::Html => \"HTML\",\r\n            ContentTypeId::Script => \"JS\",\r\n            ContentTypeId::Stylesheet => \"CSS\",\r\n            ContentTypeId::Image => \"Image\",\r\n            ContentTypeId::Audio => \"Audio\",\r\n            ContentTypeId::Video => \"Video\",\r\n            ContentTypeId::Font => \"Font\",\r\n            ContentTypeId::Document => \"Document\",\r\n            ContentTypeId::Json => \"JSON\",\r\n            ContentTypeId::Xml => \"XML\",\r\n            ContentTypeId::Redirect => \"Redirect\",\r\n            ContentTypeId::Other => \"Other\",\r\n        }\r\n    }\r\n}\r\n\r\nimpl fmt::Display for ContentTypeId {\r\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\r\n        f.write_str(self.name())\r\n    }\r\n}\r\n\r\n// ---------------------------------------------------------------------------\r\n// SkippedReason\r\n// ---------------------------------------------------------------------------\r\n\r\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]\r\n#[repr(i32)]\r\npub enum SkippedReason {\r\n    NotAllowedHost = 1,\r\n    RobotsTxt = 2,\r\n    ExceedsMaxDepth = 3,\r\n}\r\n\r\nimpl SkippedReason {\r\n    pub fn 
from_i32(value: i32) -> Option<Self> {\r\n        match value {\r\n            1 => Some(SkippedReason::NotAllowedHost),\r\n            2 => Some(SkippedReason::RobotsTxt),\r\n            3 => Some(SkippedReason::ExceedsMaxDepth),\r\n            _ => None,\r\n        }\r\n    }\r\n\r\n    pub fn description(&self) -> &'static str {\r\n        match self {\r\n            SkippedReason::NotAllowedHost => \"Not allowed host\",\r\n            SkippedReason::RobotsTxt => \"Robots.txt\",\r\n            SkippedReason::ExceedsMaxDepth => \"Exceeds max depth\",\r\n        }\r\n    }\r\n}\r\n\r\nimpl fmt::Display for SkippedReason {\r\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\r\n        f.write_str(self.description())\r\n    }\r\n}\r\n\r\n// ---------------------------------------------------------------------------\r\n// OutputType\r\n// ---------------------------------------------------------------------------\r\n\r\n#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]\r\n#[serde(rename_all = \"lowercase\")]\r\npub enum OutputType {\r\n    Text,\r\n    Json,\r\n    Multi,\r\n}\r\n\r\nimpl OutputType {\r\n    pub fn from_text(text: &str) -> Result<Self, CrawlerError> {\r\n        match text.trim().to_lowercase().as_str() {\r\n            \"text\" => Ok(OutputType::Text),\r\n            \"json\" => Ok(OutputType::Json),\r\n            other => Err(CrawlerError::Config(format!(\r\n                \"Unknown output type '{}'. 
Supported values are: {}\",\r\n                other,\r\n                Self::available_text_types().join(\", \")\r\n            ))),\r\n        }\r\n    }\r\n\r\n    pub fn available_text_types() -> Vec<&'static str> {\r\n        vec![\"text\", \"json\"]\r\n    }\r\n\r\n    pub fn as_str(&self) -> &'static str {\r\n        match self {\r\n            OutputType::Text => \"text\",\r\n            OutputType::Json => \"json\",\r\n            OutputType::Multi => \"multi\",\r\n        }\r\n    }\r\n}\r\n\r\nimpl fmt::Display for OutputType {\r\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\r\n        f.write_str(self.as_str())\r\n    }\r\n}\r\n"
  },
  {
    "path": "src/utils.rs",
    "content": "// SiteOne Crawler - Utilities\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::sync::RwLock;\n\nuse regex::Regex;\n\nuse crate::types::ContentTypeId;\n\nstatic FORCED_COLOR_SETUP: RwLock<Option<bool>> = RwLock::new(None);\n\n/// Check if a string looks like a regex pattern delimited by one of / # ~ %\n/// e.g. \"/pattern/flags\" or \"#pattern#i\"\npub fn is_regex_pattern(s: &str) -> bool {\n    if s.len() < 2 {\n        return false;\n    }\n    let first = s.as_bytes()[0];\n    if !matches!(first, b'/' | b'#' | b'~' | b'%') {\n        return false;\n    }\n    // Find the last occurrence of the delimiter\n    if let Some(last_delim_pos) = s[1..].rfind(first as char) {\n        let last_delim_pos = last_delim_pos + 1; // adjust for the slice offset\n        // Everything after the last delimiter should be flags (a-z only)\n        let flags = &s[last_delim_pos + 1..];\n        flags.chars().all(|c| c.is_ascii_lowercase())\n    } else {\n        false\n    }\n}\n/// Extract the inner regex pattern from a PCRE-delimited string (e.g., /pattern/flags).\n/// If the string is not delimited, returns it as-is.\n/// Converts PCRE flags like 'i' to Rust regex inline flags like (?i).\npub fn extract_pcre_regex_pattern(s: &str) -> String {\n    if is_regex_pattern(s) {\n        let delimiter = s.as_bytes()[0] as char;\n        let rest = &s[1..];\n        if let Some(end_pos) = rest.rfind(delimiter) {\n            let pattern = &rest[..end_pos];\n            let flags = &rest[end_pos + 1..];\n            let mut regex_pattern = String::new();\n            if flags.contains('i') {\n                regex_pattern.push_str(\"(?i)\");\n            }\n            regex_pattern.push_str(pattern);\n            return regex_pattern;\n        }\n    }\n    s.to_string()\n}\n\nstatic FORCED_CONSOLE_WIDTH: RwLock<Option<usize>> = RwLock::new(None);\n\npub const IMG_SRC_TRANSPARENT_1X1_GIF: &str =\n    
\"data:image/gif;base64,R0lGODlhAQABAIAAAP///wAAACH5BAEAAAAALAAAAAABAAEAAAICRAEAOw==\";\n\npub fn disable_colors() {\n    if let Ok(mut v) = FORCED_COLOR_SETUP.write() {\n        *v = Some(false);\n    }\n}\n\npub fn force_enabled_colors() {\n    if let Ok(mut v) = FORCED_COLOR_SETUP.write() {\n        *v = Some(true);\n    }\n}\n\npub fn set_forced_console_width(width: usize) {\n    if let Ok(mut v) = FORCED_CONSOLE_WIDTH.write() {\n        *v = Some(width);\n    }\n}\n\npub fn get_formatted_size(bytes: i64, precision: usize) -> String {\n    let units = [\"B\", \"kB\", \"MB\", \"GB\", \"TB\", \"PB\", \"EB\", \"ZB\", \"YB\"];\n\n    let bytes_f = (bytes.max(0)) as f64;\n    let pow = if bytes_f > 0.0 {\n        (bytes_f.ln() / 1024_f64.ln()).floor() as usize\n    } else {\n        0\n    };\n    let pow = pow.min(units.len() - 1);\n\n    let value = bytes_f / 1024_f64.powi(pow as i32);\n    let rounded = format!(\"{:.prec$}\", value, prec = precision);\n\n    format!(\"{} {}\", rounded, units[pow])\n}\n\npub fn get_formatted_duration(duration: f64) -> String {\n    if duration < 1.0 {\n        let ms = duration * 1000.0;\n        format!(\"{} ms\", ms as i64)\n    } else if duration < 10.0 {\n        let formatted = format!(\"{:.1}\", duration);\n        let formatted = formatted.trim_end_matches(\".0\");\n        format!(\"{} s\", formatted)\n    } else {\n        format!(\"{} s\", duration as i64)\n    }\n}\n\npub fn get_formatted_age(age: i64) -> String {\n    if age < 60 {\n        format!(\"{} sec(s)\", age)\n    } else if age < 3600 {\n        format!(\n            \"{} min(s)\",\n            strip_trailing_dot_zero(&format!(\"{:.1}\", age as f64 / 60.0))\n        )\n    } else if age < 86400 {\n        format!(\n            \"{} hour(s)\",\n            strip_trailing_dot_zero(&format!(\"{:.1}\", age as f64 / 3600.0))\n        )\n    } else {\n        format!(\n            \"{} day(s)\",\n            strip_trailing_dot_zero(&format!(\"{:.1}\", age as f64 / 
86400.0))\n        )\n    }\n}\n\n/// Strip trailing \".0\" from formatted numbers.\nfn strip_trailing_dot_zero(s: &str) -> String {\n    s.strip_suffix(\".0\").unwrap_or(s).to_string()\n}\n\npub fn get_formatted_cache_lifetime(seconds: i64) -> String {\n    if seconds < 60 {\n        format!(\"{} s\", seconds)\n    } else if seconds <= 3600 {\n        format!(\"{} min\", seconds / 60)\n    } else if seconds <= 86400 {\n        format!(\"{} h\", seconds / 3600)\n    } else if seconds <= 86400 * 90 {\n        format!(\"{} d\", seconds / 86400)\n    } else if seconds <= 86400 * 365 * 2 {\n        format!(\"{} mon\", (seconds as f64 / 86400.0 / 30.0).round() as i64)\n    } else {\n        format!(\"{:.1} y\", seconds as f64 / 31536000.0)\n    }\n}\n\npub fn get_color_text(text: &str, color: &str, set_background: bool) -> String {\n    // Check forced color setup\n    let forced = FORCED_COLOR_SETUP.read().ok().and_then(|v| *v);\n    match forced {\n        Some(false) => return text.to_string(),\n        Some(true) => {}\n        None => {\n            // Check if stdout is a TTY\n            if !atty_is_tty() {\n                return text.to_string();\n            }\n        }\n    }\n\n    let fg_colors: &[(&str, &str)] = &[\n        (\"black\", \"0;30\"),\n        (\"red\", \"0;31\"),\n        (\"green\", \"0;32\"),\n        (\"yellow\", \"0;33\"),\n        (\"blue\", \"0;34\"),\n        (\"magenta\", \"0;35\"),\n        (\"cyan\", \"0;36\"),\n        (\"white\", \"0;37\"),\n        (\"gray\", \"38;5;244\"),\n        (\"dark-gray\", \"38;5;240\"),\n    ];\n\n    let bg_colors: &[(&str, &str)] = &[\n        (\"black\", \"1;40\"),\n        (\"red\", \"1;41\"),\n        (\"green\", \"1;42\"),\n        (\"yellow\", \"1;43\"),\n        (\"blue\", \"1;44\"),\n        (\"magenta\", \"1;45\"),\n        (\"cyan\", \"1;46\"),\n        (\"white\", \"1;47\"),\n    ];\n\n    let code = if set_background {\n        bg_colors\n            .iter()\n            .find(|(name, _)| 
*name == color)\n            .map(|(_, code)| *code)\n            .unwrap_or(\"0\")\n    } else {\n        fg_colors\n            .iter()\n            .find(|(name, _)| *name == color)\n            .map(|(_, code)| *code)\n            .unwrap_or(\"0\")\n    };\n\n    format!(\"\\x1b[{}m{}\\x1b[0m\", code, text)\n}\n\nfn atty_is_tty() -> bool {\n    // Simple check using libc isatty\n    unsafe { libc_isatty(1) != 0 }\n}\n\nunsafe extern \"C\" {\n    fn isatty(fd: i32) -> i32;\n}\n\nunsafe fn libc_isatty(fd: i32) -> i32 {\n    unsafe { isatty(fd) }\n}\n\npub fn convert_bash_colors_in_text_to_html(text: &str) -> String {\n    use once_cell::sync::Lazy;\n    static RE_BASH_COLORS: Lazy<Regex> = Lazy::new(|| Regex::new(r\"\\x1b\\[(.*?)m(.*?)\\x1b\\[0m\").unwrap());\n    let re = &*RE_BASH_COLORS;\n\n    re.replace_all(text, |caps: &regex::Captures| {\n        let styles_str = caps.get(1).map_or(\"\", |m| m.as_str());\n        let content = caps.get(2).map_or(\"\", |m| m.as_str());\n\n        let styles: Vec<&str> = styles_str.split(';').collect();\n        let mut font_color: Option<&str> = None;\n        let mut background_color: Option<&str> = None;\n\n        for style in &styles {\n            if [\"30\", \"31\", \"32\", \"33\", \"34\", \"35\", \"36\", \"37\"].contains(style) {\n                font_color = Some(style);\n            } else if [\"40\", \"41\", \"42\", \"43\", \"44\", \"45\", \"46\", \"47\"].contains(style) {\n                background_color = Some(style);\n            }\n        }\n\n        let mut css_style = String::new();\n        if let Some(fc) = font_color {\n            css_style.push_str(&format!(\"color: {};\", get_html_color_by_bash_color(fc)));\n        }\n        if let Some(bc) = background_color {\n            css_style.push_str(&format!(\"background-color: {};\", get_html_color_by_bash_color(bc)));\n        }\n\n        if !css_style.is_empty() {\n            format!(\"<span style=\\\"{}\\\">{}</span>\", 
css_style.trim_end_matches(';'), content)\n        } else {\n            content.to_string()\n        }\n    })\n    .to_string()\n}\n\nfn get_html_color_by_bash_color(color: &str) -> &'static str {\n    match color {\n        \"30\" | \"40\" => \"#000000\",\n        \"31\" | \"41\" => \"#e3342f\",\n        \"32\" | \"42\" => \"#38c172\",\n        \"33\" | \"43\" => \"#ffff00\",\n        \"34\" | \"44\" => \"#2563EB\",\n        \"35\" | \"45\" => \"#ff00ff\",\n        \"36\" | \"46\" => \"#00ffff\",\n        \"37\" | \"47\" => \"#ffffff\",\n        _ => \"#000000\",\n    }\n}\n\npub fn truncate_in_two_thirds(\n    text: &str,\n    max_length: usize,\n    placeholder: &str,\n    forced_coloring: Option<bool>,\n) -> String {\n    let char_count = text.chars().count();\n    if char_count <= max_length {\n        return text.to_string();\n    }\n\n    let placeholder_len = placeholder.chars().count();\n    let first_part_length = ((max_length as f64) * (2.0 / 3.0)).ceil() as usize;\n    let second_part_length = if max_length > first_part_length + placeholder_len {\n        max_length - first_part_length - placeholder_len\n    } else {\n        0\n    };\n\n    let first_part: String = text.chars().take(first_part_length).collect();\n    let second_part: String = text\n        .chars()\n        .rev()\n        .take(second_part_length)\n        .collect::<Vec<_>>()\n        .into_iter()\n        .rev()\n        .collect();\n\n    let final_placeholder = match forced_coloring {\n        Some(true) | None => get_color_text(placeholder, \"red\", false),\n        Some(false) => placeholder.to_string(),\n    };\n\n    format!(\"{}{}{}\", first_part.trim(), final_placeholder, second_part.trim())\n}\n\npub fn truncate_url(\n    url: &str,\n    max_length: usize,\n    placeholder: &str,\n    strip_hostname: Option<&str>,\n    scheme_of_hostname_to_strip: Option<&str>,\n    forced_coloring: Option<bool>,\n) -> String {\n    let mut url = url.to_string();\n\n    if let 
Some(hostname) = strip_hostname {\n        if let Some(scheme) = scheme_of_hostname_to_strip {\n            let full = format!(\"{}://{}\", scheme, hostname);\n            url = url.replace(&full, \"\");\n        } else {\n            let http = format!(\"http://{}\", hostname);\n            let https = format!(\"https://{}\", hostname);\n            url = url.replace(&http, \"\").replace(&https, \"\");\n        }\n    }\n\n    if url.chars().count() > max_length {\n        url = truncate_in_two_thirds(&url, max_length, placeholder, forced_coloring);\n    }\n\n    url\n}\n\npub fn get_progress_bar(done: usize, total: usize, segments: usize) -> String {\n    let percentage = (done as f64 / total as f64) * 100.0;\n    let filled_segments = ((done as f64 / total as f64) * segments as f64).round() as usize;\n    let empty_segments = segments.saturating_sub(filled_segments);\n\n    format!(\n        \"{:>5}|{}{}|\",\n        format!(\"{}%\", percentage as i64),\n        \">\".repeat(filled_segments),\n        \" \".repeat(empty_segments),\n    )\n}\n\npub fn remove_ansi_colors(text: &str) -> String {\n    use once_cell::sync::Lazy;\n    static RE_ANSI: Lazy<Regex> = Lazy::new(|| Regex::new(r\"\\x1b\\[\\d+(;\\d+)*m\").unwrap());\n    RE_ANSI.replace_all(text, \"\").to_string()\n}\n\npub fn get_http_client_code_with_error_description(http_code: i32, short_version: bool) -> String {\n    match http_code {\n        -1 => {\n            if short_version {\n                \"-1:CON\".to_string()\n            } else {\n                \"-1:CONN-FAIL\".to_string()\n            }\n        }\n        -2 => {\n            if short_version {\n                \"-2:TIM\".to_string()\n            } else {\n                \"-2:TIMEOUT\".to_string()\n            }\n        }\n        -3 => {\n            if short_version {\n                \"-3:RST\".to_string()\n            } else {\n                \"-3:SRV-RESET\".to_string()\n            }\n        }\n        -4 => {\n            
if short_version {\n                \"-4:SND\".to_string()\n            } else {\n                \"-4:SEND-ERROR\".to_string()\n            }\n        }\n        -6 => {\n            if short_version {\n                \"-6:SKP\".to_string()\n            } else {\n                \"-6:SKIPPED\".to_string()\n            }\n        }\n        code => code.to_string(),\n    }\n}\n\npub fn get_console_width() -> usize {\n    let forced = FORCED_CONSOLE_WIDTH.read().ok().and_then(|v| *v);\n    if let Some(w) = forced {\n        return w;\n    }\n\n    if let Some((terminal_size::Width(w), _)) = terminal_size::terminal_size() {\n        return (w as usize).max(100);\n    }\n\n    138\n}\n\npub fn get_url_without_scheme_and_host(\n    url: &str,\n    only_when_host: Option<&str>,\n    initial_scheme: Option<&str>,\n) -> String {\n    if let Some(host) = only_when_host {\n        let host_marker = format!(\"://{}\", host);\n        if !url.contains(&host_marker) {\n            return url.to_string();\n        }\n    }\n\n    if let Some(scheme) = initial_scheme {\n        let prefix = format!(\"{}://\", scheme);\n        if !url.starts_with(&prefix) {\n            return url.to_string();\n        }\n    }\n\n    if let Ok(parsed) = url::Url::parse(url) {\n        let path = parsed.path();\n        if let Some(query) = parsed.query() {\n            format!(\"{}?{}\", path, query)\n        } else {\n            path.to_string()\n        }\n    } else {\n        url.to_string()\n    }\n}\n\npub fn get_safe_command(command: &str) -> String {\n    let patterns = [\n        (r\"(pass[a-z]{0,5})=\\S+\", \"$1=***\"),\n        (r\"(keys?)=\\S+\", \"$1=***\"),\n        (r\"(secrets?)=\\S+\", \"$1=***\"),\n        (r\"(auth)=\\S+\", \"$1=***\"),\n    ];\n\n    let mut result = command.to_string();\n    for (pattern, replacement) in &patterns {\n        if let Ok(re) = Regex::new(pattern) {\n            result = re.replace_all(&result, *replacement).to_string();\n        }\n    }\n   
 result\n}\n\npub fn get_colored_request_time(request_time: f64, str_pad_to: usize) -> String {\n    let formatted = get_formatted_duration(request_time);\n    let padded = format!(\"{:<width$}\", formatted, width = str_pad_to);\n\n    if request_time >= 2.0 {\n        get_color_text(&padded, \"red\", true)\n    } else if request_time >= 1.0 {\n        get_color_text(&padded, \"magenta\", true)\n    } else if request_time >= 0.5 {\n        get_color_text(&padded, \"yellow\", false)\n    } else {\n        get_color_text(&padded, \"green\", false)\n    }\n}\n\npub fn get_colored_status_code(status_code: i32, str_pad_to: usize) -> String {\n    if (200..300).contains(&status_code) {\n        get_color_text(&format!(\"{:<width$}\", status_code, width = str_pad_to), \"green\", false)\n    } else if (300..400).contains(&status_code) {\n        get_color_text(&format!(\"{:<width$}\", status_code, width = str_pad_to), \"yellow\", true)\n    } else if (400..500).contains(&status_code) {\n        get_color_text(&format!(\"{:<width$}\", status_code, width = str_pad_to), \"magenta\", true)\n    } else if (500..600).contains(&status_code) {\n        get_color_text(&format!(\"{:<width$}\", status_code, width = str_pad_to), \"red\", true)\n    } else {\n        get_color_text(\n            &format!(\n                \"{:<width$}\",\n                get_http_client_code_with_error_description(status_code, true),\n                width = str_pad_to\n            ),\n            \"red\",\n            true,\n        )\n    }\n}\n\npub fn get_colored_severity(severity: &str) -> String {\n    match severity {\n        \"critical\" => get_color_text(severity, \"red\", true),\n        \"warning\" => get_color_text(severity, \"magenta\", true),\n        \"notice\" => get_color_text(severity, \"blue\", false),\n        _ => get_color_text(severity, \"green\", false),\n    }\n}\n\npub fn get_colored_criticals(criticals: i32, str_pad_to: usize) -> String {\n    if criticals == 0 {\n        
criticals.to_string()\n    } else {\n        get_color_text(&format!(\"{:<width$}\", criticals, width = str_pad_to), \"red\", true)\n    }\n}\n\npub fn get_colored_warnings(warnings: i32, str_pad_to: usize) -> String {\n    if warnings == 0 {\n        warnings.to_string()\n    } else {\n        get_color_text(&format!(\"{:<width$}\", warnings, width = str_pad_to), \"magenta\", false)\n    }\n}\n\npub fn get_colored_notices(notices: i32, str_pad_to: usize) -> String {\n    if notices == 0 {\n        notices.to_string()\n    } else {\n        get_color_text(&format!(\"{:<width$}\", notices, width = str_pad_to), \"blue\", false)\n    }\n}\n\npub fn get_content_type_name_by_id(content_type_id: ContentTypeId) -> &'static str {\n    content_type_id.name()\n}\n\npub fn is_href_for_requestable_resource(href: &str) -> bool {\n    if href.starts_with('#') {\n        return false;\n    }\n    if href.contains('{') {\n        return false;\n    }\n    if href.contains('<') {\n        return false;\n    }\n    if href.contains(\"&#\") {\n        return false;\n    }\n\n    // Check if href starts with a scheme that is not http/https\n    use once_cell::sync::Lazy;\n    static RE_HAS_SCHEME: Lazy<Regex> = Lazy::new(|| Regex::new(r\"^[a-zA-Z0-9]+:\").unwrap());\n    static RE_IS_HTTP: Lazy<Regex> = Lazy::new(|| Regex::new(r\"(?i)^https?:/\").unwrap());\n    let has_scheme = RE_HAS_SCHEME.is_match(href);\n    let is_http = RE_IS_HTTP.is_match(href);\n\n    if has_scheme && !is_http {\n        return false;\n    }\n\n    true\n}\n\npub fn get_absolute_url_by_base_url(base_url: &str, target_url: &str) -> String {\n    // Use the url crate for proper resolution\n    if let Ok(base) = url::Url::parse(base_url)\n        && let Ok(resolved) = base.join(target_url)\n    {\n        return resolved.to_string();\n    }\n\n    // Fallback: return target_url as-is\n    target_url.to_string()\n}\n\npub fn get_absolute_path(path: &str) -> String {\n    let p = std::path::Path::new(path);\n    
if p.is_absolute() {\n        return path.to_string();\n    }\n    // On Windows, Path::join() correctly handles drive-relative (\"C:foo\"),\n    // root-relative (\"\\foo\"), and UNC paths (\"\\\\server\\share\").\n    // On Unix, it handles paths starting with \"/\" by returning them as-is.\n    let cwd = std::env::current_dir().unwrap_or_else(|_| std::path::PathBuf::from(\".\"));\n    cwd.join(p).to_string_lossy().to_string()\n}\n\npub fn get_output_formatted_path(path: &str) -> String {\n    #[cfg(windows)]\n    {\n        path.replace('/', \"\\\\\")\n    }\n    #[cfg(not(windows))]\n    {\n        path.to_string()\n    }\n}\n\npub fn mb_str_pad(input: &str, pad_length: usize, pad_char: char) -> String {\n    let char_count = input.chars().count();\n    if char_count >= pad_length {\n        input.to_string()\n    } else {\n        let padding = pad_length - char_count;\n        format!(\n            \"{}{}\",\n            input,\n            std::iter::repeat_n(pad_char, padding).collect::<String>()\n        )\n    }\n}\n\npub fn strip_javascript(html: &str) -> String {\n    let mut result = html.to_string();\n\n    // script tags\n    if let Ok(re) = Regex::new(r\"(?is)<script[^>]*>.*?</script>\") {\n        result = re.replace_all(&result, \"\").to_string();\n    }\n\n    // link tags by \"href\" pointing to .js\n    if let Ok(re) = Regex::new(r#\"(?is)<link[^>]*href=[\"'][^\"']+\\.js[^\"']*[\"'][^>]*>\"#) {\n        result = re.replace_all(&result, \"\").to_string();\n    }\n\n    // link tags by \"as=script\"\n    if let Ok(re) = Regex::new(r#\"(?is)<link[^>]*as=[\"']script[\"'][^>]*>\"#) {\n        result = re.replace_all(&result, \"\").to_string();\n    }\n\n    // on* attributes\n    if let Ok(re) = Regex::new(r#\"(?is)\\s+on[a-z]+=(\"[^\"]*\"|'[^']*'|[^\\s>]*)\"#) {\n        result = re.replace_all(&result, \"\").to_string();\n    }\n\n    result\n}\n\npub fn strip_styles(html: &str) -> String {\n    let mut result = html.to_string();\n\n    if let 
Ok(re) = Regex::new(r\"(?is)<style\\b[^>]*>.*?</style>\") {\n        result = re.replace_all(&result, \"\").to_string();\n    }\n\n    if let Ok(re) = Regex::new(r#\"(?is)<link\\b[^>]*rel=[\"']stylesheet[\"'][^>]*>\"#) {\n        result = re.replace_all(&result, \"\").to_string();\n    }\n\n    if let Ok(re) = Regex::new(r#\"(?is)\\s+style=(\"[^\"]*\"|'[^']*'|[^\\s>]*)\"#) {\n        result = re.replace_all(&result, \" \").to_string();\n    }\n\n    result\n}\n\npub fn strip_fonts(html_or_css: &str) -> String {\n    let mut result = html_or_css.to_string();\n\n    if let Ok(re) = Regex::new(r#\"(?is)<link\\b[^>]*href=[\"'][^\"']+\\.(eot|ttf|woff2|woff|otf)[^\"']*[\"'][^>]*>\"#) {\n        result = re.replace_all(&result, \"\").to_string();\n    }\n\n    if let Ok(re) = Regex::new(r\"(?is)@font-face\\s*\\{[^}]*\\}\\s*\") {\n        result = re.replace_all(&result, \"\").to_string();\n    }\n\n    if let Ok(re) = Regex::new(r\"(?i)\\b(font|font-family)\\s*:[^;]+;\") {\n        result = re.replace_all(&result, \"\").to_string();\n    }\n\n    if let Ok(re) = Regex::new(r#\"(?i)\\s*style=[\"']\\s*[\"']\"#) {\n        result = re.replace_all(&result, \"\").to_string();\n    }\n\n    result\n}\n\npub fn strip_images(html_or_css: &str, placeholder_image: Option<&str>) -> String {\n    let placeholder = placeholder_image.unwrap_or(IMG_SRC_TRANSPARENT_1X1_GIF);\n    let mut result = html_or_css.to_string();\n\n    let patterns_and_replacements: Vec<(&str, String)> = vec![\n        (\n            r#\"(?is)(<img[^>]+)src=['\"][^'\"]*['\"]([^>]*>)\"#,\n            format!(\"${{1}}src=\\\"{}\\\"${{2}}\", placeholder),\n        ),\n        (\n            r#\"(?is)(<img[^>]+)srcset=['\"][^'\"]*['\"]([^>]*>)\"#,\n            format!(\"${{1}}srcset=\\\"{}\\\"${{2}}\", placeholder),\n        ),\n        (\n            r#\"(?is)(<source[^>]+)srcset=['\"][^'\"]*['\"]([^>]*>)\"#,\n            format!(\"${{1}}srcset=\\\"{}\\\"${{2}}\", placeholder),\n        ),\n        (\n            
r#\"(?is)(<source[^>]+)src=['\"][^'\"]*['\"]([^>]*>)\"#,\n            format!(\"${{1}}src=\\\"{}\\\"${{2}}\", placeholder),\n        ),\n        (\n            r#\"(?is)url\\(\\s*['\"]?(?!data:)([^'\")\\s]*\\.(?:png|jpe?g|gif|webp|svg|bmp))['\"]?\\s*\\)\"#,\n            format!(\"url(\\\"{}\\\")\", placeholder),\n        ),\n        (r\"(?is)<svg[^>]*>.*?</svg>\", String::new()),\n    ];\n\n    for (pattern, replacement) in &patterns_and_replacements {\n        if let Ok(re) = Regex::new(pattern) {\n            result = re.replace_all(&result, replacement.as_str()).to_string();\n        }\n    }\n\n    result\n}\n\npub fn get_colored_cache_lifetime(cache_lifetime: i64, str_pad_to: usize) -> String {\n    let color = if cache_lifetime <= 0 {\n        \"red\"\n    } else if cache_lifetime < 600 {\n        \"magenta\"\n    } else if cache_lifetime <= 86400 {\n        \"yellow\"\n    } else {\n        \"green\"\n    };\n\n    get_color_text(\n        &format!(\n            \"{:<width$}\",\n            get_formatted_cache_lifetime(cache_lifetime),\n            width = str_pad_to\n        ),\n        color,\n        false,\n    )\n}\n\npub fn is_asset_by_content_type(content_type: &str) -> bool {\n    let non_asset_content_types = [\n        \"text/html\",\n        \"application/xhtml+xml\",\n        \"application/xml\",\n        \"application/json\",\n        \"application/ld+json\",\n        \"application/rss+xml\",\n    ];\n\n    let ct_lower = content_type.to_lowercase();\n    for non_asset in &non_asset_content_types {\n        if ct_lower.contains(non_asset) {\n            return false;\n        }\n    }\n    true\n}\n\npub fn add_class_to_html_images(html: &str, class_name: &str) -> String {\n    let mut result = html.to_string();\n    if let Ok(re) = Regex::new(r#\"(?is)(<img\\b)([^>]*>)\"#) {\n        result = re\n            .replace_all(&result, |caps: &regex::Captures| {\n                let tag_start = caps.get(1).map_or(\"\", |m| m.as_str());\n              
  let rest = caps.get(2).map_or(\"\", |m| m.as_str());\n                if rest.contains(\"class=\") {\n                    format!(\"{}{}\", tag_start, rest)\n                } else {\n                    format!(\"{} class=\\\"{}\\\"{}\", tag_start, class_name, rest)\n                }\n            })\n            .to_string();\n    }\n    result\n}\n\npub fn get_flat_response_headers(\n    headers: &std::collections::HashMap<String, Vec<String>>,\n) -> std::collections::HashMap<String, String> {\n    headers.iter().map(|(k, v)| (k.clone(), v.join(\", \"))).collect()\n}\n\n/// Returns peak resident memory usage (VmHWM) in bytes by reading /proc/self/status.\n/// Returns 0 if the information is not available (e.g., on non-Linux platforms).\npub fn get_peak_memory_usage() -> i64 {\n    if let Ok(status) = std::fs::read_to_string(\"/proc/self/status\") {\n        for line in status.lines() {\n            if line.starts_with(\"VmHWM:\") {\n                // Format is \"VmHWM:    12345 kB\"\n                let parts: Vec<&str> = line.split_whitespace().collect();\n                if parts.len() >= 2\n                    && let Ok(kb) = parts[1].parse::<i64>()\n                {\n                    return kb * 1024; // convert kB to bytes\n                }\n            }\n        }\n    }\n    0\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    // -- get_formatted_size --\n\n    #[test]\n    fn formatted_size_zero() {\n        assert_eq!(get_formatted_size(0, 1), \"0.0 B\");\n    }\n\n    #[test]\n    fn formatted_size_bytes() {\n        assert_eq!(get_formatted_size(512, 0), \"512 B\");\n    }\n\n    #[test]\n    fn formatted_size_kilobytes() {\n        assert_eq!(get_formatted_size(1024, 1), \"1.0 kB\");\n    }\n\n    #[test]\n    fn formatted_size_megabytes() {\n        assert_eq!(get_formatted_size(1_048_576, 1), \"1.0 MB\");\n    }\n\n    #[test]\n    fn formatted_size_gigabytes() {\n        assert_eq!(get_formatted_size(1_073_741_824, 1), \"1.0 
GB\");\n    }\n\n    // -- get_formatted_duration --\n\n    #[test]\n    fn formatted_duration_milliseconds() {\n        assert_eq!(get_formatted_duration(0.001), \"1 ms\");\n    }\n\n    #[test]\n    fn formatted_duration_half_second() {\n        assert_eq!(get_formatted_duration(0.5), \"500 ms\");\n    }\n\n    #[test]\n    fn formatted_duration_seconds() {\n        assert_eq!(get_formatted_duration(1.5), \"1.5 s\");\n    }\n\n    // -- get_formatted_age --\n\n    #[test]\n    fn formatted_age_seconds() {\n        assert_eq!(get_formatted_age(0), \"0 sec(s)\");\n        assert_eq!(get_formatted_age(59), \"59 sec(s)\");\n    }\n\n    #[test]\n    fn formatted_age_minutes() {\n        assert_eq!(get_formatted_age(60), \"1 min(s)\");\n    }\n\n    #[test]\n    fn formatted_age_hours() {\n        assert_eq!(get_formatted_age(3600), \"1 hour(s)\");\n    }\n\n    #[test]\n    fn formatted_age_days() {\n        assert_eq!(get_formatted_age(86400), \"1 day(s)\");\n    }\n\n    // -- get_formatted_cache_lifetime --\n\n    #[test]\n    fn cache_lifetime_seconds() {\n        assert_eq!(get_formatted_cache_lifetime(0), \"0 s\");\n    }\n\n    #[test]\n    fn cache_lifetime_minutes() {\n        assert_eq!(get_formatted_cache_lifetime(60), \"1 min\");\n    }\n\n    #[test]\n    fn cache_lifetime_hours() {\n        // 3600 is exactly boundary of <= 3600, so still \"min\"\n        assert_eq!(get_formatted_cache_lifetime(3601), \"1 h\");\n    }\n\n    #[test]\n    fn cache_lifetime_days() {\n        // 86400 is exactly boundary of <= 86400, so still \"h\"\n        assert_eq!(get_formatted_cache_lifetime(86401), \"1 d\");\n    }\n\n    #[test]\n    fn cache_lifetime_months() {\n        // 86400*90 = 7776000, must exceed that for \"mon\"\n        assert_eq!(get_formatted_cache_lifetime(86400 * 91), \"3 mon\");\n    }\n\n    // -- is_regex_pattern --\n\n    #[test]\n    fn regex_pattern_slash_delimited() {\n        assert!(is_regex_pattern(\"/test/i\"));\n    }\n\n    #[test]\n    
fn regex_pattern_hash_delimited() {\n        assert!(is_regex_pattern(\"#pat#\"));\n    }\n\n    #[test]\n    fn regex_pattern_plain_text() {\n        assert!(!is_regex_pattern(\"plain\"));\n    }\n\n    #[test]\n    fn regex_pattern_empty() {\n        assert!(!is_regex_pattern(\"\"));\n    }\n\n    #[test]\n    fn regex_pattern_single_slash() {\n        assert!(!is_regex_pattern(\"/\"));\n    }\n\n    // -- extract_pcre_regex_pattern --\n\n    #[test]\n    fn extract_pcre_with_case_insensitive() {\n        assert_eq!(extract_pcre_regex_pattern(\"/hello/i\"), \"(?i)hello\");\n    }\n\n    #[test]\n    fn extract_pcre_hash_delimiter() {\n        assert_eq!(extract_pcre_regex_pattern(\"#test#\"), \"test\");\n    }\n\n    #[test]\n    fn extract_pcre_tilde_with_flags() {\n        // Only 'i' flag is converted to (?i); other flags are silently ignored\n        let result = extract_pcre_regex_pattern(\"~foo~ms\");\n        assert_eq!(result, \"foo\");\n    }\n\n    // -- strip_javascript --\n\n    #[test]\n    fn strip_javascript_removes_script_tags() {\n        let input = \"<p>ok</p><script>alert(1)</script>\";\n        assert_eq!(strip_javascript(input), \"<p>ok</p>\");\n    }\n\n    // -- strip_styles --\n\n    #[test]\n    fn strip_styles_removes_style_tags() {\n        let input = \"<p>ok</p><style>.x{}</style>\";\n        assert_eq!(strip_styles(input), \"<p>ok</p>\");\n    }\n\n    // -- mb_str_pad --\n\n    #[test]\n    fn str_pad_shorter_input() {\n        assert_eq!(mb_str_pad(\"hi\", 5, ' '), \"hi   \");\n    }\n\n    #[test]\n    fn str_pad_longer_input() {\n        assert_eq!(mb_str_pad(\"long\", 2, ' '), \"long\");\n    }\n\n    // -- is_href_for_requestable_resource --\n\n    #[test]\n    fn requestable_http_url() {\n        assert!(is_href_for_requestable_resource(\"https://x.com\"));\n    }\n\n    #[test]\n    fn requestable_javascript_void() {\n        assert!(!is_href_for_requestable_resource(\"javascript:void(0)\"));\n    }\n\n    #[test]\n    fn 
requestable_mailto() {\n        assert!(!is_href_for_requestable_resource(\"mailto:a@b.c\"));\n    }\n\n    #[test]\n    fn requestable_data_uri() {\n        assert!(!is_href_for_requestable_resource(\"data:text/html\"));\n    }\n\n    // -- get_absolute_url_by_base_url --\n\n    #[test]\n    fn absolute_url_from_root_relative() {\n        assert_eq!(\n            get_absolute_url_by_base_url(\"https://x.com/a/\", \"/b\"),\n            \"https://x.com/b\"\n        );\n    }\n\n    #[test]\n    fn absolute_url_from_relative() {\n        assert_eq!(\n            get_absolute_url_by_base_url(\"https://x.com/a/\", \"c\"),\n            \"https://x.com/a/c\"\n        );\n    }\n}\n"
  },
  {
    "path": "src/version.rs",
    "content": "// SiteOne Crawler - Version\r\n// (c) Jan Reges <jan.reges@siteone.cz>\r\n\r\npub const CODE: &str = \"2.3.0.20260330\";\r\n"
  },
  {
    "path": "src/wizard/form.rs",
    "content": "// SiteOne Crawler - Interactive settings form with arrow-key cycling\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse crossterm::{\n    cursor,\n    event::{self, Event, KeyCode, KeyEventKind, KeyModifiers},\n    execute,\n    terminal::{self, Clear, ClearType},\n};\nuse std::io::{self, Write};\n\nuse super::WizardError;\nuse super::presets::WizardState;\n\n// ── FormSetting ─────────────────────────────────────────────────────────────\n\npub struct FormSetting {\n    pub label: &'static str,\n    pub options: Vec<&'static str>,\n    pub current: usize,\n}\n\nimpl FormSetting {\n    fn new(label: &'static str, options: Vec<&'static str>, default: &str) -> Self {\n        let current = options.iter().position(|o| *o == default).unwrap_or(0);\n        FormSetting {\n            label,\n            options,\n            current,\n        }\n    }\n\n    pub fn value(&self) -> &str {\n        self.options[self.current]\n    }\n\n    fn cycle_right(&mut self) {\n        self.current = (self.current + 1) % self.options.len();\n    }\n\n    fn cycle_left(&mut self) {\n        if self.current == 0 {\n            self.current = self.options.len() - 1;\n        } else {\n            self.current -= 1;\n        }\n    }\n}\n\n// ── Setting indices (order in the form) ─────────────────────────────────────\n\nconst S_TIMEOUT: usize = 0;\nconst S_WORKERS: usize = 1;\nconst S_MAX_RPS: usize = 2;\nconst S_MAX_URLS: usize = 3;\nconst S_DEVICE: usize = 4;\nconst S_JAVASCRIPT: usize = 5;\nconst S_CSS: usize = 6;\nconst S_FONTS: usize = 7;\nconst S_IMAGES: usize = 8;\nconst S_FILES: usize = 9;\nconst S_SINGLE_PAGE: usize = 10;\nconst S_OFFLINE: usize = 11;\nconst S_MARKDOWN: usize = 12;\nconst S_SITEMAP: usize = 13;\nconst S_CACHE: usize = 14;\nconst S_STORAGE: usize = 15;\nconst S_ROBOTS: usize = 16;\n\n// ── Build form from WizardState ─────────────────────────────────────────────\n\npub fn build_form_settings(state: &WizardState) -> Vec<FormSetting> {\n    vec![\n   
     // Performance & Limits\n        FormSetting::new(\n            \"Timeout\",\n            vec![\"1s\", \"2s\", \"3s\", \"5s\", \"10s\", \"30s\", \"60s\"],\n            format_static_timeout(state.timeout),\n        ),\n        FormSetting::new(\n            \"Workers\",\n            vec![\"1\", \"2\", \"3\", \"5\", \"8\", \"10\", \"20\", \"50\"],\n            format_static_workers(state.workers),\n        ),\n        FormSetting::new(\n            \"Max requests/sec\",\n            vec![\"unlimited\", \"5/s\", \"10/s\", \"20/s\", \"50/s\", \"100/s\", \"500/s\"],\n            format_static_rps(state.max_reqs_per_sec),\n        ),\n        FormSetting::new(\n            \"Max visited URLs\",\n            vec![\"unlimited\", \"100\", \"500\", \"1000\", \"5000\", \"10000\", \"50000\", \"100000\"],\n            format_static_max_urls(state.max_visited_urls),\n        ),\n        // Device\n        FormSetting::new(\"Device\", vec![\"desktop\", \"mobile\", \"tablet\"], &state.device),\n        // Content types\n        FormSetting::new(\n            \"JavaScript\",\n            vec![\"yes\", \"no\"],\n            if state.disable_javascript { \"no\" } else { \"yes\" },\n        ),\n        FormSetting::new(\n            \"CSS stylesheets\",\n            vec![\"yes\", \"no\"],\n            if state.disable_styles { \"no\" } else { \"yes\" },\n        ),\n        FormSetting::new(\n            \"Fonts\",\n            vec![\"yes\", \"no\"],\n            if state.disable_fonts { \"no\" } else { \"yes\" },\n        ),\n        FormSetting::new(\n            \"Images\",\n            vec![\"yes\", \"no\"],\n            if state.disable_images { \"no\" } else { \"yes\" },\n        ),\n        FormSetting::new(\n            \"Files (PDFs, ZIPs..)\",\n            vec![\"yes\", \"no\"],\n            if state.disable_files { \"no\" } else { \"yes\" },\n        ),\n        // Scope\n        FormSetting::new(\n            \"Single page only\",\n            vec![\"no\", 
\"yes\"],\n            if state.single_page { \"yes\" } else { \"no\" },\n        ),\n        // Generators\n        FormSetting::new(\n            \"Offline export\",\n            vec![\"disabled\", \"./tmp/\"],\n            if state.offline_export_dir.is_some() {\n                \"./tmp/\"\n            } else {\n                \"disabled\"\n            },\n        ),\n        FormSetting::new(\n            \"Markdown export\",\n            vec![\"disabled\", \"./tmp/\"],\n            if state.markdown_export_dir.is_some() {\n                \"./tmp/\"\n            } else {\n                \"disabled\"\n            },\n        ),\n        FormSetting::new(\n            \"Sitemap XML\",\n            vec![\"disabled\", \"./sitemap.xml\"],\n            if state.sitemap_xml_file.is_some() {\n                \"./sitemap.xml\"\n            } else {\n                \"disabled\"\n            },\n        ),\n        // Caching\n        FormSetting::new(\n            \"HTTP caching\",\n            vec![\"enabled\", \"disabled\"],\n            if state.http_cache_enabled {\n                \"enabled\"\n            } else {\n                \"disabled\"\n            },\n        ),\n        FormSetting::new(\n            \"Data storage\",\n            vec![\"memory\", \"file\"],\n            if state.result_storage_file { \"file\" } else { \"memory\" },\n        ),\n        // Advanced\n        FormSetting::new(\n            \"Ignore robots.txt\",\n            vec![\"no\", \"yes\"],\n            if state.ignore_robots_txt { \"yes\" } else { \"no\" },\n        ),\n    ]\n}\n\n// Match default values to the closest available option\nfn format_static_timeout(val: u32) -> &'static str {\n    match val {\n        0..=1 => \"1s\",\n        2 => \"2s\",\n        3..=4 => \"3s\",\n        5..=9 => \"5s\",\n        10..=29 => \"10s\",\n        30..=59 => \"30s\",\n        _ => \"60s\",\n    }\n}\n\nfn format_static_workers(val: u32) -> &'static str {\n    match val {\n        0..=1 
=> \"1\",\n        2 => \"2\",\n        3..=4 => \"3\",\n        5..=7 => \"5\",\n        8..=9 => \"8\",\n        10..=19 => \"10\",\n        20..=49 => \"20\",\n        _ => \"50\",\n    }\n}\n\nfn format_static_rps(val: u32) -> &'static str {\n    match val {\n        0 => \"unlimited\",\n        1..=7 => \"5/s\",\n        8..=14 => \"10/s\",\n        15..=34 => \"20/s\",\n        35..=74 => \"50/s\",\n        75..=299 => \"100/s\",\n        _ => \"500/s\",\n    }\n}\n\nfn format_static_max_urls(val: u32) -> &'static str {\n    match val {\n        0 => \"unlimited\",\n        1..=299 => \"100\",\n        300..=749 => \"500\",\n        750..=2499 => \"1000\",\n        2500..=7499 => \"5000\",\n        7500..=29999 => \"10000\",\n        30000..=74999 => \"50000\",\n        _ => \"100000\",\n    }\n}\n\n// ── Apply form values back to WizardState ───────────────────────────────────\n\npub fn apply_form_to_state(settings: &[FormSetting], state: &mut WizardState) {\n    // Timeout\n    state.timeout = parse_timeout(settings[S_TIMEOUT].value());\n    // Workers\n    state.workers = settings[S_WORKERS].value().parse().unwrap_or(3);\n    // Max req/s\n    state.max_reqs_per_sec = parse_rps(settings[S_MAX_RPS].value());\n    // Max URLs\n    state.max_visited_urls = parse_max_urls(settings[S_MAX_URLS].value());\n    // Device\n    state.device = settings[S_DEVICE].value().to_string();\n    // Content types\n    state.disable_javascript = settings[S_JAVASCRIPT].value() == \"no\";\n    state.disable_styles = settings[S_CSS].value() == \"no\";\n    state.disable_fonts = settings[S_FONTS].value() == \"no\";\n    state.disable_images = settings[S_IMAGES].value() == \"no\";\n    state.disable_files = settings[S_FILES].value() == \"no\";\n    // Scope\n    state.single_page = settings[S_SINGLE_PAGE].value() == \"yes\";\n    // Generators\n    state.offline_export_dir = if settings[S_OFFLINE].value() == \"disabled\" {\n        None\n    } else {\n        
Some(\"./tmp/offline-{domain}-{date}/\".to_string())\n    };\n    state.markdown_export_dir = if settings[S_MARKDOWN].value() == \"disabled\" {\n        None\n    } else {\n        Some(\"./tmp/markdown-{domain}-{date}/\".to_string())\n    };\n    state.sitemap_xml_file = if settings[S_SITEMAP].value() == \"disabled\" {\n        None\n    } else {\n        Some(settings[S_SITEMAP].value().to_string())\n    };\n    // Caching\n    state.http_cache_enabled = settings[S_CACHE].value() == \"enabled\";\n    state.result_storage_file = settings[S_STORAGE].value() == \"file\";\n    // Advanced\n    state.ignore_robots_txt = settings[S_ROBOTS].value() == \"yes\";\n}\n\nfn parse_timeout(val: &str) -> u32 {\n    val.strip_suffix('s').and_then(|n| n.parse().ok()).unwrap_or(5)\n}\n\nfn parse_rps(val: &str) -> u32 {\n    if val == \"unlimited\" {\n        0\n    } else {\n        val.strip_suffix(\"/s\").and_then(|n| n.parse().ok()).unwrap_or(10)\n    }\n}\n\nfn parse_max_urls(val: &str) -> u32 {\n    if val == \"unlimited\" {\n        0\n    } else {\n        val.parse().unwrap_or(10000)\n    }\n}\n\n// ── Interactive form loop ───────────────────────────────────────────────────\n\n/// Run the interactive settings form. Returns Ok(true) on confirm, Ok(false) on cancel.\npub fn run_form(settings: &mut [FormSetting], preset_name: &str) -> Result<bool, WizardError> {\n    let mut stdout = io::stdout();\n    let mut cursor_idx: usize = 0;\n\n    // Get current cursor position for drawing\n    let start_row = cursor::position().map(|(_, row)| row).unwrap_or(0);\n\n    terminal::enable_raw_mode().map_err(|e: std::io::Error| WizardError::IoError(e.to_string()))?;\n    execute!(stdout, cursor::Hide).ok();\n\n    // Drain any leftover key events (e.g. 
Enter release from the previous prompt)\n    std::thread::sleep(std::time::Duration::from_millis(100));\n    while event::poll(std::time::Duration::from_millis(50))\n        .map_err(|e: std::io::Error| WizardError::IoError(e.to_string()))?\n    {\n        let _ = event::read();\n    }\n\n    let (result, final_start_row) = form_event_loop(settings, &mut cursor_idx, start_row, &mut stdout, preset_name);\n\n    // Always restore terminal\n    execute!(stdout, cursor::Show).ok();\n    terminal::disable_raw_mode().ok();\n\n    // Move past the form area using the scroll-adjusted start row\n    let total_rows = settings.len() as u16 + 5;\n    execute!(stdout, cursor::MoveTo(0, final_start_row + total_rows)).ok();\n    println!();\n\n    result\n}\n\nfn form_event_loop(\n    settings: &mut [FormSetting],\n    cursor_idx: &mut usize,\n    mut start_row: u16,\n    stdout: &mut io::Stdout,\n    preset_name: &str,\n) -> (Result<bool, WizardError>, u16) {\n    render_form(settings, *cursor_idx, &mut start_row, stdout, preset_name);\n\n    // Ignore Enter events that arrive within a short window after form start,\n    // to prevent a stale Enter from the previous inquire prompt from confirming immediately.\n    let form_start = std::time::Instant::now();\n    let debounce = std::time::Duration::from_millis(300);\n\n    loop {\n        match event::read().map_err(|e: std::io::Error| WizardError::IoError(e.to_string())) {\n            Err(e) => return (Err(e), start_row),\n            Ok(Event::Key(key)) => {\n                // Only react to Press events; ignore Release and Repeat to avoid double-firing\n                if key.kind != KeyEventKind::Press {\n                    continue;\n                }\n                match key.code {\n                    KeyCode::Up | KeyCode::Char('k') => {\n                        if *cursor_idx > 0 {\n                            *cursor_idx -= 1;\n                        } else {\n                            *cursor_idx = 
settings.len() - 1;\n                        }\n                    }\n                    KeyCode::Down | KeyCode::Char('j') => {\n                        *cursor_idx = (*cursor_idx + 1) % settings.len();\n                    }\n                    KeyCode::Left | KeyCode::Char('h') => {\n                        settings[*cursor_idx].cycle_left();\n                    }\n                    KeyCode::Right | KeyCode::Char('l') => {\n                        settings[*cursor_idx].cycle_right();\n                    }\n                    KeyCode::Enter => {\n                        if form_start.elapsed() >= debounce {\n                            return (Ok(true), start_row);\n                        }\n                        continue; // ignore stale Enter from previous prompt\n                    }\n                    KeyCode::Esc | KeyCode::Char('q') => return (Ok(false), start_row),\n                    KeyCode::Char('c') if key.modifiers.contains(KeyModifiers::CONTROL) => {\n                        return (Err(WizardError::Cancelled), start_row);\n                    }\n                    _ => continue, // skip re-render for unknown keys\n                }\n                render_form(settings, *cursor_idx, &mut start_row, stdout, preset_name);\n            }\n            Ok(_) => continue, // ignore non-key events\n        }\n    }\n}\n\nfn render_form(\n    settings: &[FormSetting],\n    cursor_idx: usize,\n    start_row: &mut u16,\n    stdout: &mut io::Stdout,\n    preset_name: &str,\n) {\n    execute!(stdout, cursor::MoveTo(0, *start_row), Clear(ClearType::FromCursorDown)).ok();\n\n    let label_w = 22;\n    let val_w = 18;\n\n    // Header\n    write_line(\n        stdout,\n        &format!(\"\\x1b[1m  Settings\\x1b[0m \\x1b[90m(preset: {})\\x1b[0m\", preset_name),\n    );\n    write_line(\n        stdout,\n        \"  \\x1b[33mUp/Down\\x1b[90m = navigate  \\x1b[33mLeft/Right\\x1b[90m = change value  \\x1b[33mEnter\\x1b[90m = confirm  
\\x1b[33mEsc\\x1b[90m = cancel\\x1b[0m\",\n    );\n    write_line(stdout, \"\");\n\n    for (i, setting) in settings.iter().enumerate() {\n        let is_focused = i == cursor_idx;\n        let val = setting.value();\n\n        if is_focused {\n            // Focused: yellow arrow, bold label, yellow value with < >\n            write!(\n                stdout,\n                \"  \\x1b[33m>\\x1b[0m \\x1b[1m{:<lw$}\\x1b[0m \\x1b[33m<\\x1b[0m \\x1b[1;33m{:^vw$}\\x1b[0m \\x1b[33m>\\x1b[0m\\r\\n\",\n                setting.label,\n                val,\n                lw = label_w,\n                vw = val_w,\n            )\n            .ok();\n        } else {\n            // Normal: dimmed value\n            write!(\n                stdout,\n                \"    {:<lw$} \\x1b[90m{:^vw$}\\x1b[0m\\r\\n\",\n                setting.label,\n                val,\n                lw = label_w,\n                vw = val_w,\n            )\n            .ok();\n        }\n    }\n\n    write_line(stdout, \"\");\n\n    stdout.flush().ok();\n\n    // Recalculate start_row in case terminal scrolled (e.g. 
form near bottom of window).\n    // Total lines: header + help + blank + settings + trailing blank = settings.len() + 4\n    let total_lines = settings.len() as u16 + 4;\n    if let Ok((_, current_row)) = cursor::position() {\n        *start_row = current_row.saturating_sub(total_lines);\n    }\n}\n\nfn write_line(stdout: &mut io::Stdout, text: &str) {\n    write!(stdout, \"{}\\r\\n\", text).ok();\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn parse_timeout_values() {\n        assert_eq!(parse_timeout(\"1s\"), 1);\n        assert_eq!(parse_timeout(\"5s\"), 5);\n        assert_eq!(parse_timeout(\"60s\"), 60);\n    }\n\n    #[test]\n    fn parse_rps_values() {\n        assert_eq!(parse_rps(\"unlimited\"), 0);\n        assert_eq!(parse_rps(\"10/s\"), 10);\n        assert_eq!(parse_rps(\"100/s\"), 100);\n    }\n\n    #[test]\n    fn parse_max_urls_values() {\n        assert_eq!(parse_max_urls(\"unlimited\"), 0);\n        assert_eq!(parse_max_urls(\"10000\"), 10000);\n    }\n\n    #[test]\n    fn format_static_timeout_snaps() {\n        assert_eq!(format_static_timeout(1), \"1s\");\n        assert_eq!(format_static_timeout(4), \"3s\");\n        assert_eq!(format_static_timeout(5), \"5s\");\n        assert_eq!(format_static_timeout(15), \"10s\");\n        assert_eq!(format_static_timeout(100), \"60s\");\n    }\n\n    #[test]\n    fn format_static_workers_snaps() {\n        assert_eq!(format_static_workers(1), \"1\");\n        assert_eq!(format_static_workers(3), \"3\");\n        assert_eq!(format_static_workers(5), \"5\");\n        assert_eq!(format_static_workers(10), \"10\");\n    }\n\n    #[test]\n    fn cycle_wraps_around() {\n        let mut s = FormSetting::new(\"test\", vec![\"a\", \"b\", \"c\"], \"a\");\n        assert_eq!(s.current, 0);\n        s.cycle_left();\n        assert_eq!(s.current, 2); // wraps to last\n        s.cycle_right();\n        assert_eq!(s.current, 0); // wraps to first\n    }\n\n    #[test]\n    fn 
apply_form_roundtrip() {\n        let mut state = WizardState::from_preset(&super::super::presets::PRESETS[0]); // Quick audit\n        let settings = build_form_settings(&state);\n        // Apply unchanged form back → state should match\n        apply_form_to_state(&settings, &mut state);\n        assert_eq!(state.workers, 5);\n        assert_eq!(state.timeout, 5);\n        assert!(!state.disable_javascript);\n    }\n}\n"
  },
  {
    "path": "src/wizard/mod.rs",
    "content": "// SiteOne Crawler - Interactive wizard for no-args invocation\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nmod form;\nmod presets;\n\nuse colored::Colorize;\nuse inquire::ui::{Color, RenderConfig, StyleSheet, Styled};\nuse inquire::validator::Validation;\nuse inquire::{Confirm, InquireError, Select, Text};\nuse std::io::IsTerminal;\n\nuse crate::version;\nuse presets::{PRESETS, WizardState};\n\n// ── Public API ──────────────────────────────────────────────────────────────\n\n/// Returns true when stdin AND stdout are interactive TTYs.\npub fn is_interactive_tty() -> bool {\n    std::io::stdin().is_terminal() && std::io::stdout().is_terminal()\n}\n\n/// After an offline/markdown export crawl (wizard mode), offer to immediately\n/// serve the exported content via HTTP.\n/// Returns `Some((dir_path, \"offline\"|\"markdown\"))` if the user confirms, else `None`.\npub fn offer_serve_after_export(crawl_argv: &[String]) -> Option<(String, String)> {\n    let (dir, kind) = if let Some(arg) = crawl_argv.iter().find(|a| a.starts_with(\"--offline-export-dir=\")) {\n        let raw = arg.trim_start_matches(\"--offline-export-dir=\");\n        (raw.trim_matches('\\'').to_string(), \"offline\")\n    } else if let Some(arg) = crawl_argv.iter().find(|a| a.starts_with(\"--markdown-export-dir=\")) {\n        let raw = arg.trim_start_matches(\"--markdown-export-dir=\");\n        (raw.trim_matches('\\'').to_string(), \"markdown\")\n    } else {\n        return None;\n    };\n\n    println!();\n    let confirmed = Confirm::new(&format!(\"Serve the {} export via HTTP?\", kind))\n        .with_default(true)\n        .prompt()\n        .unwrap_or(false);\n\n    if confirmed { Some((dir, kind.to_string())) } else { None }\n}\n\n/// Block until the user presses Enter. 
Used after a wizard-launched crawl\n/// so the terminal window stays open (especially on Windows double-click).\npub fn press_enter_to_exit() {\n    println!();\n    println!(\"{}\", \"Press Enter to exit...\".dimmed());\n    let mut buf = String::new();\n    let _ = std::io::stdin().read_line(&mut buf);\n}\n\n/// Run the interactive wizard. Returns a synthetic argv `Vec<String>` ready\n/// to be fed into `Initiator::new()` / `parse_argv()`.\npub fn run_wizard() -> Result<Vec<String>, WizardError> {\n    // Set inquire theme: yellow accents instead of default cyan, gray help text\n    let mut render_config = RenderConfig::default_colored();\n    render_config.help_message = StyleSheet::new().with_fg(Color::DarkGrey);\n    render_config.highlighted_option_prefix = Styled::new(\"❯\").with_fg(Color::DarkYellow);\n    render_config.answer = StyleSheet::new().with_fg(Color::DarkYellow);\n    inquire::set_global_render_config(render_config);\n\n    print_banner();\n\n    // Step 1: Preset selection (+ dynamic serve items if exports exist)\n    let choice = prompt_preset_or_serve()?;\n    match choice {\n        PresetChoice::Serve(argv) => Ok(argv),\n        PresetChoice::Preset(preset_idx) => {\n            let preset = &PRESETS[preset_idx];\n            let mut state = WizardState::from_preset(preset);\n\n            // Step 2: URL (required)\n            state.url = prompt_url()?;\n\n            // Resolve {domain} and {date} placeholders in export paths\n            resolve_export_paths(&mut state);\n\n            // Step 3: Interactive settings form (arrow-key navigation + value cycling)\n            let mut settings = form::build_form_settings(&state);\n            println!();\n\n            // Show warning for Stress Test preset\n            if preset.name == \"Stress Test\" {\n                println!(\n                    \"  {} {}\",\n                    \"WARNING:\".yellow().bold(),\n                    \"Stress testing generates high-concurrency load with 
cache-busting\".yellow()\n                );\n                println!(\n                    \"           {}\",\n                    \"random query params. This can overload a server and cause downtime.\".yellow()\n                );\n                println!(\n                    \"           {}\",\n                    \"Only run this against your own websites or with explicit permission!\"\n                        .yellow()\n                        .bold()\n                );\n                println!();\n            }\n\n            let confirmed = form::run_form(&mut settings, preset.name)?;\n            if !confirmed {\n                return Err(WizardError::Cancelled);\n            }\n            form::apply_form_to_state(&settings, &mut state);\n\n            // Re-resolve export paths — apply_form_to_state may have reset them to templates\n            resolve_export_paths(&mut state);\n\n            // Step 4: Summary & confirm\n            let argv = state.build_argv();\n            print_summary(&state, &argv);\n\n            let run = Confirm::new(\"Start the crawl?\").with_default(true).prompt()?;\n\n            if run {\n                println!();\n                Ok(argv)\n            } else {\n                Err(WizardError::Cancelled)\n            }\n        }\n    }\n}\n\n// ── Preset or Serve choice ──────────────────────────────────────────────────\n\nenum PresetChoice {\n    Preset(usize),\n    Serve(Vec<String>),\n}\n\n/// Separator label used in the menu to visually separate serve items.\nconst SERVE_SEPARATOR: &str = \"──────────────────────────────────────\";\n\nfn prompt_preset_or_serve() -> Result<PresetChoice, WizardError> {\n    let mut labels: Vec<String> = PRESETS.iter().map(|p| p.to_string()).collect();\n\n    // Detect existing exports in ./tmp/\n    let offline_dirs = find_export_dirs(\"offline\");\n    let markdown_dirs = find_export_dirs(\"markdown\");\n\n    let has_serve_items = !offline_dirs.is_empty() || 
!markdown_dirs.is_empty();\n    let serve_offline_label = \"Browse offline export     Serve a previously exported offline site via HTTP\";\n    let serve_markdown_label = \"Browse markdown export    Serve a previously exported markdown site via HTTP\";\n\n    if has_serve_items {\n        labels.push(SERVE_SEPARATOR.to_string());\n        if !offline_dirs.is_empty() {\n            labels.push(serve_offline_label.to_string());\n        }\n        if !markdown_dirs.is_empty() {\n            labels.push(serve_markdown_label.to_string());\n        }\n    }\n\n    let choice = Select::new(\"Choose a crawl mode:\", labels.clone())\n        .with_page_size(labels.len())\n        .prompt()?;\n\n    // Check if it's a serve option\n    if choice == serve_offline_label {\n        return prompt_serve_export(&offline_dirs, \"offline\");\n    }\n    if choice == serve_markdown_label {\n        return prompt_serve_export(&markdown_dirs, \"markdown\");\n    }\n    if choice == SERVE_SEPARATOR {\n        // User selected the separator — re-prompt\n        return prompt_preset_or_serve();\n    }\n\n    // It's a preset\n    let preset_idx = PRESETS.iter().position(|p| choice.starts_with(p.name)).unwrap_or(0);\n    Ok(PresetChoice::Preset(preset_idx))\n}\n\n/// Prompt the user to select from available exports, then return serve argv.\nfn prompt_serve_export(dirs: &[ExportDir], kind: &str) -> Result<PresetChoice, WizardError> {\n    let labels: Vec<String> = dirs.iter().map(|d| format!(\"{:40} {}\", d.name, d.date_label)).collect();\n\n    let choice = Select::new(&format!(\"Select {} export to serve:\", kind), labels).prompt()?;\n\n    // Find matching dir\n    let selected = dirs.iter().find(|d| choice.starts_with(&d.name)).unwrap();\n\n    let serve_flag = match kind {\n        \"offline\" => format!(\"--serve-offline={}\", selected.path),\n        _ => format!(\"--serve-markdown={}\", selected.path),\n    };\n\n    Ok(PresetChoice::Serve(vec![\"siteone-crawler\".to_string(), 
serve_flag]))\n}\n\n// ── Export directory detection ───────────────────────────────────────────────\n\nstruct ExportDir {\n    name: String,\n    path: String,\n    date_label: String,\n}\n\n/// Find export directories matching `./tmp/{kind}-*/` pattern, sorted newest first.\nfn find_export_dirs(kind: &str) -> Vec<ExportDir> {\n    let tmp_path = std::path::Path::new(\"./tmp\");\n    if !tmp_path.is_dir() {\n        return Vec::new();\n    }\n\n    let prefix = format!(\"{}-\", kind);\n    let mut dirs: Vec<ExportDir> = Vec::new();\n\n    if let Ok(entries) = std::fs::read_dir(tmp_path) {\n        for entry in entries.flatten() {\n            let path = entry.path();\n            if !path.is_dir() {\n                continue;\n            }\n            let name = entry.file_name().to_string_lossy().to_string();\n            if !name.starts_with(&prefix) {\n                continue;\n            }\n\n            // Extract a human-readable date from metadata\n            let date_label = std::fs::metadata(&path)\n                .and_then(|m| m.modified())\n                .ok()\n                .map(|t| {\n                    let dt: chrono::DateTime<chrono::Local> = t.into();\n                    dt.format(\"%Y-%m-%d %H:%M\").to_string()\n                })\n                .unwrap_or_default();\n\n            dirs.push(ExportDir {\n                name,\n                path: path.to_string_lossy().to_string(),\n                date_label,\n            });\n        }\n    }\n\n    // Sort newest first (by name descending — names contain date)\n    dirs.sort_by(|a, b| b.name.cmp(&a.name));\n    dirs\n}\n\n// ── Export path resolution ──────────────────────────────────────────────────\n\n/// Replace `{domain}` and `{date}` placeholders in export dir paths after URL is known.\nfn resolve_export_paths(state: &mut WizardState) {\n    let url = &state.url;\n    if let Some(ref dir) = state.offline_export_dir\n        && (dir.contains(\"{domain}\") || 
dir.contains(\"{date}\"))\n    {\n        state.offline_export_dir = Some(presets::resolve_export_path(dir, url));\n    }\n    if let Some(ref dir) = state.markdown_export_dir\n        && (dir.contains(\"{domain}\") || dir.contains(\"{date}\"))\n    {\n        state.markdown_export_dir = Some(presets::resolve_export_path(dir, url));\n    }\n}\n\n// ── Error type ──────────────────────────────────────────────────────────────\n\n#[derive(Debug)]\npub enum WizardError {\n    Cancelled,\n    IoError(String),\n}\n\nimpl std::fmt::Display for WizardError {\n    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {\n        match self {\n            WizardError::Cancelled => write!(f, \"Wizard cancelled.\"),\n            WizardError::IoError(msg) => write!(f, \"Wizard error: {}\", msg),\n        }\n    }\n}\n\nimpl From<InquireError> for WizardError {\n    fn from(err: InquireError) -> Self {\n        match err {\n            InquireError::OperationCanceled | InquireError::OperationInterrupted => WizardError::Cancelled,\n            other => WizardError::IoError(other.to_string()),\n        }\n    }\n}\n\n// ── Banner ──────────────────────────────────────────────────────────────────\n\nfn print_banner() {\n    let separator = \"=\".repeat(60);\n    println!();\n    println!(\"{}\", separator.dimmed());\n    println!(\n        \"  {} {}\",\n        \"SiteOne Crawler\".bold(),\n        format!(\"v{}\", version::CODE).dimmed()\n    );\n    println!(\n        \"  {}\",\n        \"Website QA toolkit: audit, clone, export, sitemap, CI/CD\".dimmed()\n    );\n    println!(\"{}\", separator.dimmed());\n    println!();\n}\n\n// ── URL prompt ──────────────────────────────────────────────────────────────\n\nfn prompt_url() -> Result<String, WizardError> {\n    let url = Text::new(\"Enter the website URL to crawl:\")\n        .with_placeholder(\"https://example.com\")\n        .with_help_message(\"Enter a domain (e.g. 
example.com) or full URL (https://...)\")\n        .with_validator(|input: &str| {\n            let trimmed = input.trim();\n            if trimmed.is_empty() {\n                return Ok(Validation::Invalid(\"URL is required.\".into()));\n            }\n            let url_str = normalize_url_input(trimmed);\n            match url::Url::parse(&url_str) {\n                Ok(u) if (u.scheme() == \"http\" || u.scheme() == \"https\") && u.host().is_some() => Ok(Validation::Valid),\n                _ => Ok(Validation::Invalid(\n                    \"Invalid URL. Enter a domain name or a valid http(s) address.\".into(),\n                )),\n            }\n        })\n        .prompt()?;\n\n    Ok(normalize_url_input(url.trim()))\n}\n\nfn normalize_url_input(input: &str) -> String {\n    let trimmed = input.trim();\n    if !trimmed.starts_with(\"http://\") && !trimmed.starts_with(\"https://\") {\n        format!(\"https://{}\", trimmed)\n    } else {\n        trimmed.to_string()\n    }\n}\n\n// ── Summary ─────────────────────────────────────────────────────────────────\n\nfn print_summary(state: &WizardState, argv: &[String]) {\n    println!();\n    let separator = \"=\".repeat(60);\n    println!(\"{}\", separator.dimmed());\n    println!(\"  {}\", \"Configuration Summary\".bold());\n    println!(\"{}\", separator.dimmed());\n    println!();\n\n    let label_width = 22;\n\n    print_row(\"URL:\", &state.url, label_width);\n    print_row(\"Preset:\", &state.preset_name, label_width);\n    print_row(\"Workers:\", &state.workers.to_string(), label_width);\n    print_row(\"Timeout:\", &format!(\"{}s\", state.timeout), label_width);\n    let rate_limit = if state.max_reqs_per_sec == 0 {\n        \"unlimited\".to_string()\n    } else {\n        format!(\"{}/s\", state.max_reqs_per_sec)\n    };\n    print_row(\"Rate limit:\", &rate_limit, label_width);\n\n    let max_urls = if state.max_visited_urls == 0 {\n        \"unlimited\".to_string()\n    } else {\n        
state.max_visited_urls.to_string()\n    };\n    print_row(\"Max URLs:\", &max_urls, label_width);\n    print_row(\"Device:\", &state.device, label_width);\n    print_row(\"Content types:\", &state.content_summary(), label_width);\n\n    if state.single_page {\n        print_row(\"Scope:\", \"single page\", label_width);\n    }\n    if let Some(ref dir) = state.offline_export_dir {\n        print_row(\"Offline export:\", dir, label_width);\n    }\n    if let Some(ref dir) = state.markdown_export_dir {\n        print_row(\"Markdown export:\", dir, label_width);\n    }\n    if let Some(ref file) = state.sitemap_xml_file {\n        print_row(\"Sitemap XML:\", file, label_width);\n    }\n    if let Some(ref cols) = state.extra_columns {\n        print_row(\"Extra columns:\", cols, label_width);\n    }\n    if !state.http_cache_enabled {\n        print_row(\"HTTP cache:\", \"disabled\", label_width);\n    }\n    if state.result_storage_file {\n        print_row(\"Storage:\", \"file\", label_width);\n    }\n    if state.ignore_robots_txt {\n        print_row(\"Robots.txt:\", \"ignored\", label_width);\n    }\n    if state.http_auth.is_some() {\n        print_row(\"HTTP auth:\", \"configured\", label_width);\n    }\n    if let Some(ref proxy) = state.proxy {\n        print_row(\"Proxy:\", proxy, label_width);\n    }\n\n    // Show generated CLI command\n    println!();\n    println!(\"  {}\", \"Equivalent CLI command:\".yellow());\n    let cmd = argv[1..].join(\" \\\\\\n    \");\n    println!(\"  {} {}\", \"siteone-crawler\".yellow(), cmd.yellow());\n    println!();\n    println!(\"  {}\", \"Tip: Copy this command to skip the wizard next time.\".dimmed());\n    println!();\n}\n\nfn print_row(label: &str, value: &str, label_width: usize) {\n    println!(\"  {:<width$} {}\", label.dimmed(), value, width = label_width);\n}\n"
  },
  {
    "path": "src/wizard/presets.rs",
    "content": "// SiteOne Crawler - Wizard preset definitions and state\n// (c) Jan Reges <jan.reges@siteone.cz>\n\nuse std::fmt;\n\n/// A wizard preset — predefined configuration for common use cases.\npub struct Preset {\n    pub name: &'static str,\n    pub description: &'static str,\n    pub workers: u32,\n    pub timeout: u32,\n    pub max_reqs_per_sec: u32,\n    pub max_visited_urls: u32,\n    pub disable_javascript: bool,\n    pub disable_styles: bool,\n    pub disable_fonts: bool,\n    pub disable_images: bool,\n    pub disable_files: bool,\n    pub single_page: bool,\n    pub offline_export_dir: Option<&'static str>,\n    pub markdown_export_dir: Option<&'static str>,\n    pub sitemap_xml_file: Option<&'static str>,\n    pub http_cache_enabled: bool,\n    pub result_storage_file: bool,\n    pub extra_columns: Option<&'static str>,\n    pub ignore_robots_txt: bool,\n    pub add_random_query_params: bool,\n    pub allowed_domains_for_external_files: Option<&'static str>,\n    pub hide_columns: Option<&'static str>,\n}\n\nimpl fmt::Display for Preset {\n    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {\n        write!(f, \"{:26} {}\", self.name, self.description)\n    }\n}\n\npub const PRESETS: &[Preset] = &[\n    // 1. 
Quick Audit — the most common starting point\n    Preset {\n        name: \"Quick Audit\",\n        description: \"Fast site health overview — crawls all pages and assets\",\n        workers: 5,\n        timeout: 5,\n        max_reqs_per_sec: 10,\n        max_visited_urls: 10000,\n        disable_javascript: false,\n        disable_styles: false,\n        disable_fonts: false,\n        disable_images: false,\n        disable_files: false,\n        single_page: false,\n        offline_export_dir: None,\n        markdown_export_dir: None,\n        sitemap_xml_file: None,\n        http_cache_enabled: true,\n        result_storage_file: false,\n        extra_columns: Some(\"Title(20)\"),\n        ignore_robots_txt: false,\n        add_random_query_params: false,\n        allowed_domains_for_external_files: None,\n        hide_columns: Some(\"cache\"),\n    },\n    // 2. SEO Analysis — metadata, headings, OpenGraph\n    Preset {\n        name: \"SEO Analysis\",\n        description: \"Extract titles, descriptions, keywords, and OpenGraph tags\",\n        workers: 8,\n        timeout: 5,\n        max_reqs_per_sec: 20,\n        max_visited_urls: 50000,\n        disable_javascript: true,\n        disable_styles: true,\n        disable_fonts: true,\n        disable_images: true,\n        disable_files: true,\n        single_page: false,\n        offline_export_dir: None,\n        markdown_export_dir: None,\n        sitemap_xml_file: None,\n        http_cache_enabled: true,\n        result_storage_file: false,\n        extra_columns: Some(\"Title(20),Description(20),H1=xpath://h1/text()(40)\"),\n        ignore_robots_txt: false,\n        add_random_query_params: false,\n        allowed_domains_for_external_files: None,\n        hide_columns: Some(\"cache\"),\n    },\n    // 3. 
Performance Test — realistic timing, no cache\n    Preset {\n        name: \"Performance Test\",\n        description: \"Measure response times with cache disabled — find bottlenecks\",\n        workers: 3,\n        timeout: 10,\n        max_reqs_per_sec: 5,\n        max_visited_urls: 5000,\n        disable_javascript: false,\n        disable_styles: false,\n        disable_fonts: false,\n        disable_images: false,\n        disable_files: false,\n        single_page: false,\n        offline_export_dir: None,\n        markdown_export_dir: None,\n        sitemap_xml_file: None,\n        http_cache_enabled: false,\n        result_storage_file: false,\n        extra_columns: Some(\"Title(30),DOM\"),\n        ignore_robots_txt: false,\n        add_random_query_params: false,\n        allowed_domains_for_external_files: None,\n        hide_columns: None,\n    },\n    // 4. Security Check — headers, SSL/TLS, CSP\n    Preset {\n        name: \"Security Check\",\n        description: \"Check SSL/TLS, security headers, and redirects site-wide\",\n        workers: 5,\n        timeout: 5,\n        max_reqs_per_sec: 15,\n        max_visited_urls: 10000,\n        disable_javascript: false,\n        disable_styles: true,\n        disable_fonts: true,\n        disable_images: true,\n        disable_files: true,\n        single_page: false,\n        offline_export_dir: None,\n        markdown_export_dir: None,\n        sitemap_xml_file: None,\n        http_cache_enabled: true,\n        result_storage_file: false,\n        extra_columns: Some(\"Title(30)\"),\n        ignore_robots_txt: false,\n        add_random_query_params: false,\n        allowed_domains_for_external_files: None,\n        hide_columns: Some(\"cache\"),\n    },\n    // 5. 
Offline Clone — full site download\n    Preset {\n        name: \"Offline Clone\",\n        description: \"Download entire website with all assets for offline browsing\",\n        workers: 2,\n        timeout: 5,\n        max_reqs_per_sec: 8,\n        max_visited_urls: 100000,\n        disable_javascript: false,\n        disable_styles: false,\n        disable_fonts: false,\n        disable_images: false,\n        disable_files: false,\n        single_page: false,\n        offline_export_dir: Some(\"./tmp/offline-{domain}-{date}/\"),\n        markdown_export_dir: None,\n        sitemap_xml_file: None,\n        http_cache_enabled: false,\n        result_storage_file: false,\n        extra_columns: None,\n        ignore_robots_txt: false,\n        add_random_query_params: false,\n        allowed_domains_for_external_files: Some(\"*\"),\n        hide_columns: Some(\"cache\"),\n    },\n    // 6. Markdown Export — content for AI/docs\n    Preset {\n        name: \"Markdown Export\",\n        description: \"Convert pages to Markdown for AI models or documentation\",\n        workers: 3,\n        timeout: 5,\n        max_reqs_per_sec: 10,\n        max_visited_urls: 20000,\n        disable_javascript: true,\n        disable_styles: true,\n        disable_fonts: true,\n        disable_images: false,\n        disable_files: false,\n        single_page: false,\n        offline_export_dir: None,\n        markdown_export_dir: Some(\"./tmp/markdown-{domain}-{date}/\"),\n        sitemap_xml_file: None,\n        http_cache_enabled: true,\n        result_storage_file: false,\n        extra_columns: Some(\"Title(40)\"),\n        ignore_robots_txt: false,\n        add_random_query_params: false,\n        allowed_domains_for_external_files: None,\n        hide_columns: Some(\"cache\"),\n    },\n    // 7. 
Stress Test — high concurrency load testing with cache busting\n    Preset {\n        name: \"Stress Test\",\n        description: \"High-concurrency load test with cache-busting random params\",\n        workers: 20,\n        timeout: 10,\n        max_reqs_per_sec: 20,\n        max_visited_urls: 10000,\n        disable_javascript: true,\n        disable_styles: true,\n        disable_fonts: true,\n        disable_images: true,\n        disable_files: true,\n        single_page: false,\n        offline_export_dir: None,\n        markdown_export_dir: None,\n        sitemap_xml_file: None,\n        http_cache_enabled: false,\n        result_storage_file: false,\n        extra_columns: Some(\"Title(30)\"),\n        ignore_robots_txt: true,\n        add_random_query_params: true,\n        allowed_domains_for_external_files: None,\n        hide_columns: Some(\"cache\"),\n    },\n    // 8. Single Page — deep dive on one URL\n    Preset {\n        name: \"Single Page\",\n        description: \"Deep analysis of a single URL — SEO, security, performance\",\n        workers: 1,\n        timeout: 10,\n        max_reqs_per_sec: 10,\n        max_visited_urls: 1,\n        disable_javascript: false,\n        disable_styles: false,\n        disable_fonts: false,\n        disable_images: false,\n        disable_files: false,\n        single_page: true,\n        offline_export_dir: None,\n        markdown_export_dir: None,\n        sitemap_xml_file: None,\n        http_cache_enabled: true,\n        result_storage_file: false,\n        extra_columns: Some(\"Title(50),Description(50),Keywords(30),DOM\"),\n        ignore_robots_txt: false,\n        add_random_query_params: false,\n        allowed_domains_for_external_files: None,\n        hide_columns: None,\n    },\n    // 9. 
Large Site Crawl — optimized for scale\n    Preset {\n        name: \"Large Site Crawl\",\n        description: \"High-throughput HTML-only crawl for large sites (100k+ pages)\",\n        workers: 10,\n        timeout: 3,\n        max_reqs_per_sec: 50,\n        max_visited_urls: 0, // unlimited\n        disable_javascript: true,\n        disable_styles: true,\n        disable_fonts: true,\n        disable_images: true,\n        disable_files: true,\n        single_page: false,\n        offline_export_dir: None,\n        markdown_export_dir: None,\n        sitemap_xml_file: Some(\"./sitemap.xml\"),\n        http_cache_enabled: true,\n        result_storage_file: false,\n        extra_columns: Some(\"Title(40)\"),\n        ignore_robots_txt: true,\n        add_random_query_params: false,\n        allowed_domains_for_external_files: None,\n        hide_columns: Some(\"cache\"),\n    },\n    // 10. Custom — power users\n    Preset {\n        name: \"Custom\",\n        description: \"Start from defaults and configure every option manually\",\n        workers: 3,\n        timeout: 5,\n        max_reqs_per_sec: 10,\n        max_visited_urls: 10000,\n        disable_javascript: false,\n        disable_styles: false,\n        disable_fonts: false,\n        disable_images: false,\n        disable_files: false,\n        single_page: false,\n        offline_export_dir: None,\n        markdown_export_dir: None,\n        sitemap_xml_file: None,\n        http_cache_enabled: true,\n        result_storage_file: false,\n        extra_columns: None,\n        ignore_robots_txt: false,\n        add_random_query_params: false,\n        allowed_domains_for_external_files: None,\n        hide_columns: None,\n    },\n];\n\n/// Mutable state collected by the wizard, built from a preset and optionally customized.\npub struct WizardState {\n    pub preset_name: String,\n    pub url: String,\n    pub workers: u32,\n    pub timeout: u32,\n    pub max_reqs_per_sec: u32,\n    pub 
max_visited_urls: u32,\n    pub device: String,\n    pub disable_javascript: bool,\n    pub disable_styles: bool,\n    pub disable_fonts: bool,\n    pub disable_images: bool,\n    pub disable_files: bool,\n    pub single_page: bool,\n    pub offline_export_dir: Option<String>,\n    pub markdown_export_dir: Option<String>,\n    pub sitemap_xml_file: Option<String>,\n    pub http_cache_enabled: bool,\n    pub result_storage_file: bool,\n    pub ignore_robots_txt: bool,\n    pub add_random_query_params: bool,\n    pub allowed_domains_for_external_files: Option<String>,\n    pub hide_columns: Option<String>,\n    pub extra_columns: Option<String>,\n    pub http_auth: Option<String>,\n    pub proxy: Option<String>,\n}\n\nimpl WizardState {\n    pub fn from_preset(preset: &Preset) -> Self {\n        WizardState {\n            preset_name: preset.name.to_string(),\n            url: String::new(),\n            workers: preset.workers,\n            timeout: preset.timeout,\n            max_reqs_per_sec: preset.max_reqs_per_sec,\n            max_visited_urls: preset.max_visited_urls,\n            device: \"desktop\".to_string(),\n            disable_javascript: preset.disable_javascript,\n            disable_styles: preset.disable_styles,\n            disable_fonts: preset.disable_fonts,\n            disable_images: preset.disable_images,\n            disable_files: preset.disable_files,\n            single_page: preset.single_page,\n            offline_export_dir: preset.offline_export_dir.map(String::from),\n            markdown_export_dir: preset.markdown_export_dir.map(String::from),\n            sitemap_xml_file: preset.sitemap_xml_file.map(String::from),\n            http_cache_enabled: preset.http_cache_enabled,\n            result_storage_file: preset.result_storage_file,\n            ignore_robots_txt: preset.ignore_robots_txt,\n            add_random_query_params: preset.add_random_query_params,\n            allowed_domains_for_external_files: 
preset.allowed_domains_for_external_files.map(String::from),\n            hide_columns: preset.hide_columns.map(String::from),\n            extra_columns: preset.extra_columns.map(String::from),\n            http_auth: None,\n            proxy: None,\n        }\n    }\n\n    /// Build synthetic argv from wizard state. Only includes flags that differ from\n    /// siteone-crawler defaults so the generated command is minimal and readable.\n    pub fn build_argv(&self) -> Vec<String> {\n        let mut args = vec![\"siteone-crawler\".to_string(), format!(\"--url='{}'\", self.url)];\n\n        // Performance & limits (defaults: workers=3, timeout=5, rps=10, max-urls=10000)\n        if self.workers != 3 {\n            args.push(format!(\"--workers={}\", self.workers));\n        }\n        if self.timeout != 5 {\n            args.push(format!(\"--timeout={}\", self.timeout));\n        }\n        if self.max_reqs_per_sec != 10 {\n            args.push(format!(\"--max-reqs-per-sec={}\", self.max_reqs_per_sec));\n        }\n        if self.max_visited_urls != 10000 {\n            args.push(format!(\"--max-visited-urls={}\", self.max_visited_urls));\n        }\n\n        // Device (default: desktop)\n        if self.device != \"desktop\" {\n            args.push(format!(\"--device='{}'\", self.device));\n        }\n\n        // Scope\n        if self.single_page {\n            args.push(\"--single-page\".to_string());\n        }\n\n        // Content filtering\n        if self.disable_javascript {\n            args.push(\"--disable-javascript\".to_string());\n        }\n        if self.disable_styles {\n            args.push(\"--disable-styles\".to_string());\n        }\n        if self.disable_fonts {\n            args.push(\"--disable-fonts\".to_string());\n        }\n        if self.disable_images {\n            args.push(\"--disable-images\".to_string());\n        }\n        if self.disable_files {\n            args.push(\"--disable-files\".to_string());\n        }\n\n   
     // Generators / exports\n        if let Some(ref dir) = self.offline_export_dir {\n            args.push(format!(\"--offline-export-dir='{}'\", dir));\n        }\n        if let Some(ref dir) = self.markdown_export_dir {\n            args.push(format!(\"--markdown-export-dir='{}'\", dir));\n        }\n        if let Some(ref file) = self.sitemap_xml_file {\n            args.push(format!(\"--sitemap-xml-file='{}'\", file));\n        }\n\n        // Caching (default: enabled)\n        if !self.http_cache_enabled {\n            args.push(\"--no-cache\".to_string());\n        }\n        if self.result_storage_file {\n            args.push(\"--result-storage='file'\".to_string());\n        }\n\n        // Extra columns\n        if let Some(ref cols) = self.extra_columns {\n            args.push(format!(\"--extra-columns='{}'\", cols));\n        }\n\n        // Advanced\n        if self.ignore_robots_txt {\n            args.push(\"--ignore-robots-txt\".to_string());\n        }\n        if self.add_random_query_params {\n            args.push(\"--add-random-query-params\".to_string());\n        }\n        if let Some(ref domains) = self.allowed_domains_for_external_files {\n            args.push(format!(\"--allowed-domain-for-external-files='{}'\", domains));\n        }\n        if let Some(ref cols) = self.hide_columns {\n            args.push(format!(\"--hide-columns='{}'\", cols));\n        }\n        if let Some(ref auth) = self.http_auth {\n            args.push(format!(\"--http-auth='{}'\", auth));\n        }\n        if let Some(ref proxy) = self.proxy {\n            args.push(format!(\"--proxy='{}'\", proxy));\n        }\n\n        args\n    }\n\n    /// Format a human-readable summary of non-default content types.\n    pub fn content_summary(&self) -> String {\n        let mut types = vec![\"HTML\"];\n        if !self.disable_javascript {\n            types.push(\"JS\");\n        }\n        if !self.disable_styles {\n            types.push(\"CSS\");\n        
}\n        if !self.disable_fonts {\n            types.push(\"Fonts\");\n        }\n        if !self.disable_images {\n            types.push(\"Images\");\n        }\n        if !self.disable_files {\n            types.push(\"Files\");\n        }\n        types.join(\", \")\n    }\n}\n\n/// Replace `{domain}` and `{date}` placeholders in export directory paths.\n/// Called after the URL is known.\npub fn resolve_export_path(template: &str, url: &str) -> String {\n    let domain = url::Url::parse(url)\n        .ok()\n        .and_then(|u| u.host_str().map(String::from))\n        .unwrap_or_else(|| \"unknown\".to_string());\n    let date = chrono::Local::now().format(\"%Y%m%d\").to_string();\n    template.replace(\"{domain}\", &domain).replace(\"{date}\", &date)\n}\n\n#[cfg(test)]\nmod tests {\n    use super::*;\n\n    #[test]\n    fn preset_count_is_10() {\n        assert_eq!(PRESETS.len(), 10);\n    }\n\n    #[test]\n    fn last_preset_is_custom() {\n        assert_eq!(PRESETS[PRESETS.len() - 1].name, \"Custom\");\n    }\n\n    #[test]\n    fn build_argv_contains_url() {\n        let mut state = WizardState::from_preset(&PRESETS[0]);\n        state.url = \"https://example.com\".to_string();\n        let argv = state.build_argv();\n        assert_eq!(argv[0], \"siteone-crawler\");\n        assert_eq!(argv[1], \"--url='https://example.com'\");\n    }\n\n    #[test]\n    fn build_argv_custom_is_minimal() {\n        let mut state = WizardState::from_preset(&PRESETS[9]); // Custom\n        state.url = \"https://example.com\".to_string();\n        let argv = state.build_argv();\n        // Custom preset uses all defaults, so only binary name + URL\n        assert_eq!(argv.len(), 2);\n    }\n\n    #[test]\n    fn build_argv_quick_audit() {\n        let mut state = WizardState::from_preset(&PRESETS[0]);\n        state.url = \"https://example.com\".to_string();\n        let argv = state.build_argv();\n        assert!(argv.contains(&\"--workers=5\".to_string()));\n        
assert!(argv.contains(&\"--extra-columns='Title(20)'\".to_string()));\n    }\n\n    #[test]\n    fn build_argv_seo_disables_assets() {\n        let mut state = WizardState::from_preset(&PRESETS[1]); // SEO\n        state.url = \"https://example.com\".to_string();\n        let argv = state.build_argv();\n        assert!(argv.contains(&\"--disable-javascript\".to_string()));\n        assert!(argv.contains(&\"--disable-styles\".to_string()));\n        assert!(argv.contains(&\"--disable-fonts\".to_string()));\n        assert!(argv.contains(&\"--disable-images\".to_string()));\n        assert!(argv.contains(&\"--disable-files\".to_string()));\n        assert!(argv.contains(&\"--workers=8\".to_string()));\n        assert!(argv.contains(&\"--max-reqs-per-sec=20\".to_string()));\n    }\n\n    #[test]\n    fn build_argv_seo_has_extra_columns() {\n        let mut state = WizardState::from_preset(&PRESETS[1]); // SEO\n        state.url = \"https://example.com\".to_string();\n        let argv = state.build_argv();\n        assert!(argv.contains(&\"--extra-columns='Title(20),Description(20),H1=xpath://h1/text()(40)'\".to_string()));\n    }\n\n    #[test]\n    fn build_argv_performance_test() {\n        let mut state = WizardState::from_preset(&PRESETS[2]);\n        state.url = \"https://example.com\".to_string();\n        let argv = state.build_argv();\n        assert!(argv.contains(&\"--timeout=10\".to_string()));\n        assert!(argv.contains(&\"--max-reqs-per-sec=5\".to_string()));\n        assert!(argv.contains(&\"--no-cache\".to_string()));\n        assert!(argv.contains(&\"--max-visited-urls=5000\".to_string()));\n    }\n\n    #[test]\n    fn build_argv_security_check() {\n        let mut state = WizardState::from_preset(&PRESETS[3]);\n        state.url = \"https://example.com\".to_string();\n        let argv = state.build_argv();\n        assert!(argv.contains(&\"--disable-styles\".to_string()));\n        assert!(argv.contains(&\"--disable-fonts\".to_string()));\n       
 assert!(argv.contains(&\"--disable-images\".to_string()));\n        assert!(!argv.contains(&\"--disable-javascript\".to_string())); // JS stays enabled\n    }\n\n    #[test]\n    fn build_argv_offline_clone() {\n        let mut state = WizardState::from_preset(&PRESETS[4]);\n        state.url = \"https://example.com\".to_string();\n        let argv = state.build_argv();\n        assert!(argv.iter().any(|a| a.starts_with(\"--offline-export-dir=\")));\n        assert!(argv.contains(&\"--no-cache\".to_string()));\n        assert!(argv.contains(&\"--max-visited-urls=100000\".to_string()));\n        assert!(argv.contains(&\"--workers=2\".to_string()));\n    }\n\n    #[test]\n    fn build_argv_markdown_export() {\n        let mut state = WizardState::from_preset(&PRESETS[5]);\n        state.url = \"https://example.com\".to_string();\n        let argv = state.build_argv();\n        assert!(argv.iter().any(|a| a.starts_with(\"--markdown-export-dir=\")));\n        assert!(argv.contains(&\"--disable-javascript\".to_string()));\n        assert!(!argv.contains(&\"--disable-images\".to_string())); // images stay enabled\n        assert!(argv.contains(&\"--max-visited-urls=20000\".to_string()));\n    }\n\n    #[test]\n    fn build_argv_stress_test() {\n        let mut state = WizardState::from_preset(&PRESETS[6]);\n        state.url = \"https://example.com\".to_string();\n        let argv = state.build_argv();\n        assert!(argv.contains(&\"--workers=20\".to_string()));\n        assert!(argv.contains(&\"--max-reqs-per-sec=20\".to_string()));\n        assert!(argv.contains(&\"--add-random-query-params\".to_string()));\n        assert!(argv.contains(&\"--ignore-robots-txt\".to_string()));\n        assert!(argv.contains(&\"--no-cache\".to_string()));\n        assert!(argv.contains(&\"--disable-javascript\".to_string()));\n        assert!(argv.contains(&\"--disable-styles\".to_string()));\n        assert!(argv.contains(&\"--disable-fonts\".to_string()));\n        
assert!(argv.contains(&\"--disable-images\".to_string()));\n        assert!(argv.contains(&\"--disable-files\".to_string()));\n    }\n\n    #[test]\n    fn build_argv_single_page() {\n        let mut state = WizardState::from_preset(&PRESETS[7]);\n        state.url = \"https://example.com\".to_string();\n        let argv = state.build_argv();\n        assert!(argv.contains(&\"--single-page\".to_string()));\n        assert!(argv.contains(&\"--workers=1\".to_string()));\n        assert!(argv.contains(&\"--timeout=10\".to_string()));\n    }\n\n    #[test]\n    fn build_argv_large_site() {\n        let mut state = WizardState::from_preset(&PRESETS[8]);\n        state.url = \"https://example.com\".to_string();\n        let argv = state.build_argv();\n        assert!(argv.contains(&\"--workers=10\".to_string()));\n        assert!(argv.contains(&\"--max-reqs-per-sec=50\".to_string()));\n        assert!(argv.contains(&\"--max-visited-urls=0\".to_string()));\n        assert!(argv.contains(&\"--timeout=3\".to_string()));\n        assert!(argv.contains(&\"--ignore-robots-txt\".to_string()));\n        assert!(argv.contains(&\"--sitemap-xml-file='./sitemap.xml'\".to_string()));\n    }\n\n    #[test]\n    fn content_summary_all_enabled() {\n        let state = WizardState::from_preset(&PRESETS[0]);\n        assert_eq!(state.content_summary(), \"HTML, JS, CSS, Fonts, Images, Files\");\n    }\n\n    #[test]\n    fn content_summary_html_only() {\n        let state = WizardState::from_preset(&PRESETS[1]); // SEO\n        assert_eq!(state.content_summary(), \"HTML\");\n    }\n\n    #[test]\n    fn description_lengths_within_range() {\n        for preset in PRESETS {\n            let len = preset.description.len();\n            assert!(\n                (50..=65).contains(&len),\n                \"Preset '{}' description is {} chars (expected 50-65): \\\"{}\\\"\",\n                preset.name,\n                len,\n                preset.description\n            );\n        }\n    
}\n}\n"
  },
  {
    "path": "tests/common/mod.rs",
    "content": "// Shared helpers for integration tests\n\nuse std::path::PathBuf;\nuse std::process::{Command, Output};\n\n/// Get path to the compiled binary.\n/// Tries release first, falls back to debug.\npub fn binary_path() -> PathBuf {\n    let release = PathBuf::from(env!(\"CARGO_MANIFEST_DIR\"))\n        .join(\"target\")\n        .join(\"release\")\n        .join(\"siteone-crawler\");\n    if release.exists() {\n        return release;\n    }\n    // Fall back to debug build\n    PathBuf::from(env!(\"CARGO_MANIFEST_DIR\"))\n        .join(\"target\")\n        .join(\"debug\")\n        .join(\"siteone-crawler\")\n}\n\n/// Run the crawler with given arguments and return Output.\npub fn run_crawler(args: &[&str]) -> Output {\n    Command::new(binary_path())\n        .args(args)\n        .output()\n        .expect(\"Failed to execute crawler binary\")\n}\n\n/// Run crawler and parse stdout as JSON.\npub fn run_crawler_json(args: &[&str]) -> serde_json::Value {\n    let output = run_crawler(args);\n    let stdout = String::from_utf8_lossy(&output.stdout);\n    // JSON output may be preceded by progress lines on stderr, but stdout should be pure JSON\n    serde_json::from_str(&stdout).unwrap_or_else(|e| {\n        panic!(\n            \"Failed to parse JSON output: {}\\nFirst 500 chars: {}\",\n            e,\n            &stdout[..stdout.len().min(500)]\n        )\n    })\n}\n\n/// Create a temporary directory that is cleaned up when dropped.\npub struct TempDir {\n    pub path: PathBuf,\n}\n\nimpl TempDir {\n    pub fn new(prefix: &str) -> Self {\n        let path = std::env::temp_dir().join(format!(\"siteone-test-{}-{}\", prefix, std::process::id()));\n        if path.exists() {\n            std::fs::remove_dir_all(&path).ok();\n        }\n        std::fs::create_dir_all(&path).expect(\"Failed to create temp dir\");\n        TempDir { path }\n    }\n}\n\nimpl Drop for TempDir {\n    fn drop(&mut self) {\n        std::fs::remove_dir_all(&self.path).ok();\n    
}\n}\n"
  },
  {
    "path": "tests/integration_crawl.rs",
    "content": "// Integration tests: crawl crawler.siteone.io and verify output correctness.\n//\n// These tests require network access and a built binary.\n// Run with: cargo test --test integration_crawl -- --ignored\n//\n// They are #[ignore] by default so `cargo test` stays fast and offline.\n//\n// Network tests use a serial mutex to prevent parallel crawls against the\n// same server, which would cause rate-limiting and flaky failures.\n\nmod common;\n\nuse common::{TempDir, run_crawler, run_crawler_json};\nuse std::path::Path;\nuse std::sync::Mutex;\n\n/// Mutex to serialize all network tests that hit crawler.siteone.io.\n/// Prevents parallel crawls from overwhelming the server.\nstatic SERIAL: Mutex<()> = Mutex::new(());\n\n/// Common crawler flags to be gentle on the remote server.\nconst GENTLE_FLAGS: [&str; 3] = [\"--workers=2\", \"--max-reqs-per-sec=5\", \"--http-cache-dir=\"];\n\n// =========================================================================\n// 1. Full crawl of crawler.siteone.io — verify content type counts\n// =========================================================================\n\n#[test]\n#[ignore]\nfn crawl_siteone_content_type_counts() {\n    let _guard = SERIAL.lock().unwrap_or_else(|e| e.into_inner());\n\n    let mut args: Vec<&str> = vec![\"--url=https://crawler.siteone.io\", \"--output=json\"];\n    args.extend_from_slice(&GENTLE_FLAGS);\n    let json = run_crawler_json(&args);\n\n    let tables = &json[\"tables\"];\n    let ct = &tables[\"content-types\"];\n    let rows = ct[\"rows\"].as_array().expect(\"content-types rows\");\n\n    let find_count = |content_type: &str| -> i64 {\n        rows.iter()\n            .find(|r| r[\"contentType\"].as_str() == Some(content_type))\n            .and_then(|r| r[\"count\"].as_str())\n            .and_then(|s| s.parse().ok())\n            .unwrap_or(0)\n    };\n\n    let html_count = find_count(\"HTML\");\n    let js_count = find_count(\"JS\");\n    let css_count = 
find_count(\"CSS\");\n    let image_count = find_count(\"Image\");\n\n    // Verified baseline (March 2026): HTML=54, JS=5, CSS=3, Image=10\n    // Allow ±5 tolerance for HTML (site may add/remove pages)\n    assert!(\n        (49..=59).contains(&html_count),\n        \"Expected ~54 HTML pages, got {}\",\n        html_count\n    );\n    assert!((3..=8).contains(&js_count), \"Expected ~5 JS files, got {}\", js_count);\n    assert!((2..=6).contains(&css_count), \"Expected ~3 CSS files, got {}\", css_count);\n    assert!(\n        (7..=15).contains(&image_count),\n        \"Expected ~10 images, got {}\",\n        image_count\n    );\n\n    // Total URLs: ~73\n    let total_urls = json[\"stats\"][\"totalUrls\"].as_i64().expect(\"totalUrls\");\n    assert!(\n        (65..=85).contains(&total_urls),\n        \"Expected ~73 total URLs, got {}\",\n        total_urls\n    );\n\n    // Only 200 and 404 status codes expected\n    let status_counts = json[\"stats\"][\"countByStatus\"].as_object().expect(\"countByStatus\");\n    let count_200 = status_counts.get(\"200\").and_then(|v| v.as_i64()).unwrap_or(0);\n    let count_404 = status_counts.get(\"404\").and_then(|v| v.as_i64()).unwrap_or(0);\n    assert!(count_200 > 60, \"Expected >60 successful URLs, got {}\", count_200);\n    assert!(\n        count_404 >= 0 && count_404 <= 10,\n        \"Expected 0-10 404s, got {}\",\n        count_404\n    );\n\n    // Quality score should be reasonable\n    let overall_score = json[\"qualityScores\"][\"overall\"][\"score\"]\n        .as_f64()\n        .expect(\"overall score\");\n    assert!(\n        overall_score >= 7.0,\n        \"Expected overall score >= 7.0, got {}\",\n        overall_score\n    );\n}\n\n// =========================================================================\n// 2. 
Non-existent domain — verify exit code 3 and graceful handling\n// =========================================================================\n\n#[test]\n#[ignore]\nfn crawl_nonexistent_domain_exits_with_code_3() {\n    let _guard = SERIAL.lock().unwrap_or_else(|e| e.into_inner());\n\n    let domain = format!(\n        \"https://nonexistent-{}.invalid\",\n        std::time::SystemTime::now()\n            .duration_since(std::time::UNIX_EPOCH)\n            .unwrap()\n            .as_millis()\n    );\n\n    let output = run_crawler(&[\n        &format!(\"--url={}\", domain),\n        \"--single-page\",\n        \"--timeout=5\",\n        \"--http-cache-dir=\",\n    ]);\n\n    assert_eq!(\n        output.status.code(),\n        Some(3),\n        \"Expected exit code 3 for non-existent domain, got {:?}\",\n        output.status.code()\n    );\n}\n\n// =========================================================================\n// 3. Non-existent domain with --ci — verify exit code 10\n// =========================================================================\n\n#[test]\n#[ignore]\nfn crawl_nonexistent_domain_ci_exits_with_code_10() {\n    let _guard = SERIAL.lock().unwrap_or_else(|e| e.into_inner());\n\n    let domain = format!(\n        \"https://nonexistent-{}.invalid\",\n        std::time::SystemTime::now()\n            .duration_since(std::time::UNIX_EPOCH)\n            .unwrap()\n            .as_millis()\n    );\n\n    let output = run_crawler(&[\n        &format!(\"--url={}\", domain),\n        \"--single-page\",\n        \"--timeout=5\",\n        \"--ci\",\n        \"--http-cache-dir=\",\n    ]);\n\n    assert_eq!(\n        output.status.code(),\n        Some(10),\n        \"Expected exit code 10 for CI gate with no pages, got {:?}\",\n        output.status.code()\n    );\n}\n\n// =========================================================================\n// 4. 
Offline export — verify key pages exist and links are relative\n// =========================================================================\n\n#[test]\n#[ignore]\nfn crawl_siteone_offline_export() {\n    let _guard = SERIAL.lock().unwrap_or_else(|e| e.into_inner());\n\n    let tmp = TempDir::new(\"offline\");\n    let offline_dir = tmp.path.join(\"site\");\n\n    // Build the option string first so it lives long enough for the borrow in args.\n    let offline_dir_str = format!(\"--offline-export-dir={}\", offline_dir.display());\n    let mut args: Vec<&str> = vec![\"--url=https://crawler.siteone.io\", &offline_dir_str];\n    args.extend_from_slice(&GENTLE_FLAGS);\n    let output = run_crawler(&args);\n\n    assert!(\n        output.status.success(),\n        \"Crawler failed with {:?}\",\n        output.status.code()\n    );\n\n    // Key pages must exist\n    assert!(offline_dir.join(\"index.html\").exists(), \"Missing index.html\");\n    assert!(\n        offline_dir.join(\"introduction/overview/index.html\").exists(),\n        \"Missing introduction/overview/index.html\"\n    );\n    assert!(\n        offline_dir\n            .join(\"features/seo-and-opengraph-analysis/index.html\")\n            .exists(),\n        \"Missing features/seo-and-opengraph-analysis/index.html\"\n    );\n\n    // Check relative links in index.html\n    let index_html = std::fs::read_to_string(offline_dir.join(\"index.html\")).expect(\"Failed to read index.html\");\n    // Should contain relative link to introduction/overview\n    assert!(\n        index_html.contains(\"introduction/overview/index.html\"),\n        \"index.html should contain relative link to introduction/overview/index.html\"\n    );\n    // Should contain relative CSS reference\n    assert!(\n        index_html.contains(\"_astro/index.BRwACyc2.css\") || index_html.contains(\"_astro/\"),\n        \"index.html should contain relative 
reference to CSS in _astro/\"\n    );\n    // Should NOT contain absolute https://crawler.siteone.io links for internal pages\n    // (external links like GitHub are OK)\n    // Collect up to 80 chars of context per hit; chars().take() avoids panicking\n    // on a UTF-8 char boundary that a raw byte slice i..i+80 could hit.\n    let internal_absolute_links: Vec<String> = index_html\n        .match_indices(\"href=\\\"https://crawler.siteone.io\")\n        .map(|(i, _)| index_html[i..].chars().take(80).collect())\n        .collect();\n    assert!(\n        internal_absolute_links.is_empty(),\n        \"Offline index.html should not contain absolute links to crawler.siteone.io: {:?}\",\n        &internal_absolute_links[..internal_absolute_links.len().min(3)]\n    );\n\n    // Check links in a subpage point correctly back up\n    let overview_html = std::fs::read_to_string(offline_dir.join(\"introduction/overview/index.html\"))\n        .expect(\"Failed to read overview page\");\n    // From introduction/overview/ the root is ../../\n    assert!(\n        overview_html.contains(\"../../index.html\") || overview_html.contains(\"../../\"),\n        \"Overview page should have ../../ relative paths to root\"\n    );\n\n    // Verify CSS and JS assets exist\n    let has_css = std::fs::read_dir(offline_dir.join(\"_astro\"))\n        .map(|entries| {\n            entries\n                .filter_map(|e| e.ok())\n                .any(|e| e.path().extension().map(|ext| ext == \"css\").unwrap_or(false))\n        })\n        .unwrap_or(false);\n    assert!(has_css, \"Should have CSS files in _astro/\");\n}\n\n// =========================================================================\n// 5. 
Markdown export — verify pages and internal links use .md extension\n// =========================================================================\n\n#[test]\n#[ignore]\nfn crawl_siteone_markdown_export() {\n    let _guard = SERIAL.lock().unwrap_or_else(|e| e.into_inner());\n\n    let tmp = TempDir::new(\"markdown\");\n    let md_dir = tmp.path.join(\"md\");\n\n    let md_dir_str = format!(\"--markdown-export-dir={}\", md_dir.display());\n    let mut args: Vec<&str> = vec![\"--url=https://crawler.siteone.io\", &md_dir_str];\n    args.extend_from_slice(&GENTLE_FLAGS);\n    let output = run_crawler(&args);\n\n    assert!(\n        output.status.success(),\n        \"Crawler failed with {:?}\",\n        output.status.code()\n    );\n\n    // Key markdown files must exist\n    assert!(md_dir.join(\"index.md\").exists(), \"Missing index.md\");\n    assert!(\n        md_dir.join(\"introduction/overview/index.md\").exists(),\n        \"Missing introduction/overview/index.md\"\n    );\n    assert!(\n        md_dir.join(\"features/seo-and-opengraph-analysis/index.md\").exists(),\n        \"Missing features/seo-and-opengraph-analysis/index.md\"\n    );\n    assert!(\n        md_dir.join(\"configuration/command-line-options/index.md\").exists(),\n        \"Missing configuration/command-line-options/index.md\"\n    );\n\n    // Count total markdown files (baseline: ~51)\n    let md_count = walkdir(md_dir.as_path(), \"md\");\n    assert!(\n        (45..=60).contains(&md_count),\n        \"Expected ~51 markdown files, got {}\",\n        md_count\n    );\n\n    // Check internal links in overview page use .md extension\n    let overview_md = std::fs::read_to_string(md_dir.join(\"introduction/overview/index.md\"))\n        .expect(\"Failed to read overview markdown\");\n\n    // Internal links should be relative .md paths\n    assert!(\n        overview_md.contains(\"../../introduction/key-features/index.md\"),\n        \"Overview should link to key-features/index.md\"\n    );\n    
assert!(\n        overview_md.contains(\"../../getting-started/quick-start-guide/index.md\"),\n        \"Overview should link to quick-start-guide/index.md\"\n    );\n\n    // External links should remain as absolute URLs\n    assert!(\n        overview_md.contains(\"https://github.com/\"),\n        \"External GitHub links should stay absolute\"\n    );\n\n    // Check index.md links\n    let index_md = std::fs::read_to_string(md_dir.join(\"index.md\")).expect(\"Failed to read index.md\");\n    // Internal links should use .md extension, not .html\n    // (index.html self-reference in nav logo is acceptable)\n    let html_internal_links: Vec<&str> = index_md\n        .lines()\n        .filter(|line| {\n            line.contains(\".html)\")\n                && !line.contains(\"http://\")\n                && !line.contains(\"https://\")\n                && !line.contains(\"index.html)\")\n        })\n        .collect();\n    assert!(\n        html_internal_links.is_empty(),\n        \"Markdown index.md should not have internal .html links: {:?}\",\n        &html_internal_links[..html_internal_links.len().min(3)]\n    );\n}\n\n/// Count files with given extension recursively.\nfn walkdir(dir: &Path, extension: &str) -> usize {\n    let mut count = 0;\n    if let Ok(entries) = std::fs::read_dir(dir) {\n        for entry in entries.flatten() {\n            let path = entry.path();\n            if path.is_dir() {\n                count += walkdir(&path, extension);\n            } else if path.extension().map(|e| e == extension).unwrap_or(false) {\n                count += 1;\n            }\n        }\n    }\n    count\n}\n\n// =========================================================================\n// 6. 
Single page crawl — verify only one HTML page is fetched\n// =========================================================================\n\n#[test]\n#[ignore]\nfn crawl_siteone_single_page() {\n    let _guard = SERIAL.lock().unwrap_or_else(|e| e.into_inner());\n\n    let json = run_crawler_json(&[\n        \"--url=https://crawler.siteone.io\",\n        \"--single-page\",\n        \"--output=json\",\n        \"--workers=2\",\n        \"--max-reqs-per-sec=5\",\n        \"--http-cache-dir=\",\n    ]);\n\n    let tables = &json[\"tables\"];\n    let ct = &tables[\"content-types\"];\n    let rows = ct[\"rows\"].as_array().expect(\"content-types rows\");\n\n    let html_count: i64 = rows\n        .iter()\n        .find(|r| r[\"contentType\"].as_str() == Some(\"HTML\"))\n        .and_then(|r| r[\"count\"].as_str())\n        .and_then(|s| s.parse().ok())\n        .unwrap_or(0);\n\n    assert_eq!(html_count, 1, \"Single page should crawl exactly 1 HTML page\");\n\n    // Should still fetch assets (JS, CSS, images)\n    let total_urls = json[\"stats\"][\"totalUrls\"].as_i64().unwrap_or(0);\n    assert!(\n        total_urls > 1,\n        \"Single page should still fetch assets, got {} total URLs\",\n        total_urls\n    );\n}\n\n// =========================================================================\n// 7. 
--version and --help flags\n// =========================================================================\n\n#[test]\nfn version_flag_exits_with_code_2() {\n    let output = run_crawler(&[\"--version\"]);\n    assert_eq!(output.status.code(), Some(2));\n    let stdout = String::from_utf8_lossy(&output.stdout);\n    assert!(stdout.contains(\"Version:\"), \"Expected version output, got: {}\", stdout);\n}\n\n#[test]\nfn help_flag_exits_with_code_2() {\n    let output = run_crawler(&[\"--help\"]);\n    assert_eq!(output.status.code(), Some(2));\n    let stdout = String::from_utf8_lossy(&output.stdout);\n    assert!(\n        stdout.contains(\"--url\") && stdout.contains(\"--output\"),\n        \"Help should list --url and --output options\"\n    );\n}\n\n// =========================================================================\n// 8. Invalid option — verify error and exit code 101\n// =========================================================================\n\n#[test]\nfn invalid_option_exits_with_code_101() {\n    let output = run_crawler(&[\"--url=https://example.com\", \"--nonexistent-option=foo\"]);\n    assert_eq!(\n        output.status.code(),\n        Some(101),\n        \"Expected exit code 101 for unknown option\"\n    );\n    let stderr = String::from_utf8_lossy(&output.stderr);\n    assert!(\n        stderr.contains(\"Unknown options: --nonexistent-option=foo\"),\n        \"Error should mention the unknown option, got: {}\",\n        stderr\n    );\n}\n\n#[test]\nfn unknown_option_after_bool_flag_detected() {\n    // Regression: bool flags (--ci, --single-page, --debug) must NOT consume\n    // the next argument as their \"value\", otherwise unknown options get skipped.\n    let output = run_crawler(&[\"--url=https://example.com\", \"--ci\", \"--no-cach\"]);\n    assert_eq!(\n        output.status.code(),\n        Some(101),\n        \"Expected exit code 101 for --no-cach after --ci\"\n    );\n    let stderr = String::from_utf8_lossy(&output.stderr);\n    
assert!(\n        stderr.contains(\"--no-cach\"),\n        \"Error should mention --no-cach, got: {}\",\n        stderr\n    );\n}\n\n#[test]\nfn unknown_option_typo_without_value() {\n    let output = run_crawler(&[\"--url=https://example.com\", \"--signle-page\"]);\n    assert_eq!(\n        output.status.code(),\n        Some(101),\n        \"Expected exit code 101 for misspelled --signle-page\"\n    );\n    let stderr = String::from_utf8_lossy(&output.stderr);\n    assert!(\n        stderr.contains(\"--signle-page\"),\n        \"Error should mention --signle-page, got: {}\",\n        stderr\n    );\n}\n\n// =========================================================================\n// --html-to-markdown: standalone HTML-to-Markdown conversion (no network)\n// =========================================================================\n\n#[test]\nfn html_to_markdown_basic_conversion() {\n    let tmp = TempDir::new(\"htm-convert\");\n    let html_path = tmp.path.join(\"page.html\");\n    std::fs::write(\n        &html_path,\n        \"<html><body><h1>Hello World</h1><p>Paragraph with <strong>bold</strong> text.</p>\\\n         <ul><li>Item 1</li><li>Item 2</li></ul></body></html>\",\n    )\n    .unwrap();\n\n    let output = run_crawler(&[&format!(\"--html-to-markdown={}\", html_path.display())]);\n    assert!(output.status.success(), \"Should exit 0\");\n\n    let stdout = String::from_utf8_lossy(&output.stdout);\n    assert!(stdout.contains(\"# Hello World\"), \"Should contain h1: {}\", stdout);\n    assert!(stdout.contains(\"**bold**\"), \"Should contain bold: {}\", stdout);\n    assert!(stdout.contains(\"- Item 1\"), \"Should contain list: {}\", stdout);\n    assert!(stdout.contains(\"- Item 2\"), \"Should contain list item 2: {}\", stdout);\n}\n\n#[test]\nfn html_to_markdown_output_to_file() {\n    let tmp = TempDir::new(\"htm-output\");\n    let html_path = tmp.path.join(\"input.html\");\n    let md_path = tmp.path.join(\"output.md\");\n    
std::fs::write(&html_path, \"<html><body><h1>Title</h1><p>Content</p></body></html>\").unwrap();\n\n    let output = run_crawler(&[\n        &format!(\"--html-to-markdown={}\", html_path.display()),\n        &format!(\"--html-to-markdown-output={}\", md_path.display()),\n    ]);\n    assert!(output.status.success(), \"Should exit 0\");\n    assert!(md_path.exists(), \"Output file should exist\");\n\n    let md_content = std::fs::read_to_string(&md_path).unwrap();\n    assert!(\n        md_content.contains(\"# Title\"),\n        \"Output file should contain heading: {}\",\n        md_content\n    );\n\n    // stdout should be empty (output went to file)\n    assert!(output.stdout.is_empty(), \"stdout should be empty when writing to file\");\n\n    // status message should be on stderr\n    let stderr = String::from_utf8_lossy(&output.stderr);\n    assert!(\n        stderr.contains(\"Markdown written to\"),\n        \"stderr should contain success message: {}\",\n        stderr\n    );\n}\n\n#[test]\nfn html_to_markdown_nonexistent_file() {\n    let output = run_crawler(&[\"--html-to-markdown=/tmp/siteone_nonexistent_file_12345.html\"]);\n    assert_eq!(output.status.code(), Some(101), \"Should exit 101 for nonexistent file\");\n    let stderr = String::from_utf8_lossy(&output.stderr);\n    assert!(\n        stderr.contains(\"does not exist\"),\n        \"Error should mention file doesn't exist: {}\",\n        stderr\n    );\n}\n\n#[test]\nfn html_to_markdown_with_disable_images() {\n    let tmp = TempDir::new(\"htm-no-img\");\n    let html_path = tmp.path.join(\"page.html\");\n    std::fs::write(\n        &html_path,\n        \"<html><body><h1>Title</h1><img src=\\\"photo.jpg\\\" alt=\\\"Photo\\\"><p>Text</p></body></html>\",\n    )\n    .unwrap();\n\n    let output = run_crawler(&[\n        &format!(\"--html-to-markdown={}\", html_path.display()),\n        \"--markdown-disable-images\",\n    ]);\n    assert!(output.status.success());\n\n    let stdout = 
String::from_utf8_lossy(&output.stdout);\n    assert!(!stdout.contains(\"photo.jpg\"), \"Images should be removed: {}\", stdout);\n    assert!(stdout.contains(\"# Title\"));\n    assert!(stdout.contains(\"Text\"));\n}\n\n#[test]\nfn html_to_markdown_preserves_original_links() {\n    let tmp = TempDir::new(\"htm-links\");\n    let html_path = tmp.path.join(\"page.html\");\n    std::fs::write(\n        &html_path,\n        r#\"<html><body><h1>Title</h1><a href=\"/about.html\">About</a>\n           <a href=\"https://example.com\">External</a>\n           <a href=\"tel:+420123456\">Call</a></body></html>\"#,\n    )\n    .unwrap();\n\n    let output = run_crawler(&[&format!(\"--html-to-markdown={}\", html_path.display())]);\n    assert!(output.status.success());\n\n    let stdout = String::from_utf8_lossy(&output.stdout);\n    // Links should NOT be rewritten to .md (standalone mode)\n    assert!(\n        stdout.contains(\"/about.html\"),\n        \"HTML links should be preserved as-is: {}\",\n        stdout\n    );\n    assert!(\n        stdout.contains(\"https://example.com\"),\n        \"External links preserved: {}\",\n        stdout\n    );\n    assert!(stdout.contains(\"tel:+420123456\"), \"Tel links preserved: {}\", stdout);\n}\n\n#[test]\nfn html_to_markdown_with_exclude_selector() {\n    let tmp = TempDir::new(\"htm-exclude\");\n    let html_path = tmp.path.join(\"page.html\");\n    std::fs::write(\n        &html_path,\n        \"<html><body><h1>Title</h1><nav><a href=\\\"/\\\">Home</a></nav><p>Main content</p></body></html>\",\n    )\n    .unwrap();\n\n    let output = run_crawler(&[\n        &format!(\"--html-to-markdown={}\", html_path.display()),\n        \"--markdown-exclude-selector=nav\",\n    ]);\n    assert!(output.status.success());\n\n    let stdout = String::from_utf8_lossy(&output.stdout);\n    assert!(stdout.contains(\"Main content\"), \"Content should be present: {}\", stdout);\n    assert!(!stdout.contains(\"Home\"), \"Nav should be excluded: 
{}\", stdout);\n}\n\n#[test]\nfn html_to_markdown_aria_hidden_excluded() {\n    let tmp = TempDir::new(\"htm-aria\");\n    let html_path = tmp.path.join(\"page.html\");\n    std::fs::write(\n        &html_path,\n        r#\"<html><body><h1>Title</h1>\n           <div aria-hidden=\"true\"><p>Hidden mega menu</p></div>\n           <p>Visible content</p></body></html>\"#,\n    )\n    .unwrap();\n\n    let output = run_crawler(&[&format!(\"--html-to-markdown={}\", html_path.display())]);\n    assert!(output.status.success());\n\n    let stdout = String::from_utf8_lossy(&output.stdout);\n    assert!(stdout.contains(\"Visible content\"));\n    assert!(\n        !stdout.contains(\"Hidden mega menu\"),\n        \"aria-hidden should be excluded: {}\",\n        stdout\n    );\n}\n\n#[test]\nfn html_to_markdown_output_without_input_fails() {\n    let output = run_crawler(&[\"--html-to-markdown-output=/tmp/out.md\"]);\n    assert_eq!(\n        output.status.code(),\n        Some(101),\n        \"Should exit 101 when output is set without input\"\n    );\n    let stderr = String::from_utf8_lossy(&output.stderr);\n    assert!(\n        stderr.contains(\"--html-to-markdown-output requires --html-to-markdown\"),\n        \"Should mention missing input: {}\",\n        stderr\n    );\n}\n\n#[test]\nfn html_to_markdown_with_move_before_h1() {\n    let tmp = TempDir::new(\"htm-move-h1\");\n    let html_path = tmp.path.join(\"page.html\");\n    std::fs::write(\n        &html_path,\n        \"<html><body><nav><a href=\\\"/\\\">Home</a><a href=\\\"/about\\\">About</a></nav>\\\n         <h1>Main Title</h1><p>Page body</p></body></html>\",\n    )\n    .unwrap();\n\n    let output = run_crawler(&[\n        &format!(\"--html-to-markdown={}\", html_path.display()),\n        \"--markdown-move-content-before-h1-to-end\",\n    ]);\n    assert!(output.status.success());\n\n    let stdout = String::from_utf8_lossy(&output.stdout);\n    assert!(\n        stdout.starts_with(\"# Main Title\"),\n      
  \"Should start with h1 heading: {}\",\n        stdout\n    );\n    assert!(stdout.contains(\"Page body\"));\n}\n"
  }
]