Repository: hahwul/deadfinder Branch: main Commit: a8d3f5e6f12c Files: 97 Total size: 250.9 KB Directory structure: gitextract_ukz2km7y/ ├── .dockerignore ├── .github/ │ ├── FUNDING.yml │ ├── dependabot.yml │ ├── labeler.yml │ └── workflows/ │ ├── ci.yml │ ├── compat.yml │ ├── contributors.yml │ ├── crystal-release.yml │ ├── docker-build.yml │ ├── docker-ghcr.yml │ ├── docs.yml │ ├── goyo-update.yml │ ├── labeler.yml │ ├── publish-snapcraft.yml │ ├── release-apk.yml │ ├── release-aur.yml │ ├── release-deb.yml │ ├── release-major-tag.yml │ ├── release-rpm.yml │ └── release-sbom.yml ├── .gitignore ├── AGENTS.md ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── README.md ├── SECURITY.md ├── action.yml ├── aur/ │ └── PKGBUILD ├── docs/ │ ├── AGENTS.md │ ├── config.toml │ ├── content/ │ │ ├── about.md │ │ ├── docs/ │ │ │ ├── _index.md │ │ │ ├── getting-started/ │ │ │ │ ├── _index.md │ │ │ │ ├── installation.md │ │ │ │ └── quickstart.md │ │ │ ├── integration/ │ │ │ │ ├── _index.md │ │ │ │ ├── docker.md │ │ │ │ └── github-action.md │ │ │ ├── reference/ │ │ │ │ ├── _index.md │ │ │ │ └── cli-flags.md │ │ │ └── usage/ │ │ │ ├── _index.md │ │ │ ├── filtering.md │ │ │ ├── output-formats.md │ │ │ └── subcommands.md │ │ └── index.md │ ├── static/ │ │ ├── CNAME │ │ ├── css/ │ │ │ └── style.css │ │ ├── icons/ │ │ │ └── site.webmanifest │ │ └── js/ │ │ └── search.js │ └── templates/ │ ├── 404.html │ ├── footer.html │ ├── header.html │ ├── page.html │ ├── section.html │ ├── shortcodes/ │ │ └── alert.html │ ├── taxonomy.html │ └── taxonomy_term.html ├── flake.nix ├── github-action/ │ └── README.md ├── justfile ├── scripts/ │ ├── version_check.cr │ └── version_update.cr ├── shard.yml ├── shards.nix ├── snap/ │ └── snapcraft.yaml ├── spec/ │ ├── compat/ │ │ ├── README.md │ │ ├── fixtures/ │ │ │ └── server.rb │ │ ├── golden/ │ │ │ ├── file_json.json │ │ │ ├── pipe_json.json │ │ │ ├── url_csv.csv │ │ │ ├── url_json.json │ │ │ ├── url_json_include30x.json │ │ │ ├── url_toml.toml │ │ │ └── 
url_yaml.yaml │ │ └── run.rb │ ├── deadfinder/ │ │ ├── cli_spec.cr │ │ ├── http_client_spec.cr │ │ ├── logger_spec.cr │ │ ├── runner_spec.cr │ │ ├── url_pattern_matcher_spec.cr │ │ ├── utils_spec.cr │ │ └── visualizer_spec.cr │ ├── deadfinder_spec.cr │ └── spec_helper.cr └── src/ ├── cli_main.cr ├── deadfinder/ │ ├── cli.cr │ ├── completion.cr │ ├── http_client.cr │ ├── logger.cr │ ├── runner.cr │ ├── types.cr │ ├── url_pattern_matcher.cr │ ├── utils.cr │ ├── version.cr │ └── visualizer.cr └── deadfinder.cr ================================================ FILE CONTENTS ================================================ ================================================ FILE: .dockerignore ================================================ .git .github docs examples github-action spec tmp coverage lib deadfinder AGENTS.md README.md SECURITY.md action.yml ================================================ FILE: .github/FUNDING.yml ================================================ github: hahwul ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: github-actions directory: / schedule: interval: weekly - package-ecosystem: docker directory: / schedule: interval: weekly - package-ecosystem: bundler directory: "/" schedule: interval: weekly target-branch: "main" ================================================ FILE: .github/labeler.yml ================================================ --- config: - changed-files: - any-glob-to-any-file: - shard.yml - shard.lock - .github/labeler.yml dependencies: - changed-files: - any-glob-to-any-file: - shard.yml - shard.lock workflow: - changed-files: - any-glob-to-any-file: - .github/workflows/** - .github/labeler.yml github-action: - changed-files: - any-glob-to-any-file: - action.yml docker: - changed-files: - any-glob-to-any-file: - Dockerfile - .dockerignore - .github/workflows/docker-ghcr.yml - 
.github/workflows/docker-build.yml code: - changed-files: - any-glob-to-any-file: - src/** - spec/** documentation: - changed-files: - any-glob-to-any-file: - README.md - CHANGELOG.md - AGENTS.md - SECURITY.md - docs/** ================================================ FILE: .github/workflows/ci.yml ================================================ --- name: CI on: push: branches: [main] pull_request: branches: [main] jobs: spec: runs-on: ubuntu-latest strategy: fail-fast: false matrix: crystal-version: ["1.19.1", "1.20.0"] steps: - uses: actions/checkout@v6 - name: Set up Crystal ${{ matrix.crystal-version }} uses: crystal-lang/install-crystal@v1 with: crystal: ${{ matrix.crystal-version }} - name: Install cmake (lexbor dependency) run: sudo apt-get update && sudo apt-get install -y cmake - name: Install shards run: shards install - name: Run crystal spec run: crystal spec format: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: crystal-lang/install-crystal@v1 - name: Check formatting run: crystal tool format --check src spec ================================================ FILE: .github/workflows/compat.yml ================================================ --- name: Compat Tests on: push: branches: [main] pull_request: branches: [main] jobs: compat: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Set up Ruby (harness driver) uses: ruby/setup-ruby@v1 with: ruby-version: '3.4' bundler-cache: false - name: Install harness Ruby deps run: gem install --no-document toml-rb - name: Set up Crystal uses: crystal-lang/install-crystal@v1 - name: Install cmake (for lexbor) run: sudo apt-get update && sudo apt-get install -y cmake - name: Build Crystal binary run: | shards install crystal build src/cli_main.cr -o deadfinder --release - name: Compat — Crystal implementation env: BIN: ./deadfinder run: ruby spec/compat/run.rb ================================================ FILE: .github/workflows/contributors.yml 
================================================ --- name: Contributors on: push: branches: [main] workflow_dispatch: inputs: logLevel: description: manual run required: false default: '' permissions: contents: write pull-requests: write jobs: contributors: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: wow-actions/contributors-list@v1.2.1 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} round: false includeBots: false svgPath: docs/static/images/CONTRIBUTORS.svg noCommit: true - uses: peter-evans/create-pull-request@v8.1.1 with: token: ${{ secrets.GITHUB_TOKEN }} commit-message: "chore: update contributors" title: "chore: update contributors" body: Automated update of `docs/static/images/CONTRIBUTORS.svg`. branch: chore/update-contributors delete-branch: true ================================================ FILE: .github/workflows/crystal-release.yml ================================================ --- name: Crystal Release Builds on: release: types: [published] workflow_dispatch: permissions: contents: write env: CRYSTAL_BUILD_IMAGE: crystallang/crystal:1.19.1-alpine jobs: build-linux: strategy: fail-fast: false matrix: include: - arch: x86_64 runs-on: ubuntu-latest - arch: aarch64 runs-on: ubuntu-24.04-arm runs-on: ${{ matrix.runs-on }} steps: - uses: actions/checkout@v6 - name: Build static binary (Alpine / musl) run: | docker run --rm -v "$PWD":/workspace -w /workspace \ ${{ env.CRYSTAL_BUILD_IMAGE }} \ sh -c 'apk add --no-cache cmake make g++ \ && shards install \ && crystal build src/cli_main.cr -o deadfinder --release --static --no-debug' - name: Package run: | # Docker container ran as root, so the binary lands as root-owned. # Reclaim ownership before chmod, otherwise it fails with EPERM. 
sudo chown "$(id -u):$(id -g)" deadfinder chmod +x deadfinder tar czf deadfinder-linux-${{ matrix.arch }}.tar.gz deadfinder sha256sum deadfinder-linux-${{ matrix.arch }}.tar.gz > deadfinder-linux-${{ matrix.arch }}.tar.gz.sha256 - name: Upload to release if: github.event_name == 'release' uses: softprops/action-gh-release@v3 with: files: | deadfinder-linux-${{ matrix.arch }}.tar.gz deadfinder-linux-${{ matrix.arch }}.tar.gz.sha256 - name: Upload as workflow artifact if: github.event_name == 'workflow_dispatch' uses: actions/upload-artifact@v7 with: name: deadfinder-linux-${{ matrix.arch }} path: | deadfinder-linux-${{ matrix.arch }}.tar.gz deadfinder-linux-${{ matrix.arch }}.tar.gz.sha256 build-macos: # macOS x86_64 (macos-13) is no longer built — Apple's Intel transition # has shrunk GitHub's macos-13 runner pool to the point where releases # routinely sit in the queue indefinitely. Apple Silicon binaries cover # current macOS users; Intel users can `brew install` from source or run # the Apple Silicon binary under Rosetta. 
strategy: fail-fast: false matrix: include: - arch: arm64 runs-on: macos-latest runs-on: ${{ matrix.runs-on }} steps: - uses: actions/checkout@v6 - name: Install Crystal and cmake run: brew install crystal cmake - name: Build release binary run: | shards install crystal build src/cli_main.cr -o deadfinder --release --no-debug - name: Package run: | chmod +x deadfinder tar czf deadfinder-macos-${{ matrix.arch }}.tar.gz deadfinder shasum -a 256 deadfinder-macos-${{ matrix.arch }}.tar.gz > deadfinder-macos-${{ matrix.arch }}.tar.gz.sha256 - name: Upload to release if: github.event_name == 'release' uses: softprops/action-gh-release@v3 with: files: | deadfinder-macos-${{ matrix.arch }}.tar.gz deadfinder-macos-${{ matrix.arch }}.tar.gz.sha256 - name: Upload as workflow artifact if: github.event_name == 'workflow_dispatch' uses: actions/upload-artifact@v7 with: name: deadfinder-macos-${{ matrix.arch }} path: | deadfinder-macos-${{ matrix.arch }}.tar.gz deadfinder-macos-${{ matrix.arch }}.tar.gz.sha256 ================================================ FILE: .github/workflows/docker-build.yml ================================================ --- name: Docker Build CI on: pull_request: branches: [main] push: branches: [main] workflow_dispatch: jobs: build-docker: strategy: fail-fast: false matrix: include: - arch: linux/amd64 runner: ubuntu-latest - arch: linux/arm64 runner: ubuntu-24.04-arm runs-on: ${{ matrix.runner }} steps: - uses: actions/checkout@v6 - name: Setup Docker buildx uses: docker/setup-buildx-action@v4 - name: Prepare platform slug id: platform run: echo "slug=$(echo '${{ matrix.arch }}' | tr '/' '-')" >> "$GITHUB_OUTPUT" - name: Extract Docker metadata id: meta uses: docker/metadata-action@v6 with: images: ghcr.io/${{ github.repository }} - name: Build Docker image uses: docker/build-push-action@v7 with: context: . 
push: false tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} platforms: ${{ matrix.arch }} cache-from: type=gha,scope=build-${{ steps.platform.outputs.slug }} cache-to: type=gha,mode=max,scope=build-${{ steps.platform.outputs.slug }} ================================================ FILE: .github/workflows/docker-ghcr.yml ================================================ --- name: GHCR Publish on: push: branches: [main] release: types: [published] workflow_dispatch: inputs: version: description: Version to build and tag (e.g., 2.0.0) required: true type: string env: REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }} jobs: build: runs-on: ubuntu-latest permissions: contents: read packages: write id-token: write strategy: fail-fast: false matrix: platform: [linux/amd64, linux/arm64] steps: - uses: actions/checkout@v6 - name: Set up QEMU uses: docker/setup-qemu-action@v4 - name: Setup Docker buildx uses: docker/setup-buildx-action@v4 - name: Log into ${{ env.REGISTRY }} if: github.event_name != 'pull_request' uses: docker/login-action@v4 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Extract Docker metadata id: meta uses: docker/metadata-action@v6 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - name: Prepare platform slug id: platform run: echo "slug=$(echo '${{ matrix.platform }}' | tr '/' '-')" >> "$GITHUB_OUTPUT" - name: Build and push by digest id: build uses: docker/build-push-action@v7 with: context: . 
platforms: ${{ matrix.platform }} labels: ${{ steps.meta.outputs.labels }} outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true cache-from: type=gha,scope=ghcr-${{ steps.platform.outputs.slug }} cache-to: type=gha,mode=max,scope=ghcr-${{ steps.platform.outputs.slug }} # push-by-digest only pushes the image manifest; provenance wraps it in # a manifest list, so the reported digest would point at a list that # was never pushed and the merge step fails with "not found". provenance: false sbom: false - name: Export digest run: | mkdir -p /tmp/digests digest="${{ steps.build.outputs.digest }}" touch "/tmp/digests/${digest#sha256:}" - name: Upload digest uses: actions/upload-artifact@v7 with: name: digests-${{ steps.platform.outputs.slug }} path: /tmp/digests/* if-no-files-found: error retention-days: 1 merge: runs-on: ubuntu-latest needs: build permissions: contents: read packages: write steps: - name: Download digests uses: actions/download-artifact@v8 with: path: /tmp/digests pattern: digests-* merge-multiple: true - name: Setup Docker buildx uses: docker/setup-buildx-action@v4 - name: Log into ${{ env.REGISTRY }} uses: docker/login-action@v4 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Normalize dispatch version if: github.event_name == 'workflow_dispatch' id: normalize run: | RAW_VERSION="${{ inputs.version }}" VERSION="${RAW_VERSION#v}" echo "version=$VERSION" >> "$GITHUB_OUTPUT" - name: Extract Docker metadata (tags) id: meta uses: docker/metadata-action@v6 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | type=ref,event=branch type=semver,pattern={{version}},enable=${{ github.event_name == 'release' }} type=semver,pattern={{major}}.{{minor}},enable=${{ github.event_name == 'release' }} type=raw,value=latest,enable=${{ github.event_name == 'release' }} type=raw,value=${{ steps.normalize.outputs.version }},enable=${{ 
github.event_name == 'workflow_dispatch' }} type=raw,value=latest,enable=${{ github.event_name == 'workflow_dispatch' }} - name: Create manifest list and push working-directory: /tmp/digests run: | docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ $(printf '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@sha256:%s ' *) - name: Inspect image run: docker buildx imagetools inspect ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }} cleanup: runs-on: ubuntu-latest needs: [build, merge] if: always() && needs.build.result == 'success' permissions: packages: write steps: # The build matrix pushes per-platform digests with push-by-digest=true, # which leaves untagged manifests in GHCR after the merge job assembles # the multi-arch manifest list. Prune them so only tagged versions # (main, latest, semver) remain — run even if merge fails so orphaned # per-platform digests don't accumulate in the package listing. - name: Delete untagged GHCR versions uses: actions/delete-package-versions@v5 with: package-name: deadfinder package-type: container delete-only-untagged-versions: 'true' min-versions-to-keep: 0 ================================================ FILE: .github/workflows/docs.yml ================================================ --- name: Docs CI/CD on: push: branches: [main] paths: - "docs/**" - ".github/workflows/docs.yml" pull_request: branches: [main] paths: - "docs/**" - ".github/workflows/docs.yml" workflow_dispatch: permissions: contents: write jobs: build: if: github.event_name == 'pull_request' runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Build (no deploy) uses: hahwul/hwaro@main with: build_dir: "docs" build_only: true deploy: if: github.event_name != 'pull_request' runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Build and deploy to GitHub Pages uses: hahwul/hwaro@main with: build_dir: "docs" token: ${{ secrets.GITHUB_TOKEN }} 
================================================ FILE: .github/workflows/goyo-update.yml ================================================ name: Update Goyo Theme on: schedule: # Run every Monday at 9:00 AM UTC - cron: "0 9 * * 1" workflow_dispatch: # Allow manual trigger env: GIT_USER_NAME: "hahwul" GIT_USER_EMAIL: "hahwul@gmail.com" THEME_PATH: "docs/themes/goyo" jobs: update-theme: runs-on: ubuntu-latest permissions: contents: write pull-requests: write steps: - name: Checkout repository uses: actions/checkout@v6 with: submodules: true token: ${{ secrets.GITHUB_TOKEN }} - name: Update Goyo submodule id: update run: | git config user.name "${{ env.GIT_USER_NAME }}" git config user.email "${{ env.GIT_USER_EMAIL }}" # Get current commit hash OLD_COMMIT=$(git rev-parse HEAD:${{ env.THEME_PATH }}) # Update submodule to latest git submodule update --remote ${{ env.THEME_PATH }} git add ${{ env.THEME_PATH }} # Get new commit hash NEW_COMMIT=$(git --git-dir=${{ env.THEME_PATH }}/.git rev-parse HEAD) # Check if there are changes if [ "$OLD_COMMIT" != "$NEW_COMMIT" ]; then echo "updated=true" >> $GITHUB_OUTPUT echo "old_commit=$OLD_COMMIT" >> $GITHUB_OUTPUT echo "new_commit=$NEW_COMMIT" >> $GITHUB_OUTPUT else echo "updated=false" >> $GITHUB_OUTPUT fi - name: Create Pull Request if: steps.update.outputs.updated == 'true' uses: peter-evans/create-pull-request@v8 with: token: ${{ secrets.GITHUB_TOKEN }} commit-message: "Update Goyo theme to latest version" title: "Update Goyo theme" body: | This PR updates the Goyo theme to the latest version. **Changes:** ${{ steps.update.outputs.old_commit }} → ${{ steps.update.outputs.new_commit }} Please review the [Goyo changelog](https://github.com/hahwul/goyo/releases) for details on what's new. 
--- *This PR was automatically created by the Update Goyo Theme workflow.* branch: update-goyo-theme delete-branch: true labels: dependencies, documentation ================================================ FILE: .github/workflows/labeler.yml ================================================ --- name: Pull Request Labeler on: [pull_request_target] jobs: labeler: permissions: contents: read pull-requests: write runs-on: ubuntu-latest steps: - uses: actions/labeler@v6 ================================================ FILE: .github/workflows/publish-snapcraft.yml ================================================ --- name: Snapcraft Publish on: release: types: [published] workflow_dispatch: inputs: logLevel: description: Log level required: true default: warning tags: description: Test scenario tags jobs: snapcraft-releaser: runs-on: ubuntu-latest strategy: fail-fast: false matrix: platform: - amd64 steps: - uses: actions/checkout@v6 - name: Build snap id: build uses: canonical/action-build@v1 - name: Publish snap to the stable channel if: github.event_name == 'release' uses: snapcore/action-publish@master env: SNAPCRAFT_STORE_CREDENTIALS: ${{ secrets.SNAP_STORE_LOGIN }} with: snap: ${{ steps.build.outputs.snap }} release: stable - name: Upload snap as workflow artifact if: github.event_name == 'workflow_dispatch' uses: actions/upload-artifact@v7 with: name: deadfinder-snap-${{ matrix.platform }} path: ${{ steps.build.outputs.snap }} ================================================ FILE: .github/workflows/release-apk.yml ================================================ --- name: Build and Release .apk Package on: workflow_dispatch: inputs: version: description: "Version to build (e.g., 2.0.0)" required: true type: string upload_to_release: description: "Upload .apk to GitHub Release (requires existing tag)" required: false type: boolean default: false workflow_run: workflows: ["Crystal Release Builds"] types: [completed] permissions: contents: write jobs: build-apk: if: >- 
github.event_name == 'workflow_dispatch' || (github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'release') strategy: fail-fast: false matrix: include: - arch: x86_64 asset_arch: x86_64 - arch: aarch64 asset_arch: aarch64 runs-on: ubuntu-latest container: image: alpine:latest steps: - name: Install build tools run: apk add --no-cache alpine-sdk sudo github-cli git - name: Trust workspace run: git config --global --add safe.directory "$GITHUB_WORKSPACE" - uses: actions/checkout@v6 with: ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.version || github.event.workflow_run.head_branch }} - name: Resolve version run: | if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then RAW="${{ github.event.inputs.version }}" else RAW="${{ github.event.workflow_run.head_branch }}" fi VERSION="${RAW#v}" echo "VERSION=$VERSION" >> "$GITHUB_ENV" - name: Download prebuilt binary env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | gh release download "${{ env.VERSION }}" \ --pattern "deadfinder-linux-${{ matrix.asset_arch }}.tar.gz" \ --output deadfinder.tar.gz tar xzf deadfinder.tar.gz chmod +x deadfinder - name: Setup abuild run: | adduser -D builder addgroup builder abuild echo "builder ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers sudo -u builder abuild-keygen -ain - name: Create APKBUILD run: | mkdir -p /home/builder/deadfinder cp deadfinder /home/builder/deadfinder/ cp LICENSE /home/builder/deadfinder/ cat > /home/builder/deadfinder/APKBUILD <<APKEOF # Maintainer: hahwul <hahwul@gmail.com> pkgname=deadfinder pkgver=${{ env.VERSION }} pkgrel=0 pkgdesc="Find dead (broken) links in web pages, URL lists, and sitemaps."
url="https://github.com/hahwul/deadfinder" arch="${{ matrix.arch }}" license="MIT" source="" options="!check !strip !tracedeps" package() { install -Dm755 "\$srcdir/../deadfinder" "\$pkgdir/usr/bin/deadfinder" install -Dm644 "\$srcdir/../LICENSE" "\$pkgdir/usr/share/licenses/\$pkgname/LICENSE" } APKEOF sed -i 's/^ //' /home/builder/deadfinder/APKBUILD chown -R builder:builder /home/builder/deadfinder - name: Build .apk run: | cd /home/builder/deadfinder sudo -u builder -H CARCH=${{ matrix.arch }} abuild -F checksum sudo -u builder -H CARCH=${{ matrix.arch }} abuild -Fr - name: Collect artifacts run: | mkdir -p output # abuild emits deadfinder-${VERSION}-r${pkgrel}.apk without arch in # the filename, so rename it to include the arch and avoid x86_64 / # aarch64 jobs overwriting each other on the GitHub Release. for src in $(find /home/builder/packages -name "*.apk" ! -name "APKINDEX*"); do cp "$src" "output/deadfinder-${{ env.VERSION }}-${{ matrix.arch }}.apk" done ls -la output/ - name: Upload artifact uses: actions/upload-artifact@v7 with: name: deadfinder-${{ env.VERSION }}-${{ matrix.arch }}.apk path: output/*.apk - name: Upload .apk to Release if: github.event_name == 'workflow_run' || (github.event_name == 'workflow_dispatch' && inputs.upload_to_release) env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | for f in output/*.apk; do gh release upload "${{ env.VERSION }}" "$f" --clobber done ================================================ FILE: .github/workflows/release-aur.yml ================================================ --- name: Publish AUR Package on: workflow_dispatch: inputs: version: description: "Version to publish (e.g., 2.0.0)" required: true type: string workflow_run: workflows: ["Crystal Release Builds"] types: [completed] jobs: publish-aur: if: >- github.event_name == 'workflow_dispatch' || (github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'release') runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 
with: ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.version || github.event.workflow_run.head_branch }} - name: Resolve version run: | if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then RAW="${{ github.event.inputs.version }}" else RAW="${{ github.event.workflow_run.head_branch }}" fi VERSION="${RAW#v}" echo "VERSION=$VERSION" >> "$GITHUB_ENV" - name: Update PKGBUILD run: | sed -i "s/^pkgver=.*/pkgver=${{ env.VERSION }}/" aur/PKGBUILD sed -i "s/^pkgrel=.*/pkgrel=1/" aur/PKGBUILD cat aur/PKGBUILD - name: Publish to AUR uses: KSXGitHub/github-actions-deploy-aur@v4.1.3 with: pkgname: deadfinder pkgbuild: aur/PKGBUILD commit_username: hahwul commit_email: hahwul@gmail.com ssh_private_key: ${{ secrets.AUR_SSH_PRIVATE_KEY }} ================================================ FILE: .github/workflows/release-deb.yml ================================================ --- name: Build and Release .deb Package on: workflow_dispatch: inputs: version: description: "Version to build (e.g., 2.0.0)" required: true type: string upload_to_release: description: "Upload .deb to GitHub Release (requires existing tag)" required: false type: boolean default: false workflow_run: workflows: ["Crystal Release Builds"] types: [completed] permissions: contents: write jobs: build-deb: if: >- github.event_name == 'workflow_dispatch' || (github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'release') strategy: fail-fast: false matrix: include: - arch: amd64 asset_arch: x86_64 - arch: arm64 asset_arch: aarch64 runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 with: ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.version || github.event.workflow_run.head_branch }} - name: Resolve version id: version run: | if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then RAW="${{ github.event.inputs.version }}" else RAW="${{ github.event.workflow_run.head_branch }}" fi VERSION="${RAW#v}" echo
"VERSION=$VERSION" >> "$GITHUB_ENV" echo "Resolved: $VERSION" - name: Download prebuilt binary env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | gh release download "${{ env.VERSION }}" \ --pattern "deadfinder-linux-${{ matrix.asset_arch }}.tar.gz" \ --output deadfinder.tar.gz tar xzf deadfinder.tar.gz chmod +x deadfinder - name: Build Debian package layout run: | PKGDIR="deadfinder_${{ env.VERSION }}_${{ matrix.arch }}" mkdir -p "$PKGDIR/DEBIAN" "$PKGDIR/usr/bin" "$PKGDIR/usr/share/doc/deadfinder" cp deadfinder "$PKGDIR/usr/bin/" cp README.md "$PKGDIR/usr/share/doc/deadfinder/" cp LICENSE "$PKGDIR/usr/share/doc/deadfinder/" cat > "$PKGDIR/DEBIAN/control" <<EOF Package: deadfinder Version: ${{ env.VERSION }} Architecture: ${{ matrix.arch }} Maintainer: hahwul <hahwul@gmail.com> Description: Find dead (broken) links in web pages, URL lists, and sitemaps. EOF dpkg-deb --build "$PKGDIR" - name: Upload artifact uses: actions/upload-artifact@v7 with: name: deadfinder_${{ env.VERSION }}_${{ matrix.arch }}.deb path: deadfinder_${{ env.VERSION }}_${{ matrix.arch }}.deb - name: Upload .deb to Release if: github.event_name == 'workflow_run' || (github.event_name == 'workflow_dispatch' && inputs.upload_to_release) env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | gh release upload "${{ env.VERSION }}" \ "deadfinder_${{ env.VERSION }}_${{ matrix.arch }}.deb" --clobber ================================================ FILE: .github/workflows/release-major-tag.yml ================================================ --- name: Update Major Version Tag on: release: types: [published] permissions: contents: write # Force-update the floating `v<major>` tag (e.g. `v2`) to point at the # latest published `<major>.<minor>.<patch>` release. Lets callers pin # `uses: hahwul/deadfinder@v2` and receive bug-fix patches automatically. # The `v` prefix is required — GitHub Actions rejects bare `2` as a # "shortened commit SHA". Skips pre-releases so RC tags don't displace # the stable pointer.
jobs: bump-major-tag: if: github.event.release.prerelease == false runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 with: fetch-depth: 0 - name: Move v tag to release commit env: TAG: ${{ github.event.release.tag_name }} run: | set -e stripped="${TAG#v}" major="${stripped%%.*}" if ! [[ "$major" =~ ^[0-9]+$ ]]; then echo "Skipping: derived major '$major' from tag '$TAG' is not numeric." exit 0 fi movable="v${major}" git config user.name "github-actions[bot]" git config user.email "41898282+github-actions[bot]@users.noreply.github.com" git tag -f "$movable" "$TAG" git push origin "refs/tags/$movable" --force echo "Moved tag '$movable' → '$TAG'." ================================================ FILE: .github/workflows/release-rpm.yml ================================================ --- name: Build and Release .rpm Package on: workflow_dispatch: inputs: version: description: "Version to build (e.g., 2.0.0)" required: true type: string upload_to_release: description: "Upload .rpm to GitHub Release (requires existing tag)" required: false type: boolean default: false workflow_run: workflows: ["Crystal Release Builds"] types: [completed] permissions: contents: write jobs: build-rpm: if: >- github.event_name == 'workflow_dispatch' || (github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'release') strategy: fail-fast: false matrix: include: - arch: x86_64 asset_arch: x86_64 - arch: aarch64 asset_arch: aarch64 runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 with: ref: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.version || github.event.workflow_run.head_branch }} - name: Resolve version run: | if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then RAW="${{ github.event.inputs.version }}" else RAW="${{ github.event.workflow_run.head_branch }}" fi VERSION="${RAW#v}" echo "VERSION=$VERSION" >> "$GITHUB_ENV" - name: Download prebuilt binary env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | gh 
release download "${{ env.VERSION }}" \ --pattern "deadfinder-linux-${{ matrix.asset_arch }}.tar.gz" \ --output deadfinder.tar.gz tar xzf deadfinder.tar.gz chmod +x deadfinder - name: Set up Go uses: actions/setup-go@v6 with: go-version: "stable" - name: Install nfpm run: go install github.com/goreleaser/nfpm/v2/cmd/nfpm@latest - name: Build .rpm run: | cat > nfpm.yaml <<EOF name: deadfinder arch: ${{ matrix.arch }} version: ${{ env.VERSION }} maintainer: hahwul <hahwul@gmail.com> description: "Find dead (broken) links in web pages, URL lists, and sitemaps." license: MIT contents: - src: deadfinder dst: /usr/bin/deadfinder file_info: mode: 0755 - src: LICENSE dst: /usr/share/licenses/deadfinder/LICENSE file_info: mode: 0644 EOF nfpm package --packager rpm --target deadfinder-${{ env.VERSION }}.${{ matrix.arch }}.rpm - name: Upload artifact uses: actions/upload-artifact@v7 with: name: deadfinder-${{ env.VERSION }}.${{ matrix.arch }}.rpm path: deadfinder-${{ env.VERSION }}.${{ matrix.arch }}.rpm - name: Upload .rpm to Release if: github.event_name == 'workflow_run' || (github.event_name == 'workflow_dispatch' && inputs.upload_to_release) env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | gh release upload "${{ env.VERSION }}" \ "deadfinder-${{ env.VERSION }}.${{ matrix.arch }}.rpm" --clobber ================================================ FILE: .github/workflows/release-sbom.yml ================================================ --- name: Generate and Upload SBOM on: release: types: [published] workflow_dispatch: permissions: contents: write jobs: generate-sbom: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Generate SBOM (CycloneDX, Crystal) uses: hahwul/cyclonedx-cr@v1.3.0 with: shard_file: ./shard.yml lock_file: ./shard.lock output_file: ./sbom.xml output_format: xml spec_version: 1.6 - name: Upload SBOM to Release if: github.event_name == 'release' uses: softprops/action-gh-release@v3 with: files: ./sbom.xml token: ${{ secrets.GITHUB_TOKEN }} - name: Upload SBOM as workflow artifact if: github.event_name == 'workflow_dispatch' uses:
actions/upload-artifact@v7 with: name: sbom path: ./sbom.xml ================================================ FILE: .gitignore ================================================ /lib/ /.shards/ *.dwarf # Built binary /deadfinder # Release artifacts /deadfinder-*.tar.gz /deadfinder-*.tar.gz.sha256 # Nix /result /result-* .direnv/ # macOS .DS_Store # Hwaro docs site docs/public/* ================================================ FILE: AGENTS.md ================================================ # DeadFinder — Agent Guide DeadFinder is a CLI tool that finds broken links in web pages, sitemaps, and URL lists. It is written in **Crystal** (v2.x+). The legacy Ruby v1.x implementation lives on the `legacy/v1` branch. Reference this file first; fall back to the source only when something here is stale. ## Prerequisites - Crystal >= 1.19.1 - cmake, make, g++ (for building the `lexbor` HTML parser) ## Bootstrap ```bash shards install ``` ## Build ```bash # Debug (fast compile, slower binary) crystal build src/cli_main.cr -o deadfinder # Release (slow compile, fast binary) crystal build src/cli_main.cr -o deadfinder --release --no-debug ``` ## Test ```bash # Unit specs crystal spec # Cross-implementation compat harness (golden files from v1 Ruby output) BIN="./deadfinder" ruby spec/compat/run.rb ``` The compat harness requires `toml-rb` (`gem install toml-rb`) and spins up a local fixture HTTP server on a random port. ## Run ```bash ./deadfinder url https://example.com ./deadfinder file urls.txt cat urls.txt | ./deadfinder pipe ./deadfinder sitemap https://example.com/sitemap.xml ``` Full flag list lives in `src/deadfinder/cli.cr` (the `OptionParser` block). 
## Layout ``` src/ ├── cli_main.cr # binary entry ├── deadfinder.cr # module root (run_* dispatchers, output serialization) └── deadfinder/ ├── cli.cr # OptionParser + subcommand routing ├── types.cr # Options + coverage structs ├── runner.cr # fiber workers, link extraction, HTTP calls ├── http_client.cr # HTTP::Client wrapper (proxy, CONNECT tunneling) ├── utils.cr # URL resolution helpers ├── url_pattern_matcher.cr # match/ignore regex with 1s timeout ├── logger.cr # silent/verbose/debug gating ├── completion.cr # bash/zsh/fish completion generators ├── visualizer.cr # PNG coverage chart (stumpy_png) └── version.cr spec/ ├── deadfinder_spec.cr ├── spec_helper.cr ├── deadfinder/ # unit specs per module └── compat/ # black-box harness (v1 golden files) ``` ## Conventions - Output surface is stable: CLI flags, subcommands, and JSON/YAML/TOML/CSV shapes match v1 Ruby. The golden files in `spec/compat/golden/` lock this contract. - Resolved URLs must preserve the base URL's port (see `utils.cr::origin`). This was a v1 pain point; don't regress. - Silent default is `false` — the CLI emits logs by default. `-s` / `--silent` opts in. 
## CI - `.github/workflows/compat.yml` — Crystal build + compat harness on every PR - `.github/workflows/crystal-release.yml` — release-triggered builds for linux x86_64/aarch64 and macOS arm64; uploads tar.gz + sha256 as release assets - `.github/workflows/docker-build.yml` / `docker-ghcr.yml` — multi-arch image builds (Crystal static binary in Alpine) ## Distribution channels | Channel | How it picks up a new release | |---|---| | GitHub Release binaries | `crystal-release.yml` auto-uploads on `release: published` | | Docker (`ghcr.io/hahwul/deadfinder`) | `docker-ghcr.yml` on push to main / release | | Homebrew (homebrew-core) | Manual PR via `brew bump-formula-pr` after tagging | | GitHub Action (`hahwul/deadfinder@<tag>`) | `action.yml` in repo root; downloads the release binary | ## Legacy (Ruby v1) branch Gem releases still happen on `legacy/v1`. Bug-fix and security updates only — no new features. Do not port v1 changes to main unless they are true behavioral fixes that should also apply to Crystal. ================================================ FILE: CHANGELOG.md ================================================ # Changelog All notable changes are documented here. Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), versioning follows [SemVer](https://semver.org/). ## [Unreleased] ## [2.0.2] ### Fixed - `action.yml`: save the downloaded release tarball under its real filename (`deadfinder-linux-x86_64.tar.gz` etc.) instead of a generic `deadfinder.tar.gz`, so `sha256sum -c` can resolve the path referenced inside the sidecar. Composite-action callers hit `sha256sum: deadfinder-linux-x86_64.tar.gz: No such file or directory` right after a successful download — the earlier 2.0.0 YAML parser error was masking this. Surfaced by owasp-noir/noir run #24651380673. ## [2.0.1] ### Fixed - `action.yml`: quote the `version` input description so its embedded `(default: latest)` doesn't trip strict YAML parsers used by the GitHub Actions runner.
Caller workflows on `uses: hahwul/deadfinder@2.0.0` saw `Mapping values are not allowed in this context.` and failed at job startup. - `scripts/version_update.cr`: constrain `^version:\s*.+$/m` patterns with `[^\n]+` — Crystal's `m` flag enables both line-anchor and DOTALL semantics, so `.+$` greedily ate the rest of the file and truncated `shard.yml`/`snap/snapcraft.yaml`/`aur/PKGBUILD` on the first 2.0.1 bump attempt. ## [2.0.0] — Crystal rewrite ### Added - Crystal implementation (fiber-based concurrency via `spawn` + `Channel`) replaces the Ruby gem as the supported runtime. - Multi-platform release binaries auto-attached on every GitHub Release: linux x86_64/aarch64 (static/musl), macOS arm64. Each tarball ships alongside a `.sha256` sidecar. (Intel macOS isn't shipped as a prebuilt — see [installation docs](https://hahwul.github.io/deadfinder/docs/getting-started/installation/) for source/Rosetta options.) - Cross-implementation compatibility harness (`spec/compat/`) — black-box golden files captured from v1 Ruby output, locking the CLI/output contract for Crystal. - GitHub Action migrated to a composite action that downloads the release binary and verifies its sha256 before running. The `version` input (defaulting to `latest`) lets callers pin a specific release. `worker_headers` is now a first-class input. - Docker image rebuilt on Crystal static binary (`alpine:3.21` runtime, `<15 MB`). OCI labels, semver tags (`2.0.0` / `2.0` / `latest`), and keyless cosign signatures on every published tag. ### Changed - Repository layout: Crystal at the root. `src/`, `spec/`, `shard.yml`, `shard.lock` live at the top level; the old `crystal/` subdirectory is gone. - CLI flag behavior aligns with Ruby v1 exactly — the compat harness enforces this. No user-visible flag renames. - `--silent` default remains `false`; `-s` opts in. (An earlier Crystal port defaulted silent to `true`; that regression was fixed before the 2.0.0 cut.) 
- `--user_agent`, `--proxy_auth`, `--worker_headers` use underscores (as implemented). Prior dashed forms never worked reliably in the old Docker-based action; the new composite action passes the correct names. ### Fixed - Resolved URLs preserve the base URL's non-default port for both `href="/path"` and `href="relative/path"` shapes (was dropping the port in the Crystal port). - Docker-based GitHub Action chain: previously relied on a Ruby-gem image and a brittle entrypoint.sh; replaced with a composite action that downloads the release binary directly. ### Removed - Ruby gem publishing from `main`. The gem continues on the [`legacy/v1`](https://github.com/hahwul/deadfinder/tree/legacy/v1) branch for bug-fix and security releases only. - `lib/`, `bin/`, `Gemfile`, `Gemfile.lock`, `Rakefile`, `deadfinder.gemspec`, `gemset.nix`, `.rubocop.yml`, `ruby-version`, Ruby-based `flake.nix`, and the legacy Ruby spec suite. - `github-action/Dockerfile` + `entrypoint.sh` (replaced by composite action in `action.yml`). ### Migration from v1 | You had | Switch to | |---|---| | `gem install deadfinder` | `brew install deadfinder` or prebuilt binary from the release | | `bundle exec deadfinder …` | Same binary on `PATH`, no bundler | | Docker image (same name) | No change — the image now ships the Crystal binary | | `uses: hahwul/deadfinder@…` | No change — the action now uses the Crystal binary under the hood | | `require 'deadfinder'` | Library usage is gone from main. If you depend on it, pin to a v1 gem release or use the CLI. | If you need a bugfix in v1, open an issue/PR against the [`legacy/v1`](https://github.com/hahwul/deadfinder/tree/legacy/v1) branch. --- History prior to 2.0.0 was not maintained in this file. See [GitHub Releases](https://github.com/hahwul/deadfinder/releases?q=prerelease%3Afalse) and the [`legacy/v1`](https://github.com/hahwul/deadfinder/tree/legacy/v1) branch for v1 release history. 
[Unreleased]: https://github.com/hahwul/deadfinder/compare/2.0.2...HEAD [2.0.2]: https://github.com/hahwul/deadfinder/releases/tag/2.0.2 [2.0.1]: https://github.com/hahwul/deadfinder/releases/tag/2.0.1 [2.0.0]: https://github.com/hahwul/deadfinder/releases/tag/2.0.0 ================================================ FILE: Dockerfile ================================================ FROM crystallang/crystal:1.20.2-alpine AS builder RUN apk add --no-cache cmake make g++ git WORKDIR /build COPY shard.yml shard.lock ./ COPY src/ ./src/ RUN shards install RUN crystal build src/cli_main.cr -o /build/deadfinder --release --static --no-debug FROM alpine:3.23 LABEL org.opencontainers.image.title="DeadFinder" LABEL org.opencontainers.image.description="Find dead links (broken links)." LABEL org.opencontainers.image.authors="HAHWUL " LABEL org.opencontainers.image.source="https://github.com/hahwul/deadfinder" LABEL org.opencontainers.image.documentation="https://github.com/hahwul/deadfinder" LABEL org.opencontainers.image.licenses="MIT" LABEL "com.github.actions.name"="DeadFinder" LABEL "com.github.actions.description"="Find dead (broken) links in files, URLs, or sitemaps" LABEL "com.github.actions.icon"="link" LABEL "com.github.actions.color"="red" ENV LC_ALL=C.UTF-8 RUN apk add --no-cache ca-certificates COPY --from=builder /build/deadfinder /usr/local/bin/deadfinder CMD ["deadfinder"] ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2026 hahwul Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright 
notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================
DeadFinder Logo

Find dead-links (broken links)

Documentation • Installation • GitHub Action • Contributing • Changelog

A dead link (broken link) is a link within a web page that cannot be reached. These links can have a negative impact on SEO and security. This tool makes them easy to identify and fix. ![](https://github.com/user-attachments/assets/92129de9-90c6-41e0-a424-883fe30858f6) > **Looking for v1 (Ruby gem)?** It now lives on the [`legacy/v1`](https://github.com/hahwul/deadfinder/tree/legacy/v1) branch and continues to publish the `deadfinder` gem for bug-fix and security releases. `main` hosts the Crystal rewrite (v2+). ## Installation ### Homebrew ```bash brew install deadfinder # https://formulae.brew.sh/formula/deadfinder ``` ### Docker ```bash docker run ghcr.io/hahwul/deadfinder:latest deadfinder url https://example.com ``` ### Prebuilt binary Download the archive for your platform from the [latest release](https://github.com/hahwul/deadfinder/releases/latest), extract, and place `deadfinder` on your `PATH`. ### Nix ```bash nix run github:hahwul/deadfinder nix profile install github:hahwul/deadfinder nix develop github:hahwul/deadfinder ``` ### Build from source Requires Crystal >= 1.19.1 and `cmake` (for the `lexbor` HTML parser's postinstall — without it `shards install` fails with `'cmake': No such file or directory`). ```bash # macOS brew install crystal cmake # Debian / Ubuntu sudo apt install crystal cmake ``` ```bash shards install crystal build src/cli_main.cr -o deadfinder --release # or: just build ``` ## Using In ### CLI ```bash deadfinder sitemap https://www.hahwul.com/sitemap.xml ``` ### GitHub Action Pin a specific release tag. `@latest` is **not** a valid Actions ref. ```yml steps: - name: Run DeadFinder uses: hahwul/deadfinder@v2 # tracks the latest 2.x — pin a specific tag (e.g.
@2.0.2) for stricter reproducibility id: broken-link with: command: sitemap # url / file / sitemap / pipe target: https://www.hahwul.com/sitemap.xml # timeout: 10 # concurrency: 50 # silent: false # headers: "X-API-Key: 123444" # worker_headers: "User-Agent: Deadfinder Bot" # include30x: false # user_agent: "Apple" # proxy: "http://localhost:8070" # proxy_auth: "id:pw" # match: "" # ignore: "" # coverage: true # visualize: report.png - name: Output Handling run: echo '${{ steps.broken-link.outputs.output }}' ``` If you have found a Dead Link and want to automatically add it as an issue, please refer to the "[Automating Dead Link Detection](https://www.hahwul.com/2024/10/20/automating-dead-link-detection/)" article. ## Usage ``` Usage: deadfinder [options] Commands: pipe Scan the URLs from STDIN file Scan the URLs from File url Scan the Single URL sitemap Scan the URLs from sitemap completion Generate completion script (bash/zsh/fish) version Show version Options: -r, --include30x Include 30x redirections as dead links -c, --concurrency=N Number of concurrent workers (default: 50) -t, --timeout=N Timeout in seconds (default: 10) -o, --output=FILE File to write results -f, --output_format=FORMAT Output format: json, yaml, toml, csv, sarif (default: json) -H, --headers=HEADER Custom HTTP headers for initial request --worker_headers=HEADER Custom HTTP headers for worker requests --user_agent=UA User-Agent string -p, --proxy=PROXY Proxy server (HTTP and HTTPS CONNECT) --proxy_auth=USER:PASS Proxy authentication -m, --match=PATTERN Match URL pattern (regex) -i, --ignore=PATTERN Ignore URL pattern (regex) -s, --silent Silent mode -v, --verbose Verbose mode --debug Debug mode --limit=N Limit number of URLs to scan --coverage Enable coverage tracking and reporting --visualize=PATH Generate visualization PNG ``` ## Modes ```bash # Scan the URLs from STDIN (multiple URLs) cat urls.txt | deadfinder pipe # Scan the URLs from a file deadfinder file urls.txt # Scan a single URL 
deadfinder url https://www.hahwul.com # Scan the URLs from a sitemap deadfinder sitemap https://www.hahwul.com/sitemap.xml ``` ## JSON Handling ```bash deadfinder sitemap https://www.hahwul.com/sitemap.xml -o output.json cat output.json | jq ``` ```json { "Target URL": [ "DeadLink URL", "DeadLink URL", "DeadLink URL" ] } ``` With `--coverage`: ```bash deadfinder sitemap https://www.hahwul.com/sitemap.xml --coverage -o output.json ``` ```json { "dead_links": { "Target URL": ["DeadLink URL 1", "DeadLink URL 2"] }, "coverage": { "targets": { "Target URL": { "total_tested": 14, "dead_links": 7, "coverage_percentage": 50.0 } }, "summary": { "total_tested": 14, "total_dead": 7, "overall_coverage_percentage": 50.0 } } } ``` ## Shell Completion ```bash deadfinder completion bash > /etc/bash_completion.d/deadfinder deadfinder completion zsh > ~/.zsh/completion/_deadfinder deadfinder completion fish > ~/.config/fish/completions/deadfinder.fish ``` ## Contributing Contributions are welcome! If you have an idea for an improvement or want to report a bug: - **Fork the repository.** - **Create a new branch** for your feature or bug fix (e.g., `feature/awesome-feature` or `bugfix/annoying-bug`). - **Make your changes.** - **Commit your changes** with a clear message. - **Push** to the branch. - **Submit a Pull Request (PR)** to our `main` branch. ### Contributors ![](docs/static/images/CONTRIBUTORS.svg) ================================================ FILE: SECURITY.md ================================================ # Security Policy ## Reporting a Vulnerability Found a security issue? Let us know so we can fix it. ### How to Report * **For general security concerns**, please open a [GitHub issue](https://github.com/hahwul/deadfinder/issues). Use the `security` label and describe the issue in as much detail as you can. This helps us to understand and address the problem more effectively. 
* **For sensitive matters**, we encourage you to report them directly on our [GitHub security page](https://github.com/hahwul/deadfinder/security). Handling these issues discreetly is vital for everyone's safety. ## Conclusion Your vigilance and willingness to report security issues are what help keep our project robust and secure. We appreciate the time and effort you put into making our community a safer place. Remember, no concern is too small; we're here to listen and act. Together, we can ensure a secure environment for all our users and contributors. Thank you for being an essential part of our project's security. Thank you for your support in maintaining the security and integrity of our project! ================================================ FILE: action.yml ================================================ --- name: DeadFinder Action description: A GitHub Action to find and report dead (broken) links in files, URLs, or sitemaps. branding: icon: link color: red inputs: command: description: The type of command to execute (e.g., file, url, sitemap) required: true target: description: The target resource for the command (e.g., file path, URL, or sitemap URL) required: true timeout: description: The maximum time to wait for each request, in seconds required: false default: "" concurrency: description: The number of concurrent requests to make required: false default: "" silent: description: Enable silent mode to suppress output required: false default: "false" headers: description: Custom HTTP headers to include in requests, separated by commas required: false default: "" worker_headers: description: Custom HTTP headers for worker requests, separated by commas required: false default: "" verbose: description: Enable verbose mode for detailed logging required: false default: "false" include30x: description: Include HTTP 30x status codes in the results required: false default: "false" user_agent: description: User-Agent string to use for requests required: false
default: "" proxy: description: Proxy server to use for requests required: false default: "" proxy_auth: description: Proxy server authentication credentials required: false default: "" match: description: Match the URL with the given pattern required: false default: "" ignore: description: Ignore the URL with the given pattern required: false default: "" coverage: description: Enable coverage reporting to show dead link ratios required: false default: "false" visualize: description: Generate a visualization of the scan results (e.g., report.png) required: false default: "" version: description: "DeadFinder release tag to download (default: latest)" required: false default: "latest" outputs: output: description: JSON formatted result of the dead-link check value: ${{ steps.scan.outputs.output }} runs: using: composite steps: - name: Detect platform id: platform shell: bash run: | case "${RUNNER_OS}-${RUNNER_ARCH}" in Linux-X64) asset="deadfinder-linux-x86_64.tar.gz" ;; Linux-ARM64) asset="deadfinder-linux-aarch64.tar.gz" ;; macOS-ARM64) asset="deadfinder-macos-arm64.tar.gz" ;; macOS-X64) echo "::error title=macOS Intel not supported::DeadFinder no longer ships a macOS x86_64 prebuilt binary. Use an Apple Silicon (macos-latest) runner, or install from source via 'brew install deadfinder'." 
exit 1 ;; *) echo "::error::Unsupported platform: ${RUNNER_OS}-${RUNNER_ARCH}"; exit 1 ;; esac echo "asset=${asset}" >> "$GITHUB_OUTPUT" - name: Download deadfinder binary shell: bash run: | set -e version="${{ inputs.version }}" asset="${{ steps.platform.outputs.asset }}" if [ "${version}" = "latest" ]; then base_url="https://github.com/hahwul/deadfinder/releases/latest/download" else base_url="https://github.com/hahwul/deadfinder/releases/download/${version}" fi echo "Downloading ${base_url}/${asset}" # The sha256 sidecar was generated with the tarball's real filename # (deadfinder-linux-x86_64.tar.gz etc.), so we must save the download # under the same name for `sha256sum -c` to resolve it. if ! curl -fsSL "${base_url}/${asset}" -o "/tmp/${asset}"; then echo "::error title=DeadFinder binary not found::Failed to download ${base_url}/${asset}" echo "::error::Common causes:" echo "::error:: 1. Using 'uses: hahwul/deadfinder@main' or '@latest' — neither resolves to a release." echo "::error:: → Pin a released ref instead: uses: hahwul/deadfinder@v2 (latest 2.x) or @2.0.2 (exact)." echo "::error:: 2. Requested version (input: version=${version}) is not a published release tag." echo "::error:: → See https://github.com/hahwul/deadfinder/releases for available tags." echo "::error:: 3. Using a v1.x workflow with a v2 ref — v1 users should pin hahwul/deadfinder@1.10.0." exit 1 fi if ! curl -fsSL "${base_url}/${asset}.sha256" -o "/tmp/${asset}.sha256"; then echo "::error::Downloaded ${asset} but its .sha256 sidecar is missing at ${base_url}/${asset}.sha256" exit 1 fi cd /tmp # macOS runners ship `shasum`, Linux ships `sha256sum`. 
if command -v sha256sum >/dev/null 2>&1; then sha256sum -c "${asset}.sha256" else shasum -a 256 -c "${asset}.sha256" fi tar xzf "${asset}" chmod +x deadfinder ./deadfinder version - name: Run deadfinder id: scan shell: bash env: DF_COMMAND: ${{ inputs.command }} DF_TARGET: ${{ inputs.target }} DF_TIMEOUT: ${{ inputs.timeout }} DF_CONCURRENCY: ${{ inputs.concurrency }} DF_SILENT: ${{ inputs.silent }} DF_HEADERS: ${{ inputs.headers }} DF_WORKER_HEADERS: ${{ inputs.worker_headers }} DF_VERBOSE: ${{ inputs.verbose }} DF_INCLUDE30X: ${{ inputs.include30x }} DF_USER_AGENT: ${{ inputs.user_agent }} DF_PROXY: ${{ inputs.proxy }} DF_PROXY_AUTH: ${{ inputs.proxy_auth }} DF_MATCH: ${{ inputs.match }} DF_IGNORE: ${{ inputs.ignore }} DF_COVERAGE: ${{ inputs.coverage }} DF_VISUALIZE: ${{ inputs.visualize }} run: | set -e args=( "${DF_COMMAND}" "${DF_TARGET}" -o /tmp/output.json -f json ) [ -n "${DF_TIMEOUT}" ] && args+=( --timeout="${DF_TIMEOUT}" ) [ -n "${DF_CONCURRENCY}" ] && args+=( --concurrency="${DF_CONCURRENCY}" ) [ "${DF_SILENT}" = "true" ] && args+=( --silent ) [ "${DF_VERBOSE}" = "true" ] && args+=( --verbose ) [ "${DF_INCLUDE30X}" = "true" ] && args+=( --include30x ) [ -n "${DF_USER_AGENT}" ] && args+=( --user_agent="${DF_USER_AGENT}" ) [ -n "${DF_PROXY}" ] && args+=( --proxy="${DF_PROXY}" ) [ -n "${DF_PROXY_AUTH}" ] && args+=( --proxy_auth="${DF_PROXY_AUTH}" ) [ -n "${DF_MATCH}" ] && args+=( --match="${DF_MATCH}" ) [ -n "${DF_IGNORE}" ] && args+=( --ignore="${DF_IGNORE}" ) [ "${DF_COVERAGE}" = "true" ] && args+=( --coverage ) [ -n "${DF_VISUALIZE}" ] && args+=( --visualize="${DF_VISUALIZE}" ) if [ -n "${DF_HEADERS}" ]; then IFS=',' read -ra hdrs <<< "${DF_HEADERS}" for h in "${hdrs[@]}"; do [ -n "${h}" ] && args+=( -H "${h}" ) done fi if [ -n "${DF_WORKER_HEADERS}" ]; then IFS=',' read -ra whdrs <<< "${DF_WORKER_HEADERS}" for h in "${whdrs[@]}"; do [ -n "${h}" ] && args+=( --worker_headers="${h}" ) done fi /tmp/deadfinder "${args[@]}" if [ ! 
-f /tmp/output.json ]; then echo "::error::/tmp/output.json was not produced" exit 1 fi if command -v jq >/dev/null 2>&1; then encoded=$(jq -c . /tmp/output.json) else encoded=$(tr -d '\n' < /tmp/output.json) fi echo "output=${encoded}" >> "$GITHUB_OUTPUT" ================================================ FILE: aur/PKGBUILD ================================================ # Maintainer: HAHWUL pkgname=deadfinder pkgver=2.0.2 pkgrel=1 pkgdesc="Find dead (broken) links in web pages, URL lists, and sitemaps" arch=('x86_64' 'aarch64') url="https://github.com/hahwul/deadfinder" license=('MIT') source_x86_64=("${pkgname}-${pkgver}-x86_64.tar.gz::${url}/releases/download/${pkgver}/deadfinder-linux-x86_64.tar.gz") source_aarch64=("${pkgname}-${pkgver}-aarch64.tar.gz::${url}/releases/download/${pkgver}/deadfinder-linux-aarch64.tar.gz") source=("LICENSE-${pkgver}::https://raw.githubusercontent.com/hahwul/deadfinder/${pkgver}/LICENSE") sha256sums=('SKIP') sha256sums_x86_64=('SKIP') sha256sums_aarch64=('SKIP') package() { install -Dm755 "${srcdir}/deadfinder" "${pkgdir}/usr/bin/${pkgname}" install -Dm644 "${srcdir}/LICENSE-${pkgver}" "${pkgdir}/usr/share/licenses/${pkgname}/LICENSE" } ================================================ FILE: docs/AGENTS.md ================================================ # AGENTS.md - AI Agent Instructions for Hwaro Site This document provides instructions for AI agents working on this Hwaro-generated website. ## Project Overview This is a static website built with [Hwaro](https://github.com/hahwul/hwaro), a fast and lightweight static site generator written in Crystal. 
## Essential Commands | Command | Description | |---------|-------------| | `hwaro build` | Build the site to `public/` directory | | `hwaro serve` | Start development server with live reload | | `hwaro new <path>` | Create new content from archetype | | `hwaro deploy` | Deploy the site (requires configuration) | | `hwaro build --drafts` | Include draft content | | `hwaro serve -p 8080` | Serve on custom port (default: 3000) | | `hwaro build --base-url "https://example.com"` | Set base URL for production | ## Directory Structure ``` . ├── config.toml # Site configuration ├── content/ # Markdown content files │ ├── _index.md # Homepage content │ └── blog/ # Blog section │ ├── _index.md # Section listing page │ └── *.md # Individual pages ├── templates/ # Jinja2 templates (Crinja) │ ├── base.html # Base layout (optional) │ ├── page.html # Page template │ ├── section.html # Section listing template │ └── shortcodes/ # Shortcode templates ├── static/ # Static assets (copied as-is) └── archetypes/ # Content templates for `hwaro new` ``` ## Notes for AI Agents 1. **Front matter is TOML** (`+++`), not YAML (`---`). 2. **Rendered content** is `{{ content | safe }}`, not `{{ page.content }}`. 3. **Custom metadata** is `page.extra.field`, not `page.params.field`. 4. **Always preview** with `hwaro serve` before committing. 5. **Validate TOML syntax** in config.toml and front matter after edits. 6. **Use `{{ base_url }}` prefix** for URLs in templates. 7. **Escape user content** with `{{ value | escape }}` in templates.
## Full Reference For detailed documentation on content, templates, configuration, and more: - [Hwaro Documentation](https://hwaro.hahwul.com) - [Configuration Guide](https://hwaro.hahwul.com/start/config/) - [Full LLM Reference](https://hwaro.hahwul.com/llms-full.txt) — comprehensive reference optimized for AI agents To generate the full embedded AGENTS.md locally, run: ``` hwaro tool agents-md --local --write ``` ## Site-Specific Instructions ================================================ FILE: docs/config.toml ================================================ # ============================================================================= # Site Configuration # ============================================================================= title = "DeadFinder" description = "Find dead (broken) links in web pages, URL lists, and sitemaps." base_url = "https://deadfinder.hahwul.com" # ============================================================================= # Plugins # ============================================================================= [plugins] processors = ["markdown"] # ============================================================================= # Content Files # ============================================================================= [content.files] allow_extensions = ["jpg", "jpeg", "png", "gif", "svg", "webp"] # ============================================================================= # Syntax Highlighting # ============================================================================= [highlight] enabled = true theme = "monokai" use_cdn = true # ============================================================================= # Taxonomies # ============================================================================= [[taxonomies]] name = "tags" feed = true sitemap = false # ============================================================================= # Sitemap # ============================================================================= 
[sitemap] enabled = true filename = "sitemap.xml" changefreq = "weekly" priority = 0.5 # ============================================================================= # Markdown Configuration # ============================================================================= [markdown] safe = false lazy_loading = false emoji = false # ============================================================================= # Search (client-side, Fuse.js) # ============================================================================= [search] enabled = true format = "fuse_json" fields = ["title", "content", "description"] filename = "search.json" # ============================================================================= # OpenGraph & Twitter Cards # ============================================================================= # Default meta tags for social sharing. Page-level front matter overrides. [og] type = "website" twitter_card = "summary_large_image" # twitter_site = "@hahwul" # twitter_creator = "@hahwul" # ============================================================================= # Auto OG Images # ============================================================================= # Auto-generate 1200x630 OG preview images for pages without a custom `image`. 
# https://hwaro.hahwul.com/features/og-images/ [og.auto_image] enabled = true format = "png" background = "#0a0f0a" text_color = "#e8ede8" accent_color = "#22c55e" font_size = 52 style = "dots" pattern_opacity = 0.12 pattern_scale = 1.0 logo = "static/images/deadfinder.webp" logo_position = "bottom-left" output_dir = "og-images" show_title = true # ============================================================================= # Pagination (Optional) # ============================================================================= # [pagination] # enabled = false # per_page = 10 # ============================================================================= # Series (Optional) # ============================================================================= # Group posts into ordered series # [series] # enabled = true # ============================================================================= # Related Posts (Optional) # ============================================================================= # Recommend related content based on shared taxonomy terms # [related] # enabled = true # limit = 5 # taxonomies = ["tags"] # ============================================================================= # Robots.txt # ============================================================================= # Controls search engine crawler access [robots] enabled = true filename = "robots.txt" rules = [ { user_agent = "*", allow = ["/"] } ] # ============================================================================= # LLMs.txt # ============================================================================= # Instructions for AI/LLM crawlers [llms] enabled = true filename = "llms.txt" instructions = "This is documentation for DeadFinder, an open-source CLI that finds broken links in web pages, URL lists, and sitemaps. Content is MIT-licensed." 
full_enabled = true full_filename = "llms-full.txt" # ============================================================================= # RSS/Atom Feeds # ============================================================================= # Generates RSS or Atom feed for content syndication # [feeds] # enabled = true # type = "rss" # limit = 10 # full_content = true # sections = [] # ============================================================================= # Build Hooks (Optional) # ============================================================================= # Run custom shell commands before/after build process # [build] # hooks.pre = ["npm install"] # hooks.post = ["npm run minify"] # ============================================================================= # Permalinks (Optional) # ============================================================================= # Override the output path for specific sections or taxonomies # [permalinks] # posts = "/posts/:year/:month/:slug/" # tags = "/topic/:slug/" # ============================================================================= # Auto Includes (Optional) # ============================================================================= # Automatically load CSS/JS files from static directories # [auto_includes] # enabled = true # dirs = ["assets/css", "assets/js"] # ============================================================================= # Asset Pipeline (Optional) # ============================================================================= # [assets] # enabled = true # minify = true # fingerprint = true # ============================================================================= # Deployment (Optional) # ============================================================================= # [deployment] # target = "prod" # source_dir = "public" # # [[deployment.targets]] # name = "prod" # url = "file://./out" # ============================================================================= # Image Processing 
(Optional) # ============================================================================= # Automatic image resizing and LQIP (Low-Quality Image Placeholder) generation # Uses vendored stb libraries — no external tools required. # Use resize_image() in templates to generate responsive variants. # [image_processing] # enabled = true # widths = [320, 640, 1024, 1280] # quality = 85 # # [image_processing.lqip] # enabled = true # width = 32 # Placeholder width in pixels (8-128) # quality = 20 # JPEG quality for placeholder (1-100, lower = smaller) # ============================================================================= # PWA (Progressive Web App) (Optional) # ============================================================================= # Generate manifest.json and service worker for offline access # [pwa] # enabled = true # name = "My Site" # short_name = "Site" # theme_color = "#ffffff" # background_color = "#ffffff" # display = "standalone" # icons = ["static/icon-192.png", "static/icon-512.png"] # ============================================================================= # AMP (Accelerated Mobile Pages) (Optional) # ============================================================================= # Generate AMP-compliant versions of content pages # [amp] # enabled = true # path_prefix = "amp" # sections = ["posts"] ================================================ FILE: docs/content/about.md ================================================ +++ title = "About" description = "About DeadFinder" +++ DeadFinder detects broken links — 4xx, 5xx, optionally 3xx — on any page, URL list, or sitemap. It's built for automation: one static binary, machine-readable output, and a GitHub Action wrapper so CI pipelines can gate on link health. ## Status - **Current line**: 2.x, Crystal rewrite. - **Legacy**: 1.x, original Ruby gem — frozen except for bug fixes on the [`legacy/v1`](https://github.com/hahwul/deadfinder/tree/legacy/v1) branch. 
## Source - Repository: [github.com/hahwul/deadfinder](https://github.com/hahwul/deadfinder) - License: MIT - Maintainer: [HAHWUL](https://www.hahwul.com) ## Reporting issues Please use the [GitHub issue tracker](https://github.com/hahwul/deadfinder/issues). Security-sensitive reports go through the [GitHub security page](https://github.com/hahwul/deadfinder/security). ================================================ FILE: docs/content/docs/_index.md ================================================ +++ title = "Documentation" description = "DeadFinder documentation" sort_by = "weight" +++ Start with [Installation](/docs/getting-started/installation/) and the [Quick Start](/docs/getting-started/quickstart/). The **Usage** section covers every subcommand, output format, and filter. **Integration** shows how to call DeadFinder from GitHub Actions or Docker. **Reference** is the full CLI flag table. ================================================ FILE: docs/content/docs/getting-started/_index.md ================================================ +++ title = "Getting Started" description = "Install DeadFinder and run your first scan." weight = 1 sort_by = "weight" +++ Two steps: 1. [Install](/docs/getting-started/installation/) the binary. 2. [Run your first scan](/docs/getting-started/quickstart/). ================================================ FILE: docs/content/docs/getting-started/installation.md ================================================ +++ title = "Installation" description = "Install DeadFinder via Homebrew, Docker, prebuilt binary, Nix, or from source." weight = 1 +++ Pick the channel that fits your environment. All paths produce the same CLI. ## Homebrew (macOS / Linux) ```bash brew install deadfinder ``` ## Docker Image: [`ghcr.io/hahwul/deadfinder`](https://github.com/hahwul/deadfinder/pkgs/container/deadfinder). Multi-arch (linux/amd64, linux/arm64). Each published tag is cosign-signed. 
```bash docker run ghcr.io/hahwul/deadfinder:latest deadfinder url https://example.com ``` ## Prebuilt binary Download the tarball for your platform from [Releases](https://github.com/hahwul/deadfinder/releases/latest) (a `.sha256` sidecar ships alongside each tarball): | OS | Arch | Asset | |---|---|---| | Linux | x86_64 | `deadfinder-linux-x86_64.tar.gz` | | Linux | aarch64 | `deadfinder-linux-aarch64.tar.gz` | | macOS | arm64 | `deadfinder-macos-arm64.tar.gz` | > Intel macOS (`x86_64`) doesn't have a prebuilt binary — use `brew install deadfinder` (builds from source) or run the Apple Silicon binary under Rosetta. Extract and put `deadfinder` on your `PATH`: ```bash curl -fsSL https://github.com/hahwul/deadfinder/releases/latest/download/deadfinder-linux-x86_64.tar.gz \ | tar xz sudo mv deadfinder /usr/local/bin/ ``` ## Linux package managers | Distro | Package | |---|---| | Debian / Ubuntu | `deadfinder_X.Y.Z_{amd64,arm64}.deb` from Releases | | RHEL / Fedora | `deadfinder-X.Y.Z.{x86_64,aarch64}.rpm` from Releases | | Alpine | `deadfinder-X.Y.Z-r0.{x86_64,aarch64}.apk` from Releases | | Arch Linux | `yay -S deadfinder` (AUR) | | Snap | `sudo snap install deadfinder` | ## Nix ```bash nix run github:hahwul/deadfinder nix profile install github:hahwul/deadfinder nix develop github:hahwul/deadfinder ``` ## Build from source Prerequisites: - Crystal >= 1.19.1 - `cmake` — required by the `lexbor` HTML parser's postinstall step. Without it, `shards install` fails with `Error executing process: 'cmake': No such file or directory`. 
```bash # macOS brew install crystal cmake # Debian / Ubuntu sudo apt install crystal cmake # Arch Linux sudo pacman -S crystal cmake ``` Then build: ```bash git clone https://github.com/hahwul/deadfinder cd deadfinder shards install crystal build src/cli_main.cr -o deadfinder --release --no-debug ``` Or use the [`justfile`](https://github.com/hahwul/deadfinder/blob/main/justfile) recipes: ```bash just build # release binary just build-debug # fast debug build just test # run specs ``` ================================================ FILE: docs/content/docs/getting-started/quickstart.md ================================================ +++ title = "Quick Start" description = "Run your first DeadFinder scan and read its output." weight = 2 +++ ## Scan a single URL ```bash deadfinder url https://www.example.com ``` The terminal shows discovered links and their status: ``` ▶ Fetching https://www.example.com ● Discovered 12 URLs, currently checking them. [anchor:8 / link:4] ├── ✓ [200] https://www.example.com/about ├── ✘ [404] https://www.example.com/old-page └── ● Task completed ``` Exit code is `0` even when dead links exist — parse the output to make a build pass/fail decision. ## Structured output Write JSON to a file: ```bash deadfinder url https://www.example.com -o output.json cat output.json ``` ```json { "https://www.example.com": [ "https://www.example.com/old-page" ] } ``` YAML, TOML, CSV, and SARIF are available via `-f FORMAT`. See [Output formats](/docs/usage/output-formats/).
## Scan a sitemap ```bash deadfinder sitemap https://www.example.com/sitemap.xml -o results.json ``` ## Scan many URLs From a file: ```bash cat > urls.txt <<'EOF' https://www.example.com https://docs.example.com EOF deadfinder file urls.txt -o results.json ``` From STDIN: ```bash printf 'https://www.example.com\nhttps://docs.example.com\n' \ | deadfinder pipe -o results.json ``` ## Coverage report `--coverage` adds a per-target summary with dead-link percentage: ```bash deadfinder sitemap https://www.example.com/sitemap.xml --coverage -o results.json ``` Optionally render a PNG chart: ```bash deadfinder sitemap https://www.example.com/sitemap.xml --coverage --visualize report.png ``` ## Next - [Subcommands](/docs/usage/subcommands/) - [Output formats](/docs/usage/output-formats/) - [CLI flags reference](/docs/reference/cli-flags/) ================================================ FILE: docs/content/docs/integration/_index.md ================================================ +++ title = "Integration" description = "Run DeadFinder from GitHub Actions or Docker." weight = 3 sort_by = "weight" +++ - [GitHub Action](/docs/integration/github-action/) — official composite action that downloads the release binary and verifies its sha256. - [Docker](/docs/integration/docker/) — multi-arch image with cosign-signed tags. ================================================ FILE: docs/content/docs/integration/docker.md ================================================ +++ title = "Docker" description = "ghcr.io/hahwul/deadfinder — multi-arch, cosign-signed, tiny Alpine base." weight = 2 +++ Image: [`ghcr.io/hahwul/deadfinder`](https://github.com/hahwul/deadfinder/pkgs/container/deadfinder) - Multi-arch: `linux/amd64`, `linux/arm64` - Runtime base: `alpine:3.21` + static binary (~15 MB total) - Tags on release: `X.Y.Z`, `X.Y`, `latest` - Every published tag is **cosign-signed** (keyless, Sigstore) ## Run The image's `CMD` is `["deadfinder"]`.
Arguments given after the image name replace that default command, so include `deadfinder` itself followed by the subcommand: ```bash docker run ghcr.io/hahwul/deadfinder:latest deadfinder url https://www.example.com docker run ghcr.io/hahwul/deadfinder:latest deadfinder sitemap https://www.example.com/sitemap.xml ``` Writing results out? Bind-mount a host directory: ```bash docker run --rm -v "$PWD":/out \ ghcr.io/hahwul/deadfinder:latest \ deadfinder url https://www.example.com -o /out/results.json -s ``` ## Pin a version ```bash docker pull ghcr.io/hahwul/deadfinder:2.0.0 docker pull ghcr.io/hahwul/deadfinder:2.0 docker pull ghcr.io/hahwul/deadfinder:latest ``` ## Verify the signature ```bash cosign verify ghcr.io/hahwul/deadfinder:2.0.0 \ --certificate-identity-regexp 'https://github.com/hahwul/deadfinder/.+' \ --certificate-oidc-issuer 'https://token.actions.githubusercontent.com' ``` Substitute the tag you pulled. The command succeeds only if the image was signed by this repo's GitHub Actions. ================================================ FILE: docs/content/docs/integration/github-action.md ================================================ +++ title = "GitHub Action" description = "hahwul/deadfinder composite action — inputs, outputs, examples." weight = 1 +++ `hahwul/deadfinder` is a composite action that downloads the matching release binary, verifies its sha256, and executes the scan. Runs on Linux (x86_64/aarch64) and macOS (arm64). Intel macOS runners (`macos-13`) are not supported — use `macos-latest`. ## Pin a version Always pin a released ref. `@latest` is **not** a valid Actions ref (GitHub has no auto-resolver for it).
```yaml - uses: hahwul/deadfinder@v2 # tracks latest 2.x — gets bug-fix patches automatically # or - uses: hahwul/deadfinder@2.0.2 # exact pin — fully reproducible ``` The `version` input can override the binary independently of the action ref: ```yaml - uses: hahwul/deadfinder@v2 with: version: "2.0.2" # download binary from this release tag ``` ## Full example ```yaml steps: - name: Run DeadFinder uses: hahwul/deadfinder@v2 id: scan with: command: sitemap target: https://www.example.com/sitemap.xml # Optional: # timeout: 10 # concurrency: 50 # include30x: false # headers: "X-API-Key: secret" # worker_headers: "User-Agent: Deadfinder Bot" # user_agent: "MyBot/1.0" # proxy: "http://localhost:8080" # proxy_auth: "user:pass" # match: "^https://example\\.com/" # ignore: "\\.png$" # coverage: true # visualize: report.png # silent: false # verbose: false - name: Handle results run: echo '${{ steps.scan.outputs.output }}' | jq '.' ``` ## Inputs | Input | Required | Default | Notes | |---|---|---|---| | `command` | ✓ | — | `url` / `file` / `pipe` / `sitemap` | | `target` | ✓ | — | URL, file path, or sitemap URL | | `version` | | `latest` | Release tag; `latest` resolves to most recent release | | `timeout` | | `10` | seconds | | `concurrency` | | `50` | workers | | `silent` | | `false` | string `"true"` to enable | | `verbose` | | `false` | | | `include30x` | | `false` | | | `headers` | | `""` | comma-separated `"Key: Value"` pairs | | `worker_headers` | | `""` | headers for link-check requests | | `user_agent` | | `""` | overrides default UA | | `proxy` | | `""` | HTTP/HTTPS proxy URL | | `proxy_auth` | | `""` | `user:pass` | | `match` | | `""` | regex | | `ignore` | | `""` | regex | | `coverage` | | `false` | | | `visualize` | | `""` | file path (implies coverage) | ## Outputs | Output | Shape | |---|---| | `output` | Compact JSON string of the scan result (same shape as `-f json` output). 
| Consume with `fromJSON()`: ```yaml - run: | echo "Dead links: ${{ fromJSON(steps.scan.outputs.output).summary }}" ``` ## Migrating from v1 The v1 action was Docker-based and bundled the Ruby gem. v2 is a composite action that downloads the Crystal binary directly. All v1 inputs are preserved. `worker_headers` was previously undeclared but wired through args — it's now a formal input. `version` is new. No inputs were renamed or removed. Pin to `@1.10.0` to keep the v1 behavior; use `@v2` (or pin a specific 2.x tag like `@2.0.2`) for v2. ================================================ FILE: docs/content/docs/reference/_index.md ================================================ +++ title = "Reference" description = "CLI flag reference." weight = 4 sort_by = "weight" +++ - [CLI flags](/docs/reference/cli-flags/) — every option accepted by `deadfinder`. ================================================ FILE: docs/content/docs/reference/cli-flags.md ================================================ +++ title = "CLI Flags" description = "Complete reference for every deadfinder option." weight = 1 +++ Run `deadfinder --help` for the live help text. This page is the documented contract. ## Synopsis ``` deadfinder <command> [options] Commands: pipe Scan the URLs from STDIN file Scan the URLs from File url Scan the Single URL sitemap Scan the URLs from sitemap completion Generate completion script (bash/zsh/fish) version Show version ``` ## Options | Short | Long | Default | Description | |---|---|---|---| | `-r` | `--include30x` | `false` | Treat 3xx responses as dead links. | | `-c` | `--concurrency=N` | `50` | Number of concurrent workers. | | `-t` | `--timeout=N` | `10` | Per-request timeout (seconds). | | `-o` | `--output=FILE` | `""` | Write structured results to FILE. | | `-f` | `--output_format=FORMAT` | `json` | `json` / `yaml` / `toml` / `csv` / `sarif`. | | `-H` | `--headers=HEADER` | `[]` | Header for the **initial** page fetch. Repeat for multiple. Format: `"Name: Value"`.
| | | `--worker_headers=HEADER` | `[]` | Header for every **link-check** request. Repeat for multiple. | | | `--user_agent=UA` | `Mozilla/5.0 (compatible; DeadFinder/X.Y.Z;)` | Override User-Agent. | | `-p` | `--proxy=URL` | `""` | HTTP/HTTPS proxy (HTTPS uses CONNECT tunneling). | | | `--proxy_auth=USER:PASS` | `""` | Proxy credentials (Basic). | | `-m` | `--match=PATTERN` | `""` | Regex: only scan URLs that match. | | `-i` | `--ignore=PATTERN` | `""` | Regex: skip URLs that match. | | `-s` | `--silent` | `false` | Suppress the live log on stdout. | | `-v` | `--verbose` | `false` | Log every checked URL, not just dead ones. | | | `--debug` | `false` | Internal state / cache diagnostics. | | | `--limit=N` | `0` | Cap input URLs (`0` = unlimited). | | | `--coverage` | `false` | Emit per-target coverage stats. | | | `--visualize=PATH` | `""` | Write a PNG status-code chart (implies `--coverage`). | ## Notes - Structured output is **file-only**: you must set `-o`. stdout is reserved for the live log. - `match` / `ignore` regexes each run under a 1-second timeout to block ReDoS. - The initial page fetch receives `--headers`; worker link-check requests receive `--worker_headers`. `--user_agent` applies to both. - `--visualize` auto-enables `--coverage`. ================================================ FILE: docs/content/docs/usage/_index.md ================================================ +++ title = "Usage" description = "Subcommands, output formats, and filters." weight = 2 sort_by = "weight" +++ DeadFinder is a single CLI with four scan subcommands and a handful of global flags. - [Subcommands](/docs/usage/subcommands/) — `url`, `file`, `pipe`, `sitemap`, plus `completion` and `version`. - [Output formats](/docs/usage/output-formats/) — JSON / YAML / TOML / CSV / SARIF, coverage, PNG visualization. - [Filtering](/docs/usage/filtering/) — `--match` / `--ignore` regex, `--include30x`, `--limit`.
================================================ FILE: docs/content/docs/usage/filtering.md ================================================ +++ title = "Filtering" description = "Regex match/ignore, 3xx inclusion, URL limit." weight = 3 +++ ## `--match=PATTERN` / `--ignore=PATTERN` Regex applied to every discovered URL before it's fetched. Each pattern has a 1-second timeout to prevent ReDoS. ```bash # Only check internal links deadfinder sitemap https://www.example.com/sitemap.xml \ --match='^https://(www\.)?example\.com/' # Skip media files deadfinder url https://www.example.com \ --ignore='\.(png|jpg|gif|webp|mp4)$' ``` Using both: `--match` is applied first, then `--ignore`. ## `--include30x` By default, 3xx redirects are treated as healthy (the destination is what matters). Enable this flag to mark them as dead too: ```bash deadfinder url https://www.example.com --include30x ``` Use this when your policy is "redirects are technical debt" rather than "follow the redirect chain". ## `--limit=N` Cap the number of URLs scanned per invocation (useful for quick smoke tests of a large sitemap): ```bash deadfinder sitemap https://www.example.com/sitemap.xml --limit=50 ``` The limit applies to the input list (file lines, STDIN lines, or sitemap `<loc>` entries), not to the child links discovered on each page. ## `--concurrency=N` / `--timeout=N` Not filters per se, but the other knobs you'll reach for: - `--concurrency=50` (default) — number of parallel workers. - `--timeout=10` (default, seconds) — per-request connect + read timeout. Ramp concurrency down on rate-limited targets; up on fast internal scans. ================================================ FILE: docs/content/docs/usage/output-formats.md ================================================ +++ title = "Output Formats" description = "JSON, YAML, TOML, CSV, SARIF, coverage reports, and PNG visualization." weight = 2 +++ DeadFinder writes results only when `-o FILE` is set (stdout stays a human-readable log).
Pick the format with `-f FORMAT`. | Flag | Format | |---|---| | `-f json` (default) | pretty JSON | | `-f yaml` / `-f yml` | YAML | | `-f toml` | TOML | | `-f csv` | CSV with `target,url` columns | | `-f sarif` | SARIF 2.1.0 JSON (one `DEAD_LINK` result per broken URL) | ## Basic shape Same across JSON / YAML / TOML: ```json { "https://www.example.com": [ "https://www.example.com/broken-link-1", "https://www.example.com/broken-link-2" ] } ``` CSV: ```csv target,url https://www.example.com,https://www.example.com/broken-link-1 https://www.example.com,https://www.example.com/broken-link-2 ``` ## Coverage mode Add `--coverage` to include per-target statistics: ```bash deadfinder sitemap https://www.example.com/sitemap.xml --coverage -o out.json ``` ```json { "dead_links": { "https://www.example.com": ["https://www.example.com/broken-link-1"] }, "coverage": { "targets": { "https://www.example.com": { "total_tested": 100, "dead_links": 5, "coverage_percentage": 5.0, "status_counts": {"404": 3, "500": 2} } }, "summary": { "total_tested": 100, "total_dead": 5, "overall_coverage_percentage": 5.0, "overall_status_counts": {"404": 3, "500": 2} } } } ``` ## SARIF `-f sarif` produces a [SARIF 2.1.0](https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html) document you can upload to GitHub code scanning (`github/codeql-action/upload-sarif`) or feed into any SARIF-aware tooling: ```bash deadfinder sitemap https://www.example.com/sitemap.xml -f sarif -o deadfinder.sarif ``` Each dead link becomes a `result` under the `DEAD_LINK` rule. The broken URL is the primary location; the page it was discovered on is attached as a related location. ## PNG visualization ```bash deadfinder sitemap https://www.example.com/sitemap.xml --visualize report.png ``` `--visualize` implies `--coverage`. Output is a stacked bar chart of status codes per target. ## Stdout vs file Structured output requires `-o`. Without it the tool emits a live log to stdout only.
Use `-s` / `--silent` to suppress the log entirely (for example when you're only interested in the file output). ```bash deadfinder url https://www.example.com -o out.json -s ``` ================================================ FILE: docs/content/docs/usage/subcommands.md ================================================ +++ title = "Subcommands" description = "url / file / pipe / sitemap / completion / version" weight = 1 +++ ## `url <url>` Scan a single page. Extract links from the HTML and check each one. ```bash deadfinder url https://www.example.com ``` ## `file <path>` Read newline-separated URLs from a file and scan each one. Each URL is scanned independently; results are keyed by the source URL. ```bash deadfinder file urls.txt ``` ## `pipe` Read URLs from STDIN (one per line). Useful in shell pipelines. ```bash grep '^https://' access.log | sort -u | deadfinder pipe ``` ## `sitemap <url>` Parse an XML sitemap, follow sitemap indexes recursively, and scan every `<loc>`. ```bash deadfinder sitemap https://www.example.com/sitemap.xml ``` ## `completion <shell>` Emit shell completion for bash, zsh, or fish. ```bash # Bash deadfinder completion bash > /etc/bash_completion.d/deadfinder # Zsh deadfinder completion zsh > ~/.zsh/completion/_deadfinder # Fish deadfinder completion fish > ~/.config/fish/completions/deadfinder.fish ``` ## `version` Print the DeadFinder version. ```bash deadfinder version ``` ================================================ FILE: docs/content/index.md ================================================ +++ title = "DeadFinder" description = "Find dead (broken) links in web pages, URL lists, and sitemaps." +++ Find dead (broken) links in web pages, URL lists, and sitemaps. Fast native CLI written in Crystal with fiber-based concurrency. ## Why DeadFinder - **Fast**: fiber-based concurrent workers scan hundreds of links in parallel. - **Ergonomic**: one binary, no runtime dependencies.
- **Structured output**: JSON / YAML / TOML / CSV / SARIF — or attach as a GitHub Action output. - **Coverage report**: track dead-link ratio per target with `--coverage`. ## Install ```bash # Homebrew brew install deadfinder # Docker docker run ghcr.io/hahwul/deadfinder:latest deadfinder url https://example.com # Prebuilt binary — pick your platform on the Releases page # https://github.com/hahwul/deadfinder/releases/latest ``` See [Installation](/docs/getting-started/installation/) for every channel (Nix, build from source, etc.). ## First scan ```bash deadfinder url https://your-site.example deadfinder sitemap https://your-site.example/sitemap.xml cat urls.txt | deadfinder pipe ``` See [Quick Start](/docs/getting-started/quickstart/) for more. ## Continuous integration Run DeadFinder on every push via the official GitHub Action: ```yaml - uses: hahwul/deadfinder@v2 with: command: sitemap target: https://www.example.com/sitemap.xml ``` See [GitHub Action](/docs/integration/github-action/) for the full input reference. --- DeadFinder 2.0+ is written in Crystal. v1.x (Ruby gem) lives on the [`legacy/v1`](https://github.com/hahwul/deadfinder/tree/legacy/v1) branch and receives bug-fix updates only.
================================================ FILE: docs/static/CNAME ================================================ deadfinder.hahwul.com ================================================ FILE: docs/static/css/style.css ================================================ :root { --sidebar-w: 280px; --toc-w: 220px; --content-max: 720px; --font: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif; --mono: 'Noto Sans Mono', ui-monospace, 'SFMono-Regular', Consolas, monospace; --bg: #0a0f0a; --bg-sidebar: #0f1a0f; --text: #e8ede8; --text-muted: #8fa38f; --text-light: #5c6e5c; --primary: #22c55e; --primary-light: #0a1f0e; --accent: #f59e0b; --accent-light: #1a1500; --border: #1a2e1a; --border-light: #152515; --code-bg: #0d160d; --hover-bg: #122012; } *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } body { font-family: var(--font); font-size: 15px; line-height: 1.7; color: var(--text); background: var(--bg); -webkit-font-smoothing: antialiased; } /* -- Top Bar -- */ .topbar { position: sticky; top: 0; z-index: 100; display: flex; align-items: center; justify-content: space-between; height: 52px; padding: 0 1.25rem; background: var(--bg); border-bottom: 1px solid var(--border); } .topbar-left { display: flex; align-items: center; gap: 0.75rem; } .topbar-logo { display: flex; align-items: center; gap: 0.5rem; text-decoration: none; color: var(--text); font-weight: 700; font-size: 1rem; } .topbar-logo svg { flex-shrink: 0; } .topbar-logo:hover { color: var(--primary); } .menu-btn { display: none; background: none; border: 1px solid var(--border); border-radius: 6px; padding: 4px 8px; cursor: pointer; color: var(--text-muted); } .menu-btn:hover { background: var(--hover-bg); } .topbar-right { display: flex; align-items: center; gap: 0.5rem; } .topbar-icon { display: inline-flex; align-items: center; justify-content: center; width: 32px; height: 32px; color: var(--text-muted); border: 1px solid var(--border); border-radius: 
8px; text-decoration: none; transition: color 0.15s, border-color 0.15s; } .topbar-icon:hover { color: var(--text); border-color: var(--primary); } /* Search trigger (button in topbar) */ .topbar-search { display: inline-flex; align-items: center; gap: 0.5rem; width: 260px; padding: 6px 8px 6px 10px; font-family: var(--font); font-size: 0.8rem; background: var(--code-bg); color: var(--text-muted); border: 1px solid var(--border); border-radius: 8px; cursor: pointer; transition: border-color 0.15s, box-shadow 0.15s, color 0.15s; } .topbar-search:hover { border-color: var(--primary); color: var(--text); } .topbar-search:focus-visible { outline: none; border-color: var(--primary); box-shadow: 0 0 0 3px rgba(34, 197, 94, 0.15); } .topbar-search svg { flex-shrink: 0; color: var(--text-light); } .topbar-search span { flex: 1; text-align: left; } .topbar-search kbd { font-family: var(--mono); font-size: 0.7rem; padding: 2px 6px; background: var(--bg); border: 1px solid var(--border); border-radius: 4px; color: var(--text-muted); line-height: 1; } /* Search modal */ #search-modal { position: fixed; inset: 0; z-index: 1000; font-family: var(--font); } #search-modal[hidden] { display: none; } .search-overlay { position: absolute; inset: 0; background: rgba(0, 0, 0, 0.65); backdrop-filter: blur(4px); -webkit-backdrop-filter: blur(4px); } .search-dialog { position: absolute; top: 12%; left: 50%; transform: translateX(-50%); width: 92%; max-width: 640px; max-height: 70vh; display: flex; flex-direction: column; background: var(--bg-sidebar); color: var(--text); border: 1px solid var(--border); border-radius: 10px; box-shadow: 0 24px 64px rgba(0, 0, 0, 0.6); overflow: hidden; } .search-dialog-header { position: relative; display: flex; align-items: center; gap: 0.5rem; padding: 12px 14px; border-bottom: 1px solid var(--border); background: var(--bg); } .search-dialog-header svg { flex-shrink: 0; color: var(--text-light); } #search-input { flex: 1; font-family: var(--font); 
font-size: 0.95rem; background: transparent; color: var(--text); border: none; outline: none; padding: 4px 0; } #search-input::placeholder { color: var(--text-light); } #search-close { background: transparent; border: 1px solid var(--border); color: var(--text-muted); padding: 2px 8px; border-radius: 4px; font-family: var(--mono); font-size: 0.7rem; cursor: pointer; line-height: 1.4; } #search-close:hover { color: var(--text); border-color: var(--primary); } #search-results { flex: 1; overflow-y: auto; padding: 8px; } #search-results::-webkit-scrollbar { width: 6px; } #search-results::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; } .search-result { padding: 10px 12px; border-radius: 6px; cursor: pointer; transition: background 0.12s; } .search-result + .search-result { margin-top: 2px; } .search-result:hover, .search-result.selected { background: var(--hover-bg); } .search-result-title { font-size: 0.9rem; font-weight: 600; color: var(--primary); margin-bottom: 2px; } .search-result-description { font-size: 0.8rem; color: var(--text-muted); line-height: 1.45; } .search-result-content { font-size: 0.78rem; color: var(--text-light); margin-top: 4px; line-height: 1.45; font-family: var(--mono); } .search-result mark { background: rgba(34, 197, 94, 0.22); color: var(--text); padding: 0 2px; border-radius: 2px; } .search-empty { padding: 1.5rem 1rem; text-align: center; color: var(--text-muted); font-size: 0.85rem; } /* -- Layout -- */ .layout { display: flex; min-height: calc(100vh - 52px); } /* -- Sidebar -- */ .sidebar { position: sticky; top: 52px; width: var(--sidebar-w); height: calc(100vh - 52px); overflow-y: auto; padding: 1.25rem 0; border-right: 1px solid var(--border); background: var(--bg-sidebar); flex-shrink: 0; } .sidebar::-webkit-scrollbar { width: 4px; } .sidebar::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; } .sidebar::-webkit-scrollbar-track { background: transparent; } .sidebar-section { 
margin-bottom: 0.25rem; } .sidebar-heading { display: block; padding: 0.35rem 1.25rem; font-size: 0.75rem; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; color: var(--text-muted); } .sidebar-nav { list-style: none; } .sidebar-nav a { display: flex; align-items: center; gap: 0.35rem; padding: 0.3rem 1.25rem 0.3rem 1.5rem; font-size: 0.875rem; color: var(--text-muted); text-decoration: none; border-left: 2px solid transparent; transition: color 0.15s, background 0.15s, border-color 0.15s; } .sidebar-nav a:hover { color: var(--text); background: var(--hover-bg); } .sidebar-nav a.active { color: var(--primary); font-weight: 500; background: var(--primary-light); border-left-color: var(--primary); } /* Nested nav */ .sidebar-nav .nested { list-style: none; } .sidebar-nav .nested a { padding-left: 2.25rem; font-size: 0.825rem; } .sidebar-nav .nested .nested a { padding-left: 3rem; } .sidebar-toggle { display: flex; align-items: center; gap: 0.25rem; width: 100%; padding: 0.3rem 1.25rem 0.3rem 1.5rem; font-family: var(--font); font-size: 0.875rem; color: var(--text-muted); background: none; border: none; border-left: 2px solid transparent; cursor: pointer; text-align: left; transition: color 0.15s, background 0.15s; } .sidebar-toggle:hover { color: var(--text); background: var(--hover-bg); } .sidebar-toggle .arrow { display: inline-block; width: 16px; text-align: center; font-size: 0.7rem; transition: transform 0.2s; } .sidebar-toggle.open .arrow { transform: rotate(90deg); } /* -- Main Content -- */ .main { flex: 1; min-width: 0; padding: 2rem 2.5rem; max-width: calc(var(--content-max) + 5rem); } /* -- Prose -- */ .prose h1 { font-size: 1.75rem; font-weight: 700; margin: 0 0 0.75rem; line-height: 1.3; color: var(--text); } .prose h2 { font-size: 1.35rem; font-weight: 600; margin: 2rem 0 0.5rem; padding-bottom: 0.35rem; border-bottom: 1px solid var(--border); line-height: 1.3; color: var(--text); } .prose h3 { font-size: 1.1rem; font-weight: 600; 
margin: 1.5rem 0 0.4rem; line-height: 1.3; color: var(--text); } .prose h4 { font-size: 0.95rem; font-weight: 600; margin: 1.25rem 0 0.35rem; color: var(--text); } .prose p { margin: 0.75rem 0; color: var(--text); } .prose a { color: var(--primary); text-decoration: none; } .prose a:hover { text-decoration: underline; } .prose strong { font-weight: 600; color: var(--text); } .prose img { max-width: 100%; border-radius: 8px; margin: 1rem 0; } .prose blockquote { margin: 1rem 0; padding: 0.5rem 1rem; border-left: 3px solid var(--primary); background: var(--primary-light); border-radius: 0 6px 6px 0; color: var(--text); } .prose blockquote p { margin: 0.25rem 0; } .prose ul, .prose ol { margin: 0.75rem 0; padding-left: 1.5rem; } .prose li { margin: 0.25rem 0; color: var(--text); } .prose li::marker { color: var(--text-muted); } .prose code { font-family: var(--mono); font-size: 0.85em; background: var(--code-bg); padding: 0.15rem 0.4rem; border-radius: 4px; border: 1px solid var(--border); color: var(--primary); } .prose pre { margin: 1rem 0; padding: 1rem; background: var(--code-bg); border: 1px solid var(--border); border-radius: 8px; overflow-x: auto; line-height: 1.5; } .prose pre code { background: none; border: none; padding: 0; font-size: 0.85rem; color: var(--text); } .prose table { width: 100%; border-collapse: collapse; margin: 1rem 0; font-size: 0.9rem; } .prose th, .prose td { padding: 0.5rem 0.75rem; border: 1px solid var(--border); text-align: left; } .prose th { background: var(--code-bg); font-weight: 600; color: var(--text); } .prose td { color: var(--text-muted); } .prose hr { border: none; border-top: 1px solid var(--border); margin: 2rem 0; } /* -- Page Navigation -- */ .page-nav { display: flex; justify-content: space-between; gap: 1rem; margin-top: 3rem; padding-top: 1.5rem; border-top: 1px solid var(--border); } .page-nav a { display: flex; flex-direction: column; gap: 0.15rem; padding: 0.75rem 1rem; text-decoration: none; border: 1px solid 
var(--border); border-radius: 8px; flex: 1; max-width: 50%; transition: border-color 0.2s, box-shadow 0.2s; } .page-nav a:hover { border-color: var(--primary); box-shadow: 0 0 0 3px rgba(34, 197, 94, 0.08); } .page-nav a .label { font-size: 0.75rem; color: var(--text-light); text-transform: uppercase; letter-spacing: 0.04em; } .page-nav a .title { font-size: 0.9rem; color: var(--primary); font-weight: 500; } .page-nav .next { text-align: right; margin-left: auto; } /* -- Section list -- */ ul.section-list { list-style: none; margin: 1rem 0; } ul.section-list li { padding: 0.5rem 0; border-bottom: 1px solid var(--border); } ul.section-list li:last-child { border-bottom: none; } ul.section-list li a { color: var(--primary); text-decoration: none; font-weight: 500; } ul.section-list li a:hover { text-decoration: underline; } nav.pagination { margin: 1.5rem 0; } nav.pagination .pagination-list { list-style: none; display: flex; gap: 0.5rem; flex-wrap: wrap; align-items: center; } nav.pagination a { display: inline-block; padding: 0.25rem 0.55rem; border-radius: 6px; border: 1px solid var(--border); color: var(--text-muted); text-decoration: none; font-size: 0.85rem; } nav.pagination a:hover { color: var(--primary); border-color: var(--primary); } .pagination-current span { display: inline-block; padding: 0.25rem 0.55rem; border-radius: 6px; border: 1px solid var(--primary); background: var(--primary-light); font-size: 0.85rem; } .pagination-disabled span { display: inline-block; padding: 0.25rem 0.55rem; border-radius: 6px; border: 1px solid var(--border); color: var(--text-muted); opacity: 0.5; font-size: 0.85rem; } /* -- Footer -- */ .site-footer { padding: 1.5rem 2.5rem; border-top: 1px solid var(--border); color: var(--text-light); font-size: 0.8rem; } .site-footer a { color: var(--text-muted); text-decoration: none; } .site-footer a:hover { color: var(--primary); } /* -- Alert shortcode -- */ .alert { padding: 0.75rem 1rem; border-radius: 6px; margin: 1rem 0; 
font-size: 0.9rem; border-left: 4px solid; } .alert-info { background: var(--primary-light); border-color: var(--primary); color: var(--primary); } .alert-warning { background: var(--accent-light); border-color: var(--accent); color: var(--accent); } .alert-danger { background: #1a0508; border-color: #ef4444; color: #ef4444; } .alert-tip { background: var(--primary-light); border-color: #22c55e; color: #22c55e; } /* -- Hint shortcode -- */ .hint { padding: 0.75rem 1rem; border-radius: 6px; margin: 1rem 0; font-size: 0.9rem; border-left: 4px solid; } .hint-info { background: var(--primary-light); border-color: var(--primary); color: var(--primary); } .hint-warning { background: var(--accent-light); border-color: var(--accent); color: var(--accent); } .hint-danger { background: #1a0508; border-color: #ef4444; color: #ef4444; } /* -- Responsive -- */ @media (max-width: 768px) { .sidebar { position: fixed; left: -100%; top: 52px; z-index: 90; width: 280px; transition: left 0.25s ease; box-shadow: none; } .sidebar.open { left: 0; box-shadow: 4px 0 24px rgba(0, 0, 0, 0.4); } .sidebar-overlay { display: none; position: fixed; inset: 0; top: 52px; z-index: 80; background: rgba(0, 0, 0, 0.6); } .sidebar-overlay.open { display: block; } .menu-btn { display: block; } .main { padding: 1.5rem 1rem; } .site-footer { padding: 1.5rem 1rem; } .page-nav { flex-direction: column; } .page-nav a { max-width: 100%; } .topbar-search { width: auto; padding: 6px 10px; } .topbar-search span, .topbar-search kbd { display: none; } } ================================================ FILE: docs/static/icons/site.webmanifest ================================================ { "name": "DeadFinder", "short_name": "DeadFinder", "icons": [ { "src": "/icons/web-app-manifest-192x192.png", "sizes": "192x192", "type": "image/png", "purpose": "maskable" }, { "src": "/icons/web-app-manifest-512x512.png", "sizes": "512x512", "type": "image/png", "purpose": "maskable" } ], "theme_color": "#ffffff", 
"background_color": "#ffffff", "display": "standalone" } ================================================ FILE: docs/static/js/search.js ================================================ // Guard against double-load (auto-includes + explicit ================================================ FILE: docs/templates/header.html ================================================ {{ page.title }} - {{ site.title }} {{ og_all_tags }} {{ highlight_css }} {{ auto_includes_css }}
================================================ FILE: docs/templates/page.html ================================================ {% include "header.html" %}

{{ page.title }}

{{ content }}
{% include "footer.html" %} ================================================ FILE: docs/templates/section.html ================================================ {% include "header.html" %}

{{ page.title }}

{{ content }}
    {{ section.list }}
{{ pagination }}
{% include "footer.html" %} ================================================ FILE: docs/templates/shortcodes/alert.html ================================================
{{ type | upper }}: {{ message }}
================================================ FILE: docs/templates/taxonomy.html ================================================ {% include "header.html" %}

{{ page.title }}

Browse all terms in this taxonomy:

{{ content }}
{% include "footer.html" %} ================================================ FILE: docs/templates/taxonomy_term.html ================================================ {% include "header.html" %}

{{ page.title }}

Pages tagged with this term:

{{ content }}
{
  description = "DeadFinder — find dead (broken) links in web pages, URL lists, and sitemaps";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = import nixpkgs { inherit system; };

        # lexbor.cr's postinstall hook clones the upstream lexbor C library
        # from GitHub at a pinned commit (lib/lexbor/src/ext/revision) and
        # builds it via cmake. The Nix sandbox blocks network access, so
        # pre-fetch the source as a fixed-output derivation and drop it
        # into place during preBuild — then cmake runs normally.
        lexborCSrc = pkgs.fetchgit {
          url = "https://github.com/lexbor/lexbor.git";
          rev = "971faf11a5f45433b9193a143e2897d8c0fd5611";
          sha256 = "0v3ka5dhgz2jkmigdjcjm3vmxlc9yv4hks6pz13xzgagxxfwlw7s";
        };

        deadfinder = pkgs.crystal.buildCrystalPackage rec {
          pname = "deadfinder";
          # Keep in sync with shard.yml, snap/snapcraft.yaml and
          # src/deadfinder/version.cr. The version_check / version_update
          # scripts do not track this file, so it has to be bumped by hand.
          version = "2.0.2";

          src = ./.;

          # Generate with: crystal2nix > shards.nix
          shardsFile = ./shards.nix;

          nativeBuildInputs = with pkgs; [ crystal shards cmake pkg-config ];
          buildInputs = [ ];

          # lexbor.cr's postinstall hook (build_ext.cr) clones the lexbor C
          # library at a pinned commit and builds it via cmake. The Nix
          # sandbox blocks network, so we (a) replace the read-only shard
          # symlink with a writable copy, (b) drop in the pre-fetched C
          # source, and (c) run cmake directly here — bypassing build_ext.cr.
          preBuild = ''
            cp -RL lib/lexbor lib/lexbor.rw
            chmod -R u+w lib/lexbor.rw
            rm lib/lexbor
            mv lib/lexbor.rw lib/lexbor
            cp -r ${lexborCSrc} lib/lexbor/src/ext/lexbor-c
            chmod -R u+w lib/lexbor/src/ext/lexbor-c
            mkdir -p lib/lexbor/src/ext/lexbor-c/build
            ( cd lib/lexbor/src/ext/lexbor-c/build \
              && cmake .. \
                -DCMAKE_BUILD_TYPE=Release \
                -DLEXBOR_BUILD_TESTS_CPP=OFF \
                -DLEXBOR_INSTALL_HEADERS=OFF \
                -DLEXBOR_BUILD_SHARED=ON \
                -G "Unix Makefiles" \
              && cmake --build . --config Release -j $NIX_BUILD_CORES )
          '';

          buildPhase = ''
            runHook preBuild
            shards build --release --no-debug
            runHook postBuild
          '';

          installPhase = ''
            runHook preInstall
            mkdir -p $out/bin
            cp bin/deadfinder $out/bin/deadfinder
            runHook postInstall
          '';

          doCheck = false;

          meta = with pkgs.lib; {
            description = "Find dead (broken) links in web pages, URL lists, and sitemaps";
            homepage = "https://github.com/hahwul/deadfinder";
            license = licenses.mit;
            maintainers = [ "hahwul" ];
            mainProgram = "deadfinder";
          };
        };
      in
      {
        packages.default = deadfinder;
        packages.deadfinder = deadfinder;

        # Development shell: inherits the package's build inputs and adds
        # the tooling used by the justfile recipes.
        devShells.default = pkgs.mkShell {
          inputsFrom = [ deadfinder ];
          nativeBuildInputs = with pkgs; [ crystal shards crystal2nix cmake pkg-config just ];
          shellHook = ''
            echo "deadfinder development environment (Nix)"
            [ -d lib ] || shards install
          '';
        };
      });
}
# Reads the top-level `version` key from the YAML document at *path*.
# Returns nil when the file cannot be read or parsed, or when the key is
# missing or not a string (any raised exception is turned into nil).
def shard_version(path : String) : String?
  begin
    document = YAML.parse(File.read(path))
    document["version"].as_s
  rescue
    nil
  end
end

# Returns the first capture group of *pattern* matched against the contents
# of *path*, or nil when the pattern does not match or reading/matching
# raises.
def match_pattern(path : String, pattern : Regex) : String?
  begin
    File.read(path).match(pattern).try &.[1]
  rescue
    nil
  end
end
# Rewrites the first match of *pattern* in the file at *path* with
# *replacement*.
#
# Returns true when the file is absent (untracked files are skipped), when
# the file was rewritten, or when it already contains the replacement text.
# Returns false only when *pattern* does not match at all, after reporting
# that on STDERR.
def replace_in_file(path : String, pattern : Regex, replacement : String) : Bool
  return true unless File.exists?(path)

  src = File.read(path)

  # Check for a match first so a file that is already at the requested
  # version is not misreported as "pattern not found".
  unless src.matches?(pattern)
    STDERR.puts "#{path}: pattern not found"
    return false
  end

  updated = src.sub(pattern, replacement)
  if updated == src
    puts "#{path}: already up to date"
    return true
  end

  File.write(path, updated)
  puts "#{path}: updated"
  true
end
ok &= replace_in_file(SHARD_YML, /^version:\s*[^\n]+$/m, "version: #{nv}") ok &= replace_in_file(VERSION_CR, /VERSION\s*=\s*"[^"]+"/, %(VERSION = "#{nv}")) ok &= replace_in_file(SPEC_TOP, /VERSION\.should\s+eq\s+"[^"]+"/, %(VERSION.should eq "#{nv}")) ok &= replace_in_file(SPEC_CLI, /VERSION\.should\s+eq\s+"[^"]+"/, %(VERSION.should eq "#{nv}")) ok &= replace_in_file(SNAPCRAFT, /^version:\s*[^\n]+$/m, "version: #{nv}") ok &= replace_in_file(PKGBUILD, /^pkgver=[^\n]+$/m, "pkgver=#{nv}") exit(ok ? 0 : 1) ================================================ FILE: shard.yml ================================================ name: deadfinder version: 2.0.2 authors: - hahwul targets: deadfinder: main: src/cli_main.cr dependencies: lexbor: github: kostya/lexbor stumpy_png: github: stumpycr/stumpy_png version: "~> 5.0" sarif: github: hahwul/sarif.cr version: "~> 0.2.0" development_dependencies: webmock: github: manastech/webmock.cr version: "~> 0.14" crystal: '>= 1.19.1' license: MIT ================================================ FILE: shards.nix ================================================ { "lexbor" = { url = "https://github.com/kostya/lexbor.git"; rev = "v3.4.2"; sha256 = "0bsncwsvqf5zns0c56va1l9gc7798pvl34i6yh8jf1syqxkvdb8a"; }; "stumpy_core" = { url = "https://github.com/stumpycr/stumpy_core.git"; rev = "v1.9.1"; sha256 = "1sj5wr9zrxnihnjwq057lah09lsl9jq6j7giwwv3ds9wp9j9z903"; }; "stumpy_png" = { url = "https://github.com/stumpycr/stumpy_png.git"; rev = "v5.0.1"; sha256 = "15wiawl0n3n596bdi0k9dd08nxln2smffba7mggdffw241mn89jc"; }; "webmock" = { url = "https://github.com/manastech/webmock.cr.git"; rev = "v0.14.0"; sha256 = "1h008sx33xq0hha2lxd5dsh2wr7rzlv4nifgr4k5knpw5ahq1f88"; }; } ================================================ FILE: snap/snapcraft.yaml ================================================ name: deadfinder base: core24 version: 2.0.2 summary: Find dead (broken) links in web pages, URL lists, and sitemaps. 
description: | DeadFinder is a fast CLI tool for detecting broken links on a page, a list of URLs, or an entire sitemap. Written in Crystal for native speed and fiber-based concurrency. Supports JSON/YAML/TOML/CSV output and coverage reporting. grade: stable confinement: strict license: MIT apps: deadfinder: command: deadfinder plugs: - home - removable-media - network - network-bind parts: deadfinder: source: ./ plugin: nil override-build: | curl -fsSL https://crystal-lang.org/install.sh | bash shards install --production shards build --release --no-debug --production cp ./bin/deadfinder $CRAFT_PART_INSTALL/ build-packages: - git - curl - cmake - make - g++ - pkg-config - libssl-dev - libxml2-dev - libz-dev - libyaml-dev - libpcre2-dev - libevent-dev - libgmp-dev stage-packages: - libxml2 - zlib1g - libyaml-0-2 - ca-certificates ================================================ FILE: spec/compat/README.md ================================================ # Compatibility harness Ruby 원본 v1의 출력을 **골든 파일로 동결**하고, Crystal 바이너리가 동일 출력을 내는지 검증하는 블랙박스 테스트다. ## 구조 ``` spec/compat/ ├── fixtures/ │ └── server.rb # 최소 HTTP fixture 서버 (Ruby stdlib only) ├── golden/ │ └── <case>.{json,yaml,toml,csv} # 기대 출력. {{BASE}} 플레이스홀더 ├── run.rb # 드라이버: 서버 기동 → 바이너리 실행 → 비교 └── README.md ``` ## 실행 ```bash shards install crystal build src/cli_main.cr -o deadfinder --release BIN="./deadfinder" ruby spec/compat/run.rb ``` ## 케이스 추가 1. `fixtures/server.rb`의 `ROUTES`에 필요한 경로 추가 2. `golden/<case>.<format>`에 기대 출력 작성 (`{{BASE}}`로 origin 표현) 3. `run.rb` 맨 아래 `run_case(...)` 한 줄 추가 ## 비교 규칙 - 배열은 정렬 후 비교 (링크 추출 순서 비결정성 흡수) - `{{BASE}}` 플레이스홀더는 실행 시 동적 포트로 치환 - 출력은 `-o <file>`로 받아 파일에서 파싱 ## 왜 Ruby 드라이버? 골든 파일은 v1 Ruby 출력의 스냅샷이고, 비교 로직에 `toml-rb` 같은 파서가 필요해서 그대로 Ruby 드라이버를 유지했다. Crystal로 포팅할 수도 있지만 CI 복잡도 대비 이득이 적다.
#!/usr/bin/env ruby
# frozen_string_literal: true

# Minimal HTTP fixture server for the compat harness.
#
# Binds a TCP socket on 127.0.0.1 with port 0, prints the resulting port
# number on the first line of stdout, then answers one request per accepted
# connection from the static ROUTES table. Paths that are not in the table
# get an empty 404. The process exits with status 0 on TERM or INT.

require 'socket'

# Request path => canned response. `extra` holds additional response headers.
ROUTES = {
  '/index.html' => {
    status: 200,
    content_type: 'text/html',
    # NOTE(review): this body appears to have lost its HTML markup in the
    # extracted view (only the link texts remain). The golden files expect
    # the page to link to /dead and /redirect — confirm against the real
    # fixture before relying on this literal.
    body: <<~HTML
      ok
      dead
      redirect
    HTML
  },
  '/ok' => { status: 200, content_type: 'text/plain', body: 'OK' },
  '/dead' => { status: 404, content_type: 'text/plain', body: 'Not Found' },
  '/redirect' => {
    status: 301,
    content_type: 'text/plain',
    body: '',
    extra: { 'Location' => '/ok' }
  }
}.freeze

# Reason phrases for the status codes used in ROUTES.
STATUS_TEXT = { 200 => 'OK', 301 => 'Moved Permanently', 404 => 'Not Found' }.freeze

# Port 0 leaves the port choice to the OS; the chosen port is printed and
# flushed immediately so a parent process reading stdout can pick it up.
server = TCPServer.new('127.0.0.1', 0)
puts server.addr[1]
STDOUT.flush

trap('TERM') { exit 0 }
trap('INT') { exit 0 }

loop do
  client = server.accept
  begin
    request_line = client.gets
    # Second token of the request line is the target; fall back to '/' when
    # the line is missing or malformed.
    raw_path = request_line&.split(' ')&.dig(1) || '/'
    # Routing ignores the query string.
    path = raw_path.split('?').first
    # Drain the request headers up to the blank line; they are not inspected.
    while (line = client.gets) && line.strip != ''; end
    route = ROUTES[path]
    if route
      headers = {
        'Content-Type' => route[:content_type],
        'Content-Length' => route[:body].bytesize.to_s
      }.merge(route[:extra] || {})
      client.print "HTTP/1.1 #{route[:status]} #{STATUS_TEXT[route[:status]] || 'OK'}\r\n"
      headers.each { |k, v| client.print "#{k}: #{v}\r\n" }
      client.print "\r\n#{route[:body]}"
    else
      client.print "HTTP/1.1 404 Not Found\r\nContent-Length: 0\r\n\r\n"
    end
  rescue StandardError
    # swallow: test fixture, keep accepting
  ensure
    client&.close
  end
end
{{BASE}}/index.html,{{BASE}}/dead ================================================ FILE: spec/compat/golden/url_json.json ================================================ { "{{BASE}}/index.html": [ "{{BASE}}/dead" ] } ================================================ FILE: spec/compat/golden/url_json_include30x.json ================================================ { "{{BASE}}/index.html": [ "{{BASE}}/dead", "{{BASE}}/redirect" ] } ================================================ FILE: spec/compat/golden/url_toml.toml ================================================ "{{BASE}}/index.html" = ["{{BASE}}/dead"] ================================================ FILE: spec/compat/golden/url_yaml.yaml ================================================ --- {{BASE}}/index.html: - {{BASE}}/dead ================================================ FILE: spec/compat/run.rb ================================================ #!/usr/bin/env ruby # frozen_string_literal: true # Black-box compatibility harness for the deadfinder Crystal binary. # # The golden files in this directory were captured from the v1 Ruby # implementation and now act as the frozen contract the Crystal binary # must match. The harness runs the binary under test against a local # fixture server, writes the output to a temp file, and compares the # parsed structure to the corresponding golden file (with `{{BASE}}` # substituted for the dynamic fixture origin). 
# FileUtils is used for cleanup in run_case; require it explicitly instead of
# relying on tempfile/tmpdir loading it as a side effect.
require 'fileutils'

# Returns a copy of +obj+ in which every array (at any nesting depth) is
# sorted by the string form of its elements, so two structures that differ
# only in array order compare equal. Hash values are normalised recursively;
# any other object is returned unchanged.
def sort_arrays(obj)
  case obj
  when Hash then obj.transform_values { |v| sort_arrays(v) }
  when Array then obj.map { |v| sort_arrays(v) }.sort_by(&:to_s)
  else obj
  end
end

# Parses +text+ according to +format+ ('json', 'yaml'/'yml', 'toml', 'csv').
#
# Raises RuntimeError for an unknown format. The TOML branch needs the
# third-party toml-rb gem to be loaded by the caller.
def parse_text(text, format)
  case format
  when 'json' then JSON.parse(text)
  when 'yaml', 'yml' then YAML.safe_load(text)
  when 'toml' then TomlRB.parse(text)
  when 'csv' then CSV.parse(text)
  else raise "unknown format: #{format}"
  end
end

# Reads the file at +path+ and parses it as +format+ (see parse_text).
def parse_output(path, format)
  parse_text(File.read(path), format)
end

# Replaces every `{{BASE}}` placeholder in +text+ with +base+.
def substitute_base(text, base)
  text.gsub('{{BASE}}', base)
end

# Runs one compatibility case and returns true when the binary's output
# matches the golden file (array order ignored), false otherwise.
#
# base        - origin of the fixture server, substituted for `{{BASE}}`
# name        - label used in failure messages
# args        - CLI arguments as a single string (may contain `{{BASE}}`)
# format      - output format passed to `-f` and used for parsing
# golden      - path of the golden file holding the expected output
# stdin       - optional data fed to the binary's stdin
# extra_files - { path => content } written before the run and removed after
#
# Failure details (command, stdout, stderr or expected/actual) go to stderr.
def run_case(base, name:, args:, format:, golden:, stdin: nil, extra_files: {})
  extra_files.each do |path, content|
    File.write(path, substitute_base(content, base))
  end

  Tempfile.create(['deadfinder', ".#{format}"]) do |tmp|
    resolved_args = substitute_base(args, base)
    cmd = "#{BIN} #{resolved_args} -o #{tmp.path} -f #{format} -s"
    stdout, stderr, status = Open3.capture3(cmd, stdin_data: stdin || '')

    unless status.success?
      warn "FAIL: #{name} — exit #{status.exitstatus}"
      warn "CMD: #{cmd}"
      warn "STDOUT: #{stdout}"
      warn "STDERR: #{stderr}"
      return false
    end

    # Parse the golden text in memory; writing it to a second temp file just
    # to read it back left that file behind until process exit.
    expected = parse_text(substitute_base(File.read(golden), base), format)
    actual = parse_output(tmp.path, format)

    if sort_arrays(actual) == sort_arrays(expected)
      true
    else
      warn "FAIL: #{name}"
      warn "EXPECTED: #{expected.inspect}"
      warn "ACTUAL: #{actual.inspect}"
      false
    end
  end
ensure
  extra_files.each_key { |path| FileUtils.rm_f(path) }
end
"#{HARNESS_ROOT}/golden/file_json.json", extra_files: { urls_file => "{{BASE}}/index.html\n" }) results << run_case(base, name: 'pipe_json', args: 'pipe', format: 'json', golden: "#{HARNESS_ROOT}/golden/pipe_json.json", stdin: substitute_base("{{BASE}}/index.html\n", base)) exit(results.all? ? 0 : 1) ================================================ FILE: spec/deadfinder/cli_spec.cr ================================================ require "../spec_helper" describe Deadfinder::CLI do before_each do WebMock.reset reset_deadfinder_state end describe "Options defaults" do it "has correct default values" do options = Deadfinder::Options.new options.concurrency.should eq 50 options.timeout.should eq 10 options.output.should eq "" options.output_format.should eq "json" options.headers.should eq [] of String options.worker_headers.should eq [] of String options.silent.should be_false options.verbose.should be_false options.debug.should be_false options.include30x.should be_false options.proxy.should eq "" options.proxy_auth.should eq "" options.match.should eq "" options.ignore.should eq "" options.coverage.should be_false options.visualize.should eq "" options.limit.should eq 0 end end describe "completion scripts" do it "generates bash completion script" do script = Deadfinder::Completion.bash script.should contain "_deadfinder_completions" script.should contain "complete -F _deadfinder_completions deadfinder" script.should contain "COMPREPLY" end it "generates zsh completion script" do script = Deadfinder::Completion.zsh script.should contain "#compdef deadfinder" script.should contain "_arguments" script.should contain "--include30x" end it "generates fish completion script" do script = Deadfinder::Completion.fish script.should contain "complete -c deadfinder -l include30x" script.should contain "complete -c deadfinder -l debug -d 'Debug mode'" script.should contain "complete -c deadfinder -l concurrency" end end describe "version" do it "has correct version" do 
# Specs for Deadfinder::HttpClient.
#
# The `.create` examples only assert that an HTTP::Client is returned; they
# do not verify that the timeout, proxy or TLS settings were applied to it.
# `default_test_options` and `reset_deadfinder_state` are helpers defined
# outside this file (presumably in spec_helper — confirm).
describe Deadfinder::HttpClient do
  before_each do
    reset_deadfinder_state
  end

  describe ".create" do
    it "creates a basic HTTP client" do
      uri = URI.parse("http://example.com")
      options = default_test_options
      client = Deadfinder::HttpClient.create(uri, options)
      client.should be_a(HTTP::Client)
    end

    it "creates an HTTPS client with SSL" do
      uri = URI.parse("https://example.com")
      options = default_test_options
      client = Deadfinder::HttpClient.create(uri, options)
      client.should be_a(HTTP::Client)
    end

    it "creates client with custom timeout without error" do
      uri = URI.parse("http://example.com")
      options = default_test_options
      options.timeout = 5
      client = Deadfinder::HttpClient.create(uri, options)
      client.should be_a(HTTP::Client)
    end

    # A proxy string without a URL scheme/host must not make `.create` raise.
    it "falls back to direct connection when proxy has no host" do
      uri = URI.parse("http://example.com")
      options = default_test_options
      options.proxy = "not-a-valid-proxy"
      client = Deadfinder::HttpClient.create(uri, options)
      client.should be_a(HTTP::Client)
    end

    it "creates client without proxy when proxy is empty" do
      uri = URI.parse("http://example.com")
      options = default_test_options
      options.proxy = ""
      client = Deadfinder::HttpClient.create(uri, options)
      client.should be_a(HTTP::Client)
    end

    it "creates an HTTPS client when insecure flag is enabled" do
      uri = URI.parse("https://example.com")
      options = default_test_options
      options.insecure = true
      client = Deadfinder::HttpClient.create(uri, options)
      client.should be_a(HTTP::Client)
    end

    # Also pins the default: `insecure` is false unless explicitly set.
    it "creates an HTTPS client with verification enabled by default" do
      uri = URI.parse("https://example.com")
      options = default_test_options
      options.insecure.should be_false
      client = Deadfinder::HttpClient.create(uri, options)
      client.should be_a(HTTP::Client)
    end
  end

  describe ".proxy_configured?" do
    it "returns false when proxy is empty" do
      options = default_test_options
      options.proxy = ""
      Deadfinder::HttpClient.proxy_configured?(options).should be_false
    end

    it "returns true when proxy is set" do
      options = default_test_options
      options.proxy = "http://proxy.example.com:8080"
      Deadfinder::HttpClient.proxy_configured?(options).should be_true
    end
  end

  describe ".absolute_uri" do
    # The path and query string must survive the round trip unchanged.
    it "returns the full URI string" do
      uri = URI.parse("http://example.com/path?q=1")
      Deadfinder::HttpClient.absolute_uri(uri).should eq("http://example.com/path?q=1")
    end
  end
end
do it "returns false by default" do Deadfinder::Logger.silent?.should be_false end end describe ".set_silent / .unset_silent" do it "sets and unsets silent mode" do Deadfinder::Logger.set_silent Deadfinder::Logger.silent?.should be_true Deadfinder::Logger.unset_silent Deadfinder::Logger.silent?.should be_false end end describe ".verbose?" do it "returns false by default" do Deadfinder::Logger.verbose?.should be_false end end describe ".set_verbose / .unset_verbose" do it "sets and unsets verbose mode" do Deadfinder::Logger.set_verbose Deadfinder::Logger.verbose?.should be_true Deadfinder::Logger.unset_verbose Deadfinder::Logger.verbose?.should be_false end end describe ".debug?" do it "returns false by default" do Deadfinder::Logger.debug?.should be_false end end describe ".set_debug / .unset_debug" do it "sets and unsets debug mode" do Deadfinder::Logger.set_debug Deadfinder::Logger.debug?.should be_true Deadfinder::Logger.unset_debug Deadfinder::Logger.debug?.should be_false end end describe "output suppression in silent mode" do it "does not output when silent" do Deadfinder::Logger.set_silent # These should not raise and should produce no visible output Deadfinder::Logger.info("test") Deadfinder::Logger.error("test") Deadfinder::Logger.target("test") Deadfinder::Logger.sub_info("test") Deadfinder::Logger.sub_complete("test") Deadfinder::Logger.found("test") end end end ================================================ FILE: spec/deadfinder/runner_spec.cr ================================================ require "../spec_helper" describe Deadfinder::Runner do before_each { WebMock.reset } describe "#run" do it "finds broken links (404)" do target = "http://example.com" html = <<-HTML Broken Valid HTML WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/broken").to_return(status: 404) WebMock.stub(:get, "http://example.com/valid").to_return(status: 200) runner = Deadfinder::Runner.new options = default_test_options args = 
make_runner_args runner.run(target, options, **args) args[:output][target]?.should_not be_nil args[:output][target].should contain "http://example.com/broken" args[:output][target].should_not contain "http://example.com/valid" end it "finds multiple broken links" do target = "http://example.com" html = <<-HTML D1 D2 OK HTML WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/dead1").to_return(status: 404) WebMock.stub(:get, "http://example.com/dead2").to_return(status: 500) WebMock.stub(:get, "http://example.com/ok").to_return(status: 200) runner = Deadfinder::Runner.new options = default_test_options args = make_runner_args runner.run(target, options, **args) args[:output][target].should contain "http://example.com/dead1" args[:output][target].should contain "http://example.com/dead2" args[:output][target].should_not contain "http://example.com/ok" end it "does not flag 3xx as dead by default" do target = "http://example.com" html = %(R) WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/redirect").to_return(status: 301) runner = Deadfinder::Runner.new options = default_test_options args = make_runner_args runner.run(target, options, **args) (args[:output][target]? 
|| [] of String).should_not contain "http://example.com/redirect" end it "flags 3xx as dead when include30x is true" do target = "http://example.com" html = %(R) WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/redirect").to_return(status: 301) runner = Deadfinder::Runner.new options = default_test_options options.include30x = true args = make_runner_args runner.run(target, options, **args) args[:output][target]?.should_not be_nil args[:output][target].should contain "http://example.com/redirect" end it "respects match option - only checks matched URLs" do target = "http://example.com" html = <<-HTML Broken Valid HTML WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/broken").to_return(status: 404) # valid does not match, so a stub is unnecessary, but add one to be safe WebMock.stub(:get, "http://example.com/valid").to_return(status: 200) runner = Deadfinder::Runner.new options = default_test_options options.match = "broken" args = make_runner_args runner.run(target, options, **args) args[:output][target]?.should_not be_nil args[:output][target].should contain "http://example.com/broken" end it "respects ignore option - skips ignored URLs" do target = "http://example.com" html = <<-HTML Broken Valid HTML WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/broken").to_return(status: 404) runner = Deadfinder::Runner.new options = default_test_options options.ignore = "valid" args = make_runner_args runner.run(target, options, **args) args[:output][target]?.should_not be_nil args[:output][target].should contain "http://example.com/broken" args[:output][target].should_not contain "http://example.com/valid" end it "handles invalid match pattern gracefully" do target = "http://example.com" html = %(Link) WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/page").to_return(status: 200) runner = Deadfinder::Runner.new options = default_test_options 
options.match = "[" args = make_runner_args # Should not raise - error is logged internally runner.run(target, options, **args) end it "handles invalid ignore pattern gracefully" do target = "http://example.com" html = %(Link) WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/page").to_return(status: 200) runner = Deadfinder::Runner.new options = default_test_options options.ignore = "[" args = make_runner_args # Should not raise runner.run(target, options, **args) end it "handles target fetch failure gracefully" do target = "http://unreachable.invalid" WebMock.stub(:get, target).to_return(status: 500, body: "") runner = Deadfinder::Runner.new options = default_test_options args = make_runner_args # Should not raise runner.run(target, options, **args) end it "extracts links from all 7 HTML element types" do target = "http://example.com" html = <<-HTML Link
HTML WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/script.js").to_return(status: 404) WebMock.stub(:get, "http://example.com/style.css").to_return(status: 404) WebMock.stub(:get, "http://example.com/page").to_return(status: 404) WebMock.stub(:get, "http://example.com/frame").to_return(status: 404) WebMock.stub(:get, "http://example.com/submit").to_return(status: 404) WebMock.stub(:get, "http://example.com/object.swf").to_return(status: 404) WebMock.stub(:get, "http://example.com/embed.swf").to_return(status: 404) runner = Deadfinder::Runner.new options = default_test_options args = make_runner_args runner.run(target, options, **args) dead = args[:output][target] dead.should contain "http://example.com/script.js" dead.should contain "http://example.com/style.css" dead.should contain "http://example.com/page" dead.should contain "http://example.com/frame" dead.should contain "http://example.com/submit" dead.should contain "http://example.com/object.swf" dead.should contain "http://example.com/embed.swf" end it "resolves relative URLs against target" do target = "http://example.com/docs/" html = %(AboutPage) WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/about").to_return(status: 404) WebMock.stub(:get, "http://example.com/docs/page.html").to_return(status: 404) runner = Deadfinder::Runner.new options = default_test_options args = make_runner_args runner.run(target, options, **args) dead = args[:output][target] dead.should contain "http://example.com/about" dead.should contain "http://example.com/docs/page.html" end it "skips mailto/tel/data scheme links" do target = "http://example.com" html = <<-HTML Mail Tel Data Real HTML WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/real").to_return(status: 200) runner = Deadfinder::Runner.new options = default_test_options args = make_runner_args runner.run(target, options, **args) # No dead links from 
special schemes, and no errors dead = args[:output][target]? || [] of String dead.should_not contain "mailto:test@example.com" dead.should_not contain "tel:1234567890" end it "deduplicates URLs" do target = "http://example.com" html = <<-HTML Link1 Link2 Link3 HTML WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/dup").to_return(status: 404) runner = Deadfinder::Runner.new options = default_test_options args = make_runner_args runner.run(target, options, **args) # Should appear only once in output args[:output][target].count("http://example.com/dup").should eq 1 end it "tracks coverage data when coverage is enabled" do target = "http://example.com" html = <<-HTML Dead Ok1 Ok2 HTML WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/dead").to_return(status: 404) WebMock.stub(:get, "http://example.com/ok1").to_return(status: 200) WebMock.stub(:get, "http://example.com/ok2").to_return(status: 200) runner = Deadfinder::Runner.new options = default_test_options options.coverage = true args = make_runner_args runner.run(target, options, **args) cov = args[:coverage_data][target] cov.total.should eq 3 cov.dead.should eq 1 cov.status_counts["404"].should eq 1 cov.status_counts["200"].should eq 2 end it "does not track coverage when coverage is disabled" do target = "http://example.com" html = %(L) WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://example.com/page").to_return(status: 404) runner = Deadfinder::Runner.new options = default_test_options options.coverage = false args = make_runner_args runner.run(target, options, **args) args[:coverage_data][target]?.should be_nil end it "handles empty HTML page with no links" do target = "http://example.com" WebMock.stub(:get, target).to_return(body: "") runner = Deadfinder::Runner.new options = default_test_options args = make_runner_args runner.run(target, options, **args) (args[:output][target]? 
|| [] of String).should be_empty end end describe "#worker" do it "detects 404 as broken link" do target = "http://example.com" url = "http://example.com/broken" WebMock.stub(:get, url).to_return(status: 404) runner = Deadfinder::Runner.new options = default_test_options args = make_runner_args jobs = Channel(String).new(10) results = Channel(String).new(10) jobs.send(url) jobs.close runner.worker(1, jobs, results, target, options, **args) args[:output][target].should contain url end it "detects 500 as broken link" do target = "http://example.com" url = "http://example.com/error" WebMock.stub(:get, url).to_return(status: 500) runner = Deadfinder::Runner.new options = default_test_options args = make_runner_args jobs = Channel(String).new(10) results = Channel(String).new(10) jobs.send(url) jobs.close runner.worker(1, jobs, results, target, options, **args) args[:output][target].should contain url end it "does not flag 200 as broken" do target = "http://example.com" url = "http://example.com/ok" WebMock.stub(:get, url).to_return(status: 200) runner = Deadfinder::Runner.new options = default_test_options args = make_runner_args jobs = Channel(String).new(10) results = Channel(String).new(10) jobs.send(url) jobs.close runner.worker(1, jobs, results, target, options, **args) (args[:output][target]? || [] of String).should_not contain url end it "does not flag 301 as broken without include30x" do target = "http://example.com" url = "http://example.com/moved" WebMock.stub(:get, url).to_return(status: 301) runner = Deadfinder::Runner.new options = default_test_options options.include30x = false args = make_runner_args jobs = Channel(String).new(10) results = Channel(String).new(10) jobs.send(url) jobs.close runner.worker(1, jobs, results, target, options, **args) (args[:output][target]? 
|| [] of String).should_not contain url end it "flags 301 as broken with include30x" do target = "http://example.com" url = "http://example.com/moved" WebMock.stub(:get, url).to_return(status: 301) runner = Deadfinder::Runner.new options = default_test_options options.include30x = true args = make_runner_args jobs = Channel(String).new(10) results = Channel(String).new(10) jobs.send(url) jobs.close runner.worker(1, jobs, results, target, options, **args) args[:output][target].should contain url end it "skips already cached URLs" do target = "http://example.com" url = "http://example.com/cached" WebMock.stub(:get, url).to_return(status: 404) runner = Deadfinder::Runner.new options = default_test_options args = make_runner_args # Pre-populate cache args[:cache_set][url] = true jobs = Channel(String).new(10) results = Channel(String).new(10) jobs.send(url) jobs.close runner.worker(1, jobs, results, target, options, **args) # Should NOT appear in output because it was cached (args[:output][target]? 
|| [] of String).should_not contain url end it "processes multiple jobs sequentially" do target = "http://example.com" WebMock.stub(:get, "http://example.com/a").to_return(status: 404) WebMock.stub(:get, "http://example.com/b").to_return(status: 200) WebMock.stub(:get, "http://example.com/c").to_return(status: 503) runner = Deadfinder::Runner.new options = default_test_options args = make_runner_args jobs = Channel(String).new(10) results = Channel(String).new(10) jobs.send("http://example.com/a") jobs.send("http://example.com/b") jobs.send("http://example.com/c") jobs.close runner.worker(1, jobs, results, target, options, **args) dead = args[:output][target] dead.should contain "http://example.com/a" dead.should_not contain "http://example.com/b" dead.should contain "http://example.com/c" end it "tracks coverage with status counts" do target = "http://example.com" WebMock.stub(:get, "http://example.com/ok").to_return(status: 200) WebMock.stub(:get, "http://example.com/not-found").to_return(status: 404) WebMock.stub(:get, "http://example.com/server-err").to_return(status: 500) runner = Deadfinder::Runner.new options = default_test_options options.coverage = true args = make_runner_args jobs = Channel(String).new(10) results = Channel(String).new(10) jobs.send("http://example.com/ok") jobs.send("http://example.com/not-found") jobs.send("http://example.com/server-err") jobs.close runner.worker(1, jobs, results, target, options, **args) cov = args[:coverage_data][target] cov.total.should eq 3 cov.dead.should eq 2 cov.status_counts["200"].should eq 1 cov.status_counts["404"].should eq 1 cov.status_counts["500"].should eq 1 end it "sends worker_headers with requests" do target = "http://example.com" url = "http://example.com/authed" WebMock.stub(:get, url) .with(headers: {"Authorization" => "Bearer token123"}) .to_return(status: 200) runner = Deadfinder::Runner.new options = default_test_options options.worker_headers = ["Authorization: Bearer token123"] args = 
make_runner_args jobs = Channel(String).new(10) results = Channel(String).new(10) jobs.send(url) jobs.close runner.worker(1, jobs, results, target, options, **args) # Should not be in dead links (200 response with correct headers) (args[:output][target]? || [] of String).should_not contain url end end end ================================================ FILE: spec/deadfinder/url_pattern_matcher_spec.cr ================================================ require "../spec_helper" describe Deadfinder::UrlPatternMatcher do describe ".match?" do it "returns true when the URL matches the pattern" do Deadfinder::UrlPatternMatcher.match?("http://example.com", "example").should be_true end it "returns false when the URL does not match the pattern" do Deadfinder::UrlPatternMatcher.match?("http://example.com", "nonexistent").should be_false end it "raises an error when the pattern is an invalid regex" do expect_raises(ArgumentError) do Deadfinder::UrlPatternMatcher.match?("http://example.com", "[") end end it "supports complex regex patterns" do Deadfinder::UrlPatternMatcher.match?("http://example.com/path/to/page", "path/to/\\w+").should be_true end it "supports anchored patterns" do Deadfinder::UrlPatternMatcher.match?("http://example.com", "^http://example").should be_true Deadfinder::UrlPatternMatcher.match?("http://example.com", "^https://example").should be_false end it "matches query parameters" do Deadfinder::UrlPatternMatcher.match?("http://example.com?foo=bar", "foo=bar").should be_true end end describe ".ignore?" 
do it "returns true when the URL matches the pattern" do Deadfinder::UrlPatternMatcher.ignore?("http://example.com", "example").should be_true end it "returns false when the URL does not match the pattern" do Deadfinder::UrlPatternMatcher.ignore?("http://example.com", "nonexistent").should be_false end it "raises an error when the pattern is an invalid regex" do expect_raises(ArgumentError) do Deadfinder::UrlPatternMatcher.ignore?("http://example.com", "[") end end it "can ignore multiple URL patterns with alternation" do Deadfinder::UrlPatternMatcher.ignore?("http://example.com/ads", "ads|tracking").should be_true Deadfinder::UrlPatternMatcher.ignore?("http://example.com/tracking", "ads|tracking").should be_true Deadfinder::UrlPatternMatcher.ignore?("http://example.com/page", "ads|tracking").should be_false end end describe "ReDoS guardrails" do before_each { Deadfinder::UrlPatternMatcher.clear_cache } it "rejects patterns longer than MAX_PATTERN_LENGTH" do long_pattern = "a" * (Deadfinder::UrlPatternMatcher::MAX_PATTERN_LENGTH + 1) expect_raises(Deadfinder::UrlPatternMatcher::UnsafePatternError) do Deadfinder::UrlPatternMatcher.match?("http://example.com", long_pattern) end end it "rejects classic nested-quantifier ReDoS shapes like (a+)+" do expect_raises(Deadfinder::UrlPatternMatcher::UnsafePatternError) do Deadfinder::UrlPatternMatcher.match?("aaaa", "(a+)+") end end it "rejects (a*)* " do expect_raises(Deadfinder::UrlPatternMatcher::UnsafePatternError) do Deadfinder::UrlPatternMatcher.ignore?("aaaa", "(a*)*") end end it "rejects (.+){2,} bounded-repeat variant" do expect_raises(Deadfinder::UrlPatternMatcher::UnsafePatternError) do Deadfinder::UrlPatternMatcher.match?("aaaa", "(.+){2,}") end end it "UnsafePatternError is-a ArgumentError so runner rescue still catches" do (Deadfinder::UrlPatternMatcher::UnsafePatternError < ArgumentError).should be_true end it "does not flag patterns with escaped literal parens" do # `\(a+\)+` = literal `(`, one-or-more `a`, 
literal `)`, one-or-more — # there's no actual group being quantified, so no catastrophic backtracking. Deadfinder::UrlPatternMatcher.match?("(aaa))))", "\\(a+\\)+").should be_true end end describe "regex caching" do before_each { Deadfinder::UrlPatternMatcher.clear_cache } it "reuses the compiled regex across calls with the same pattern" do pattern = "example" Deadfinder::UrlPatternMatcher.match?("http://example.com", pattern) Deadfinder::UrlPatternMatcher.match?("http://example.org", pattern) Deadfinder::UrlPatternMatcher.match?("http://other.com", pattern) # No public accessor to the cache map, but we at least exercise the # hot path to confirm it does not blow up and returns consistent results. Deadfinder::UrlPatternMatcher.match?("http://example.com", pattern).should be_true end end end ================================================ FILE: spec/deadfinder/utils_spec.cr ================================================ require "../spec_helper" describe "Deadfinder.generate_url" do base_url = "http://example.com/base/" it "returns the original URL if it starts with http://" do Deadfinder.generate_url("http://example.com", base_url).should eq "http://example.com" end it "returns the original URL if it starts with https://" do Deadfinder.generate_url("https://example.com", base_url).should eq "https://example.com" end it "prepends the scheme if the URL starts with //" do Deadfinder.generate_url("//example.com", base_url).should eq "http://example.com" end it "prepends the scheme and host if the URL starts with /" do Deadfinder.generate_url("/path", base_url).should eq "http://example.com/path" end it "returns nil if the URL should ignore the scheme" do Deadfinder.generate_url("mailto:test@example.com", base_url).should be_nil end it "prepends the base directory if the URL is relative" do Deadfinder.generate_url("relative/path", base_url).should eq "http://example.com/base/relative/path" end it "returns nil if base_url is invalid" do 
Deadfinder.generate_url("relative/path", "://invalid").should be_nil end it "returns nil for empty text" do Deadfinder.generate_url("", base_url).should be_nil end it "returns nil for whitespace-only text" do Deadfinder.generate_url(" ", base_url).should be_nil end it "returns nil for javascript: scheme" do Deadfinder.generate_url("javascript:void(0)", base_url).should be_nil end it "returns nil for data: scheme" do Deadfinder.generate_url("data:text/plain,hello", base_url).should be_nil end it "returns nil for fragment-only (#) links" do Deadfinder.generate_url("#section", base_url).should be_nil end it "handles protocol-relative URLs with https base" do Deadfinder.generate_url("//cdn.example.com/lib.js", "https://example.com/").should eq "https://cdn.example.com/lib.js" end it "resolves relative URL when base path does not end with /" do Deadfinder.generate_url("page.html", "http://example.com/dir/index.html").should eq "http://example.com/dir/page.html" end it "handles root-relative paths" do Deadfinder.generate_url("/about", "https://example.com/some/deep/path").should eq "https://example.com/about" end it "preserves non-default port when resolving root-relative paths" do Deadfinder.generate_url("/about", "http://127.0.0.1:8080/index.html").should eq "http://127.0.0.1:8080/about" end it "preserves non-default port when resolving relative paths" do Deadfinder.generate_url("about", "http://127.0.0.1:8080/index.html").should eq "http://127.0.0.1:8080/about" end it "preserves non-default port when base path is a directory" do Deadfinder.generate_url("page.html", "http://127.0.0.1:8080/dir/").should eq "http://127.0.0.1:8080/dir/page.html" end end describe "Deadfinder.ignore_scheme?" 
do it "returns true for mailto: URLs" do Deadfinder.ignore_scheme?("mailto:test@example.com").should be_true end it "returns true for tel: URLs" do Deadfinder.ignore_scheme?("tel:1234567890").should be_true end it "returns true for sms: URLs" do Deadfinder.ignore_scheme?("sms:1234567890").should be_true end it "returns true for data: URLs" do Deadfinder.ignore_scheme?("data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==").should be_true end it "returns true for file: URLs" do Deadfinder.ignore_scheme?("file:///path/to/file").should be_true end it "returns true for javascript: URLs" do Deadfinder.ignore_scheme?("javascript:void(0)").should be_true end it "returns true for fragment-only links" do Deadfinder.ignore_scheme?("#top").should be_true end it "returns false for http URLs" do Deadfinder.ignore_scheme?("http://example.com").should be_false end it "returns false for https URLs" do Deadfinder.ignore_scheme?("https://example.com").should be_false end it "returns false for relative paths" do Deadfinder.ignore_scheme?("page.html").should be_false end end ================================================ FILE: spec/deadfinder/visualizer_spec.cr ================================================ require "../spec_helper" require "stumpy_png" require "file_utils" describe Deadfinder::Visualizer do describe ".generate" do it "returns early when total_tested is zero" do data = Deadfinder::CoverageResult.new( targets: {} of String => Deadfinder::CoverageTarget, summary: Deadfinder::CoverageSummary.new( total_tested: 0, total_dead: 0, overall_coverage_percentage: 0.0, overall_status_counts: {} of String => Int32 ) ) output_path = File.tempname("viz_test", ".png") Deadfinder::Visualizer.generate(data, output_path) File.exists?(output_path).should be_false end it "creates a valid 500x300 PNG with 200 status codes" do data = Deadfinder::CoverageResult.new( targets: {} of String => Deadfinder::CoverageTarget, summary: Deadfinder::CoverageSummary.new( total_tested: 10, total_dead: 0, 
overall_coverage_percentage: 0.0, overall_status_counts: {"200" => 10} ) ) output_path = File.tempname("viz_test", ".png") begin Deadfinder::Visualizer.generate(data, output_path) File.exists?(output_path).should be_true canvas = StumpyPNG.read(output_path) canvas.width.should eq 500 canvas.height.should eq 300 # Check for green pixels (200 status = green) green = StumpyPNG::RGBA.from_rgb8(0, 255, 0) green_found = (110..180).any? { |y| canvas[250, y] == green } green_found.should be_true ensure FileUtils.rm_rf(output_path) end end it "draws orange bars for 3xx status codes" do data = Deadfinder::CoverageResult.new( targets: {} of String => Deadfinder::CoverageTarget, summary: Deadfinder::CoverageSummary.new( total_tested: 10, total_dead: 10, overall_coverage_percentage: 100.0, overall_status_counts: {"301" => 10} ) ) output_path = File.tempname("viz_test", ".png") begin Deadfinder::Visualizer.generate(data, output_path) canvas = StumpyPNG.read(output_path) orange = StumpyPNG::RGBA.from_rgb8(255, 165, 0) orange_found = (110..180).any? { |y| canvas[250, y] == orange } orange_found.should be_true ensure FileUtils.rm_rf(output_path) end end it "draws red bars for 4xx status codes" do data = Deadfinder::CoverageResult.new( targets: {} of String => Deadfinder::CoverageTarget, summary: Deadfinder::CoverageSummary.new( total_tested: 10, total_dead: 10, overall_coverage_percentage: 100.0, overall_status_counts: {"404" => 10} ) ) output_path = File.tempname("viz_test", ".png") begin Deadfinder::Visualizer.generate(data, output_path) canvas = StumpyPNG.read(output_path) red = StumpyPNG::RGBA.from_rgb8(255, 0, 0) red_found = (110..180).any? 
{ |y| canvas[250, y] == red } red_found.should be_true ensure FileUtils.rm_rf(output_path) end end it "draws purple bars for 5xx status codes" do data = Deadfinder::CoverageResult.new( targets: {} of String => Deadfinder::CoverageTarget, summary: Deadfinder::CoverageSummary.new( total_tested: 10, total_dead: 10, overall_coverage_percentage: 100.0, overall_status_counts: {"500" => 10} ) ) output_path = File.tempname("viz_test", ".png") begin Deadfinder::Visualizer.generate(data, output_path) canvas = StumpyPNG.read(output_path) purple = StumpyPNG::RGBA.from_rgb8(128, 0, 128) purple_found = (110..180).any? { |y| canvas[250, y] == purple } purple_found.should be_true ensure FileUtils.rm_rf(output_path) end end it "draws gray bars for error/unknown status codes" do data = Deadfinder::CoverageResult.new( targets: {} of String => Deadfinder::CoverageTarget, summary: Deadfinder::CoverageSummary.new( total_tested: 10, total_dead: 10, overall_coverage_percentage: 100.0, overall_status_counts: {"error" => 10} ) ) output_path = File.tempname("viz_test", ".png") begin Deadfinder::Visualizer.generate(data, output_path) canvas = StumpyPNG.read(output_path) gray = StumpyPNG::RGBA.from_rgb8(128, 128, 128) gray_found = (110..180).any? 
{ |y| canvas[250, y] == gray } gray_found.should be_true ensure FileUtils.rm_rf(output_path) end end it "creates PNG with mixed status codes" do data = Deadfinder::CoverageResult.new( targets: {} of String => Deadfinder::CoverageTarget, summary: Deadfinder::CoverageSummary.new( total_tested: 100, total_dead: 60, overall_coverage_percentage: 60.0, overall_status_counts: { "200" => 40, "301" => 20, "404" => 20, "500" => 10, "error" => 10, } ) ) output_path = File.tempname("viz_test", ".png") begin Deadfinder::Visualizer.generate(data, output_path) File.exists?(output_path).should be_true canvas = StumpyPNG.read(output_path) canvas.width.should eq 500 canvas.height.should eq 300 ensure FileUtils.rm_rf(output_path) end end it "draws outline with semi-transparent black" do data = Deadfinder::CoverageResult.new( targets: {} of String => Deadfinder::CoverageTarget, summary: Deadfinder::CoverageSummary.new( total_tested: 10, total_dead: 5, overall_coverage_percentage: 50.0, overall_status_counts: {"200" => 5, "404" => 5} ) ) output_path = File.tempname("viz_test", ".png") begin Deadfinder::Visualizer.generate(data, output_path) canvas = StumpyPNG.read(output_path) outline = StumpyPNG::RGBA.new(0_u16, 0_u16, 0_u16, 32768_u16) # Top line center canvas[250, 100].should eq outline # Bottom line center canvas[250, 190].should eq outline # Left line center canvas[10, 145].should eq outline # Right line center canvas[490, 145].should eq outline ensure FileUtils.rm_rf(output_path) end end it "skips zero-height bars" do data = Deadfinder::CoverageResult.new( targets: {} of String => Deadfinder::CoverageTarget, summary: Deadfinder::CoverageSummary.new( total_tested: 10_000, total_dead: 0, overall_coverage_percentage: 0.0, overall_status_counts: {"200" => 1} ) ) output_path = File.tempname("viz_test", ".png") begin Deadfinder::Visualizer.generate(data, output_path) canvas = StumpyPNG.read(output_path) # With 1/10000 * 70 = 0.007, height rounds to 0 so no green bars green = 
StumpyPNG::RGBA.from_rgb8(0, 255, 0) green_found = (110..180).any? { |y| canvas[250, y] == green } green_found.should be_false ensure FileUtils.rm_rf(output_path) end end it "handles empty status counts" do data = Deadfinder::CoverageResult.new( targets: {} of String => Deadfinder::CoverageTarget, summary: Deadfinder::CoverageSummary.new( total_tested: 10, total_dead: 0, overall_coverage_percentage: 0.0, overall_status_counts: {} of String => Int32 ) ) output_path = File.tempname("viz_test", ".png") begin Deadfinder::Visualizer.generate(data, output_path) File.exists?(output_path).should be_true canvas = StumpyPNG.read(output_path) canvas.width.should eq 500 canvas.height.should eq 300 ensure FileUtils.rm_rf(output_path) end end end end ================================================ FILE: spec/deadfinder_spec.cr ================================================ require "./spec_helper" describe Deadfinder do before_each do WebMock.reset reset_deadfinder_state end describe "#version" do it "returns the version number" do Deadfinder::VERSION.should_not be_nil Deadfinder::VERSION.should eq "2.0.2" end end describe ".reset_state" do it "clears output, coverage_data, and cache_set accumulators" do Deadfinder.output["foo"] = ["bar"] Deadfinder.coverage_data["foo"] = Deadfinder::TargetCoverage.new(total: 1, dead: 1) Deadfinder.cache_set["foo"] = true Deadfinder.reset_state Deadfinder.output.should be_empty Deadfinder.coverage_data.should be_empty Deadfinder.cache_set.should be_empty end end describe "#run_url" do it "scans a single URL and collects broken links" do target = "http://mock-site.test" html = <<-HTML Dead Alive HTML WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://mock-site.test/dead").to_return(status: 404) WebMock.stub(:get, "http://mock-site.test/alive").to_return(status: 200) options = default_test_options Deadfinder.run_url(target, options) Deadfinder.output[target]?.should_not be_nil Deadfinder.output[target].should contain 
"http://mock-site.test/dead" Deadfinder.output[target].should_not contain "http://mock-site.test/alive" end it "writes JSON output to file when output is specified" do target = "http://mock-site.test" html = %(X) WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://mock-site.test/broken").to_return(status: 404) tempfile = File.tempfile("deadfinder_run_url", ".json") begin options = default_test_options options.output = tempfile.path options.output_format = "json" Deadfinder.run_url(target, options) content = File.read(tempfile.path) parsed = JSON.parse(content) parsed[target].as_a.map(&.as_s).should contain "http://mock-site.test/broken" ensure tempfile.delete end end end describe "#run_file" do it "scans URLs read from a file" do target = "http://mock-file.test" html = %(X) WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://mock-file.test/dead").to_return(status: 404) urlfile = File.tempfile("deadfinder_urls", ".txt") begin File.write(urlfile.path, "#{target}\n") options = default_test_options Deadfinder.run_file(urlfile.path, options) Deadfinder.output[target]?.should_not be_nil Deadfinder.output[target].should contain "http://mock-file.test/dead" ensure urlfile.delete end end it "respects limit option" do html1 = %(P) html2 = %(P) WebMock.stub(:get, "http://mock1.test").to_return(body: html1) WebMock.stub(:get, "http://mock1.test/page").to_return(status: 200) WebMock.stub(:get, "http://mock2.test").to_return(body: html2) WebMock.stub(:get, "http://mock2.test/page").to_return(status: 200) urlfile = File.tempfile("deadfinder_urls", ".txt") begin File.write(urlfile.path, "http://mock1.test\nhttp://mock2.test\n") options = default_test_options options.limit = 1 Deadfinder.run_file(urlfile.path, options) # Only the first URL should be scanned Deadfinder.output.keys.size.should be <= 1 ensure urlfile.delete end end end describe "#run_sitemap" do it "parses sitemap XML and scans discovered URLs" do sitemap_xml = <<-XML 
http://mock-sitemap.test/page1 http://mock-sitemap.test/page2 XML html1 = %(D) html2 = %(O) WebMock.stub(:get, "http://mock-sitemap.test/sitemap.xml").to_return(body: sitemap_xml) WebMock.stub(:get, "http://mock-sitemap.test/page1").to_return(body: html1) WebMock.stub(:get, "http://mock-sitemap.test/page2").to_return(body: html2) WebMock.stub(:get, "http://mock-sitemap.test/dead1").to_return(status: 404) WebMock.stub(:get, "http://mock-sitemap.test/ok").to_return(status: 200) options = default_test_options Deadfinder.run_sitemap("http://mock-sitemap.test/sitemap.xml", options) Deadfinder.output["http://mock-sitemap.test/page1"]?.should_not be_nil Deadfinder.output["http://mock-sitemap.test/page1"].should contain "http://mock-sitemap.test/dead1" end it "terminates on a cyclic sitemap index without infinite recursion" do # a.xml references b.xml, b.xml references a.xml — must not loop. sitemap_a = <<-XML http://cycle.test/b.xml XML sitemap_b = <<-XML http://cycle.test/a.xml XML WebMock.stub(:get, "http://cycle.test/a.xml").to_return(body: sitemap_a) WebMock.stub(:get, "http://cycle.test/b.xml").to_return(body: sitemap_b) options = default_test_options # Should return cleanly (no stack overflow, no hang). 
Deadfinder.run_sitemap("http://cycle.test/a.xml", options) Deadfinder.output.should be_empty end it "parses sitemap without namespace" do sitemap_xml = <<-XML http://mock-sitemap2.test/page1 XML html = %(B) WebMock.stub(:get, "http://mock-sitemap2.test/sitemap.xml").to_return(body: sitemap_xml) WebMock.stub(:get, "http://mock-sitemap2.test/page1").to_return(body: html) WebMock.stub(:get, "http://mock-sitemap2.test/broken").to_return(status: 404) options = default_test_options Deadfinder.run_sitemap("http://mock-sitemap2.test/sitemap.xml", options) Deadfinder.output["http://mock-sitemap2.test/page1"]?.should_not be_nil Deadfinder.output["http://mock-sitemap2.test/page1"].should contain "http://mock-sitemap2.test/broken" end end describe "#gen_output" do context "when output_format is json" do it "writes JSON formatted output" do tempfile = File.tempfile("deadfinder_output", ".json") begin options = default_test_options options.output = tempfile.path options.output_format = "json" Deadfinder.output["http://example.com"] = ["http://example.com/page1", "http://example.com/page2"] Deadfinder.gen_output(options) content = File.read(tempfile.path) parsed = JSON.parse(content) parsed["http://example.com"].as_a.map(&.as_s).should eq ["http://example.com/page1", "http://example.com/page2"] ensure tempfile.delete end end end context "when output_format is yaml" do it "writes YAML formatted output" do tempfile = File.tempfile("deadfinder_output", ".yaml") begin options = default_test_options options.output = tempfile.path options.output_format = "yaml" Deadfinder.output["http://example.com"] = ["http://example.com/page1", "http://example.com/page2"] Deadfinder.gen_output(options) content = File.read(tempfile.path) parsed = YAML.parse(content) parsed["http://example.com"].as_a.map(&.as_s).should eq ["http://example.com/page1", "http://example.com/page2"] ensure tempfile.delete end end end context "when output_format is yml (alias)" do it "writes YAML formatted output" do 
tempfile = File.tempfile("deadfinder_output", ".yml") begin options = default_test_options options.output = tempfile.path options.output_format = "yml" Deadfinder.output["http://example.com"] = ["http://example.com/p1"] Deadfinder.gen_output(options) content = File.read(tempfile.path) parsed = YAML.parse(content) parsed["http://example.com"].as_a.map(&.as_s).should eq ["http://example.com/p1"] ensure tempfile.delete end end end context "when output_format is csv" do it "writes CSV formatted output" do tempfile = File.tempfile("deadfinder_output", ".csv") begin options = default_test_options options.output = tempfile.path options.output_format = "csv" Deadfinder.output["http://example.com"] = ["http://example.com/page1", "http://example.com/page2"] Deadfinder.gen_output(options) content = File.read(tempfile.path) rows = CSV.parse(content) rows[0].should eq ["target", "url"] rows.should contain ["http://example.com", "http://example.com/page1"] rows.should contain ["http://example.com", "http://example.com/page2"] ensure tempfile.delete end end end context "when output_format is toml" do it "writes TOML formatted output" do tempfile = File.tempfile("deadfinder_output", ".toml") begin options = default_test_options options.output = tempfile.path options.output_format = "toml" Deadfinder.output["http://example.com"] = ["http://example.com/page1"] Deadfinder.gen_output(options) content = File.read(tempfile.path) content.should contain "\"http://example.com\"" content.should contain "\"http://example.com/page1\"" ensure tempfile.delete end end end context "when output_format is sarif" do it "writes a valid SARIF 2.1.0 document with a DEAD_LINK result per broken URL" do tempfile = File.tempfile("deadfinder_output", ".sarif") begin options = default_test_options options.output = tempfile.path options.output_format = "sarif" Deadfinder.output["http://example.com"] = ["http://example.com/page1", "http://example.com/page2"] Deadfinder.gen_output(options) content = 
File.read(tempfile.path) parsed = JSON.parse(content) parsed["version"].as_s.should eq "2.1.0" parsed["$schema"].as_s.should contain "sarif-schema-2.1.0" run = parsed["runs"].as_a.first run["tool"]["driver"]["name"].as_s.should eq "deadfinder" run["tool"]["driver"]["version"].as_s.should eq Deadfinder::VERSION rules = run["tool"]["driver"]["rules"].as_a rules.size.should eq 1 rules[0]["id"].as_s.should eq "DEAD_LINK" results = run["results"].as_a results.size.should eq 2 result_uris = results.map { |r| r["locations"].as_a.first["physicalLocation"]["artifactLocation"]["uri"].as_s } result_uris.should contain "http://example.com/page1" result_uris.should contain "http://example.com/page2" results.each do |r| r["ruleId"].as_s.should eq "DEAD_LINK" r["level"].as_s.should eq "warning" r["relatedLocations"].as_a.first["physicalLocation"]["artifactLocation"]["uri"].as_s.should eq "http://example.com" end ensure tempfile.delete end end it "produces an empty results array when there are no dead links" do tempfile = File.tempfile("deadfinder_output", ".sarif") begin options = default_test_options options.output = tempfile.path options.output_format = "sarif" Deadfinder.gen_output(options) content = File.read(tempfile.path) parsed = JSON.parse(content) parsed["version"].as_s.should eq "2.1.0" run = parsed["runs"].as_a.first run["tool"]["driver"]["name"].as_s.should eq "deadfinder" ensure tempfile.delete end end end context "when output is empty" do it "does nothing if output file is not specified" do options = default_test_options options.output = "" options.output_format = "json" # Should not raise Deadfinder.gen_output(options) end end end describe "coverage functionality" do describe "#calculate_coverage" do it "calculates coverage correctly for single target" do target = "http://example.com" Deadfinder.coverage_data[target] = Deadfinder::TargetCoverage.new(total: 10, dead: 3) coverage = Deadfinder.calculate_coverage coverage.targets[target].total_tested.should eq 10 
coverage.targets[target].dead_links.should eq 3 coverage.targets[target].coverage_percentage.should eq 30.0 coverage.summary.total_tested.should eq 10 coverage.summary.total_dead.should eq 3 coverage.summary.overall_coverage_percentage.should eq 30.0 end it "calculates coverage correctly for multiple targets" do Deadfinder.coverage_data["http://example1.com"] = Deadfinder::TargetCoverage.new(total: 10, dead: 2) Deadfinder.coverage_data["http://example2.com"] = Deadfinder::TargetCoverage.new(total: 20, dead: 5) coverage = Deadfinder.calculate_coverage coverage.targets["http://example1.com"].coverage_percentage.should eq 20.0 coverage.targets["http://example2.com"].coverage_percentage.should eq 25.0 coverage.summary.total_tested.should eq 30 coverage.summary.total_dead.should eq 7 coverage.summary.overall_coverage_percentage.should eq 23.33 end it "handles zero total URLs correctly" do target = "http://example.com" Deadfinder.coverage_data[target] = Deadfinder::TargetCoverage.new(total: 0, dead: 0) coverage = Deadfinder.calculate_coverage coverage.targets[target].coverage_percentage.should eq 0.0 coverage.summary.overall_coverage_percentage.should eq 0.0 end it "aggregates status counts across targets" do Deadfinder.coverage_data["http://a.com"] = Deadfinder::TargetCoverage.new( total: 5, dead: 2, status_counts: {"200" => 3, "404" => 2} ) Deadfinder.coverage_data["http://b.com"] = Deadfinder::TargetCoverage.new( total: 3, dead: 1, status_counts: {"200" => 2, "500" => 1} ) coverage = Deadfinder.calculate_coverage coverage.summary.overall_status_counts["200"].should eq 5 coverage.summary.overall_status_counts["404"].should eq 2 coverage.summary.overall_status_counts["500"].should eq 1 end end describe "#gen_output with coverage" do it "includes coverage data in JSON when coverage flag is enabled" do tempfile = File.tempfile("deadfinder_coverage", ".json") begin options = default_test_options options.output = tempfile.path options.output_format = "json" options.coverage 
= true Deadfinder.output["http://example.com"] = ["http://example.com/dead1"] Deadfinder.coverage_data["http://example.com"] = Deadfinder::TargetCoverage.new(total: 5, dead: 1) Deadfinder.gen_output(options) content = File.read(tempfile.path) parsed = JSON.parse(content) parsed["dead_links"].should_not be_nil parsed["coverage"].should_not be_nil parsed["dead_links"]["http://example.com"].as_a.map(&.as_s).should eq ["http://example.com/dead1"] parsed["coverage"]["targets"]["http://example.com"]["total_tested"].as_i.should eq 5 parsed["coverage"]["targets"]["http://example.com"]["dead_links"].as_i.should eq 1 parsed["coverage"]["targets"]["http://example.com"]["coverage_percentage"].as_f.should eq 20.0 ensure tempfile.delete end end it "does not include coverage data when coverage flag is disabled" do tempfile = File.tempfile("deadfinder_coverage", ".json") begin options = default_test_options options.output = tempfile.path options.output_format = "json" options.coverage = false Deadfinder.output["http://example.com"] = ["http://example.com/dead1"] Deadfinder.coverage_data["http://example.com"] = Deadfinder::TargetCoverage.new(total: 5, dead: 1) Deadfinder.gen_output(options) content = File.read(tempfile.path) parsed = JSON.parse(content) parsed["dead_links"]?.should be_nil parsed["coverage"]?.should be_nil parsed["http://example.com"].as_a.map(&.as_s).should eq ["http://example.com/dead1"] ensure tempfile.delete end end it "includes coverage data in YAML" do tempfile = File.tempfile("deadfinder_coverage", ".yaml") begin options = default_test_options options.output = tempfile.path options.output_format = "yaml" options.coverage = true Deadfinder.output["http://example.com"] = ["http://example.com/dead1"] Deadfinder.coverage_data["http://example.com"] = Deadfinder::TargetCoverage.new(total: 10, dead: 2) Deadfinder.gen_output(options) content = File.read(tempfile.path) parsed = YAML.parse(content) parsed["dead_links"].should_not be_nil parsed["coverage"].should_not 
be_nil parsed["coverage"]["targets"]["http://example.com"]["total_tested"].as_i.should eq 10 ensure tempfile.delete end end it "generates CSV with coverage information" do tempfile = File.tempfile("deadfinder_coverage", ".csv") begin options = default_test_options options.output = tempfile.path options.output_format = "csv" options.coverage = true Deadfinder.output["http://example.com"] = ["http://example.com/dead1"] Deadfinder.coverage_data["http://example.com"] = Deadfinder::TargetCoverage.new(total: 5, dead: 1) Deadfinder.gen_output(options) content = File.read(tempfile.path) rows = CSV.parse(content) rows.should contain ["target", "url"] rows.should contain ["http://example.com", "http://example.com/dead1"] rows.any? { |r| r.includes?("Coverage Report") }.should be_true rows.should contain ["target", "total_tested", "dead_links", "coverage_percentage"] rows.should contain ["http://example.com", "5", "1", "20.0%"] rows.any? { |r| r.includes?("Overall Summary") }.should be_true ensure tempfile.delete end end it "generates CSV without coverage when flag is disabled" do tempfile = File.tempfile("deadfinder_coverage", ".csv") begin options = default_test_options options.output = tempfile.path options.output_format = "csv" options.coverage = false Deadfinder.output["http://example.com"] = ["http://example.com/dead1"] Deadfinder.gen_output(options) content = File.read(tempfile.path) rows = CSV.parse(content) rows.should contain ["target", "url"] rows.should contain ["http://example.com", "http://example.com/dead1"] rows.any? 
{ |r| r.includes?("Coverage Report") }.should be_false ensure tempfile.delete end end it "includes coverage data in TOML" do tempfile = File.tempfile("deadfinder_coverage", ".toml") begin options = default_test_options options.output = tempfile.path options.output_format = "toml" options.coverage = true Deadfinder.output["http://example.com"] = ["http://example.com/dead1"] Deadfinder.coverage_data["http://example.com"] = Deadfinder::TargetCoverage.new(total: 4, dead: 1) Deadfinder.gen_output(options) content = File.read(tempfile.path) content.should contain "[dead_links]" content.should contain "[coverage.summary]" content.should contain "total_tested = 4" content.should contain "total_dead = 1" ensure tempfile.delete end end end describe "end-to-end coverage with mock" do it "tracks coverage through run_url" do target = "http://mock-cov.test" html = <<-HTML OK Dead HTML WebMock.stub(:get, target).to_return(body: html) WebMock.stub(:get, "http://mock-cov.test/ok").to_return(status: 200) WebMock.stub(:get, "http://mock-cov.test/dead").to_return(status: 404) tempfile = File.tempfile("deadfinder_e2e_cov", ".json") begin options = default_test_options options.coverage = true options.output = tempfile.path options.output_format = "json" Deadfinder.run_url(target, options) content = File.read(tempfile.path) parsed = JSON.parse(content) parsed["coverage"]["targets"][target]["total_tested"].as_i.should eq 2 parsed["coverage"]["targets"][target]["dead_links"].as_i.should eq 1 parsed["coverage"]["summary"]["total_tested"].as_i.should eq 2 parsed["coverage"]["summary"]["total_dead"].as_i.should eq 1 ensure tempfile.delete end end end end end ================================================ FILE: spec/spec_helper.cr ================================================ require "spec" require "webmock" require "../src/deadfinder" require "../src/deadfinder/cli" def reset_deadfinder_state Deadfinder.output.clear Deadfinder.coverage_data.clear Deadfinder.cache_set.clear 
Deadfinder::Logger.unset_silent
  Deadfinder::Logger.unset_verbose
  Deadfinder::Logger.unset_debug
end

def default_test_options : Deadfinder::Options
  options = Deadfinder::Options.new
  options.silent = true
  options.concurrency = 2
  options
end

def make_runner_args
  {
    output:        {} of String => Array(String),
    coverage_data: {} of String => Deadfinder::TargetCoverage,
    cache_set:     {} of String => Bool,
    mutex:         Mutex.new,
  }
end

================================================
FILE: src/cli_main.cr
================================================
require "./deadfinder"
require "./deadfinder/cli"

Deadfinder::CLI.run

================================================
FILE: src/deadfinder/cli.cr
================================================
require "option_parser"

module Deadfinder
  # Command-line front end: parses the global flags into an `Options`
  # instance and dispatches to the matching `Deadfinder.run_*` entry point.
  module CLI
    # Parses `args` (ARGV by default) and runs the requested subcommand.
    #
    # The first non-flag argument is the subcommand and the second, when
    # present, is its positional argument (file name, URL or shell name).
    # Missing positional arguments print an error to STDERR and exit 1.
    # Numeric flags raise ArgumentError when the value is not an integer in
    # the accepted range.
    def self.run(args = ARGV)
      options = Options.new
      subcommand : String? = nil
      positional_arg : String? = nil

      global_parser = OptionParser.new do |parser|
        parser.banner = "Usage: deadfinder [options]"
        parser.separator ""
        parser.separator "Commands:"
        parser.separator " pipe Scan the URLs from STDIN"
        parser.separator " file Scan the URLs from File"
        parser.separator " url Scan the Single URL"
        parser.separator " sitemap Scan the URLs from sitemap"
        parser.separator " completion Generate completion script (bash/zsh/fish)"
        parser.separator " version Show version"
        parser.separator ""
        parser.separator "Options:"

        parser.on("-r", "--include30x", "Include 30x redirections") { options.include30x = true }
        # Zero workers would leave the scan waiting forever, so require >= 1.
        parser.on("-c CONCURRENCY", "--concurrency=CONCURRENCY", "Number of concurrency (default: 50)") { |v| options.concurrency = parse_int("--concurrency", v, 1) }
        parser.on("-t TIMEOUT", "--timeout=TIMEOUT", "Timeout in seconds (default: 10)") { |v| options.timeout = parse_int("--timeout", v, 0) }
        parser.on("-o OUTPUT", "--output=OUTPUT", "File to write result") { |v| options.output = v }
        parser.on("-f FORMAT", "--output_format=FORMAT", "Output format: json, yaml, toml, csv, sarif (default: json)") { |v| options.output_format = v }
        parser.on("-H HEADER", "--headers=HEADER", "Custom HTTP headers for initial request") { |v| options.headers << v }
        parser.on("--worker_headers=HEADER", "Custom HTTP headers for worker requests") { |v| options.worker_headers << v }
        parser.on("--user_agent=UA", "User-Agent string") { |v| options.user_agent = v }
        parser.on("-p PROXY", "--proxy=PROXY", "Proxy server") { |v| options.proxy = v }
        parser.on("--proxy_auth=CREDS", "Proxy authentication (user:pass)") { |v| options.proxy_auth = v }
        parser.on("-k", "--insecure", "Skip TLS certificate verification (not recommended)") { options.insecure = true }
        parser.on("-m PATTERN", "--match=PATTERN", "Match URL pattern") { |v| options.match = v }
        parser.on("-i PATTERN", "--ignore=PATTERN", "Ignore URL pattern") { |v| options.ignore = v }
        parser.on("-s", "--silent", "Silent mode") { options.silent = true }
        parser.on("-v", "--verbose", "Verbose mode") { options.verbose = true }
        parser.on("--debug", "Debug mode") { options.debug = true }
        parser.on("--limit=N", "Limit number of URLs to scan") { |v| options.limit = parse_int("--limit", v, 0) }
        parser.on("--coverage", "Enable coverage tracking and reporting") { options.coverage = true }
        parser.on("--visualize=PATH", "Generate visualization PNG") { |v| options.visualize = v }
        parser.on("-h", "--help", "Show help") do
          puts parser
          exit
        end

        # Non-flag arguments: [subcommand, positional argument].
        parser.unknown_args do |remaining, _|
          subcommand = remaining[0]?
          positional_arg = remaining[1]?
        end
      end

      global_parser.parse(args)

      # Auto-enable coverage if visualize is set
      options.coverage = true unless options.visualize.empty?

      # `subcommand` / `positional_arg` are captured by the parser block, so
      # the compiler cannot narrow them in place; bind them to locals instead.
      command = subcommand
      argument = positional_arg

      case command
      when "pipe"
        Deadfinder.run_pipe(options)
      when "file"
        if argument
          Deadfinder.run_file(argument, options)
        else
          STDERR.puts "Error: file command requires a filename argument"
          STDERR.puts "Usage: deadfinder file [options]"
          exit 1
        end
      when "url"
        if argument
          Deadfinder.run_url(argument, options)
        else
          STDERR.puts "Error: url command requires a URL argument"
          STDERR.puts "Usage: deadfinder url [options]"
          exit 1
        end
      when "sitemap"
        if argument
          Deadfinder.run_sitemap(argument, options)
        else
          STDERR.puts "Error: sitemap command requires a URL argument"
          STDERR.puts "Usage: deadfinder sitemap [options]"
          exit 1
        end
      when "completion"
        case argument
        when "bash"
          puts Deadfinder::Completion.bash
        when "zsh"
          puts Deadfinder::Completion.zsh
        when "fish"
          puts Deadfinder::Completion.fish
        when nil
          STDERR.puts "Error: completion command requires a shell argument (bash/zsh/fish)"
          exit 1
        else
          Deadfinder::Logger.error "Unsupported shell: #{argument}"
          exit 1
        end
      when "version"
        Deadfinder::Logger.info "deadfinder #{Deadfinder::VERSION}"
      else
        # No subcommand: show help and succeed. Unknown subcommand: help + exit 1.
        puts global_parser
        exit 1 if command
      end
    end

    # Converts the raw value of a numeric flag to an Int32 that is >= `min`.
    #
    # Raises ArgumentError (the class `String#to_i` raises for bad input) with
    # a message naming the offending flag.
    private def self.parse_int(flag : String, raw : String, min : Int32) : Int32
      if (value = raw.to_i?) && value >= min
        return value
      end
      raise ArgumentError.new("#{flag} expects an integer >= #{min}, got #{raw.inspect}")
    end
  end
end

================================================
FILE: src/deadfinder/completion.cr
================================================
module Deadfinder
  module Completion
    def self.bash : String
      <<-BASH
      _deadfinder_completions()
      {
          local cur prev opts
          COMPREPLY=()
          cur="${COMP_WORDS[COMP_CWORD]}"
          opts="--include30x --concurrency --timeout --output --output_format --headers --worker_headers --user_agent --proxy --proxy_auth --match --ignore --silent --verbose --debug --limit --coverage --visualize"

          COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
          return 0
      }
      complete -F _deadfinder_completions deadfinder
      BASH
    end

    def self.zsh : String
      <<-ZSH
      #compdef deadfinder

      _arguments \\
        '--include30x[Include 30x redirections]' \\
        '--concurrency[Number of concurrency]:number' \\
        '--timeout[Timeout in seconds]:number' \\
        '--output[File to write result]:file' \\
        '--output_format[Output format]:string' \\
        '--headers[Custom HTTP headers]:array' \\
        '--worker_headers[Custom HTTP headers for workers]:array' \\
        '--user_agent[User-Agent string]:string' \\
        '--proxy[Proxy server]:string' \\
        '--proxy_auth[Proxy server authentication]:string' \\
        '--match[Match URL pattern]:string' \\
        '--ignore[Ignore URL pattern]:string' \\
        '--silent[Silent mode]' \\
        '--verbose[Verbose mode]' \\
        '--debug[Debug mode]' \\
        '--limit[Limit number of URLs to scan]:number' \\
        '--coverage[Enable coverage tracking]' \\
        '--visualize[Generate visualization PNG]:file'
      ZSH
    end

    def self.fish : String
      <<-FISH
      complete -c deadfinder -l include30x -d 'Include 30x redirections'
      complete -c deadfinder -l concurrency -d 'Number of concurrency' -a '(seq 1 100)'
      complete -c deadfinder -l timeout -d 'Timeout in seconds' -a '(seq 1 60)'
      complete -c deadfinder -l output -d 'File to write result' -r
      complete -c deadfinder -l output_format -d 'Output format' -r
      complete -c deadfinder -l headers -d 'Custom HTTP headers' -r
      complete -c deadfinder -l worker_headers -d 'Custom HTTP headers for workers' -r
      complete -c deadfinder -l user_agent -d 'User-Agent string' -r
      complete -c deadfinder -l proxy -d 'Proxy server' -r
      complete -c deadfinder -l proxy_auth -d 'Proxy server authentication' -r
      complete -c deadfinder -l match -d 'Match URL pattern' -r
      complete -c deadfinder -l ignore -d 'Ignore URL pattern' -r
      complete -c deadfinder -l silent -d 'Silent mode'
      complete -c deadfinder -l verbose -d 'Verbose mode'
      complete -c deadfinder -l debug -d 'Debug mode'
      complete -c deadfinder -l limit -d 'Limit number of URLs to scan' -r
      complete -c deadfinder -l coverage -d 'Enable coverage tracking'
      complete -c deadfinder -l visualize -d 'Generate visualization PNG' -r
      FISH
    end
  end
end

================================================
FILE:
src/deadfinder/http_client.cr
================================================
require "http/client"
require "openssl"
require "uri"
require "base64"
require "socket"

module Deadfinder
  # Factory for `HTTP::Client` instances that honours the proxy, timeout and
  # TLS-verification settings carried by `Options`.
  module HttpClient
    # Parsed proxy URIs keyed by the raw `--proxy` string (nil = unparsable).
    @@proxy_cache = {} of String => URI?
    @@proxy_cache_mutex = Mutex.new

    # Builds a client for `uri`.
    #
    # * No proxy configured (or the proxy string is unusable): direct client.
    # * Proxy + https target: TLS client over a CONNECT tunnel.
    # * Proxy + http target: client connected to the proxy; callers must then
    #   request the absolute URI (see `absolute_uri`).
    #
    # Raises when `uri` has no host, when the proxy cannot be reached, or when
    # the proxy refuses the CONNECT request.
    def self.create(uri : URI, options : Options) : HTTP::Client
      host = uri.host.not_nil!
      port = uri.port
      use_ssl = uri.scheme == "https"

      unless options.proxy.empty?
        proxy_uri = resolve_proxy(options.proxy)
        if proxy_uri && (proxy_host = proxy_uri.host)
          proxy_port = proxy_uri.port || (proxy_uri.scheme == "https" ? 443 : 8080)
          auth_header = proxy_auth_header(proxy_uri, options)

          if use_ssl
            return create_tunnel(host, port || 443, proxy_host, proxy_port, auth_header, options)
          else
            return create_via_http_proxy(proxy_host, proxy_port, auth_header, options)
          end
        end
      end

      create_direct(host, port, use_ssl, options)
    end

    # For HTTP proxy, requests need to use absolute URI as path
    def self.absolute_uri(uri : URI) : String
      uri.to_s
    end

    # True when a proxy string was supplied in `options`.
    def self.proxy_configured?(options : Options) : Bool
      !options.proxy.empty?
    end

    # Returns the `Proxy-Authorization` header value, or nil when no complete
    # credential pair is available. Credentials embedded in the proxy URI are
    # overridden by a well-formed `user:pass` value in `options.proxy_auth`.
    private def self.proxy_auth_header(proxy_uri : URI, options : Options) : String?
      user = proxy_uri.user
      password = proxy_uri.password

      # Apply proxy_auth option if provided
      unless options.proxy_auth.empty?
        parts = options.proxy_auth.split(":", 2)
        if parts.size == 2
          user = parts[0]
          password = parts[1]
        end
      end

      return nil unless user && password
      "Basic #{Base64.strict_encode("#{user}:#{password}")}"
    end

    # HTTPS through proxy: opens a CONNECT tunnel to `host:target_port` and
    # wraps it in TLS. The raw socket is closed if any step fails so a refused
    # or half-open tunnel does not leak a file descriptor.
    private def self.create_tunnel(host : String, target_port : Int32, proxy_host : String, proxy_port : Int32, auth_header : String?, options : Options) : HTTP::Client
      timeout = options.timeout.seconds
      # Bound the connect phase too, matching the direct and plain-proxy paths.
      socket = TCPSocket.new(proxy_host, proxy_port, connect_timeout: timeout)

      begin
        socket.read_timeout = timeout

        connect_request = "CONNECT #{host}:#{target_port} HTTP/1.1\r\nHost: #{host}:#{target_port}\r\n"
        connect_request += "Proxy-Authorization: #{auth_header}\r\n" if auth_header
        connect_request += "\r\n"
        socket.print(connect_request)

        response_line = socket.gets
        unless response_line && connect_succeeded?(response_line)
          raise "Proxy CONNECT to #{host}:#{target_port} via #{proxy_host}:#{proxy_port} failed: #{response_line.try(&.strip) || "no response"}"
        end

        # Consume remaining headers
        while (line = socket.gets) && !line.strip.empty?
        end

        tls_socket = OpenSSL::SSL::Socket::Client.new(socket, context: ssl_context(options), hostname: host)
        client = HTTP::Client.new(io: tls_socket, host: host, port: target_port)
        client.read_timeout = timeout
        client
      rescue ex
        socket.close
        raise ex
      end
    end

    # True when the proxy's status line carries a 2xx status code. The code is
    # taken from the second token so digits elsewhere in the line (reason
    # phrase, version) cannot be mistaken for success.
    private def self.connect_succeeded?(status_line : String) : Bool
      code = status_line.split(' ', 3)[1]?
      !code.nil? && code.size == 3 && code.starts_with?('2')
    end

    # HTTP through proxy: connect to proxy, use absolute URI in requests
    private def self.create_via_http_proxy(proxy_host : String, proxy_port : Int32, auth_header : String?, options : Options) : HTTP::Client
      client = HTTP::Client.new(proxy_host, port: proxy_port)
      client.read_timeout = options.timeout.seconds
      client.connect_timeout = options.timeout.seconds
      if header = auth_header
        client.before_request do |request|
          request.headers["Proxy-Authorization"] = header
        end
      end
      client
    end

    # Plain client talking straight to the target host.
    private def self.create_direct(host : String, port : Int32?, use_ssl : Bool, options : Options) : HTTP::Client
      client = HTTP::Client.new(host, port: port, tls: use_ssl ? ssl_context(options) : nil)
      client.read_timeout = options.timeout.seconds
      client.connect_timeout = options.timeout.seconds
      client
    end

    # Parses `proxy_str` once and memoizes the result, including failures, so
    # an invalid proxy is reported a single time rather than once per request.
    private def self.resolve_proxy(proxy_str : String) : URI?
      @@proxy_cache_mutex.synchronize do
        if @@proxy_cache.has_key?(proxy_str)
          @@proxy_cache[proxy_str]
        else
          begin
            parsed = URI.parse(proxy_str)
            @@proxy_cache[proxy_str] = parsed
            parsed
          rescue ex
            Deadfinder::Logger.error "Invalid proxy URI: #{proxy_str} - #{ex.message}"
            @@proxy_cache[proxy_str] = nil
            nil
          end
        end
      end
    end

    # TLS context; certificate verification is disabled only with --insecure.
    private def self.ssl_context(options : Options) : OpenSSL::SSL::Context::Client
      ctx = OpenSSL::SSL::Context::Client.new
      ctx.verify_mode = options.insecure ? OpenSSL::SSL::VerifyMode::NONE : OpenSSL::SSL::VerifyMode::PEER
      ctx
    end
  end
end

================================================
FILE: src/deadfinder/logger.cr
================================================
require "colorize"

module Deadfinder
  module Logger
    @@silent = false
    @@verbose = false
    @@debug = false
    @@mutex = Mutex.new

    def self.apply_options(options : Options)
      set_silent if options.silent
      set_verbose if options.verbose
      set_debug if options.debug
    end

    def self.set_silent
      @@mutex.synchronize { @@silent = true }
    end

    def self.unset_silent
      @@mutex.synchronize { @@silent = false }
    end

    def self.silent?
      @@mutex.synchronize { @@silent }
    end

    def self.set_verbose
      @@mutex.synchronize { @@verbose = true }
    end

    def self.unset_verbose
      @@mutex.synchronize { @@verbose = false }
    end

    def self.verbose?
      @@mutex.synchronize { @@verbose }
    end

    def self.set_debug
      @@mutex.synchronize { @@debug = true }
    end

    def self.unset_debug
      @@mutex.synchronize { @@debug = false }
    end

    def self.debug?
      @@mutex.synchronize { @@debug }
    end

    def self.log(prefix : String, text : String, color : Symbol)
      return if silent?

      case color
      when :yellow
        print prefix.colorize(:yellow)
      when :blue
        print prefix.colorize(:blue)
      when :red
        print prefix.colorize(:red)
      when :green
        print prefix.colorize(:green)
      else
        print prefix
      end
      puts text
    end

    def self.sub_log(prefix : String, is_end : Bool, text : String, color : Symbol)
      return if silent?

      indent = is_end ? " \u2514\u2500\u2500 " : " \u251C\u2500\u2500 "
      case color
      when :yellow
        print indent.colorize(:yellow)
        print prefix.colorize(:yellow)
      when :blue
        print indent.colorize(:blue)
        print prefix.colorize(:blue)
      when :red
        print indent.colorize(:red)
        print prefix.colorize(:red)
      when :green
        print indent.colorize(:green)
        print prefix.colorize(:green)
      else
        print indent
        print prefix
      end
      puts text
    end

    def self.debug(text : String)
      log("\u2740 ", text, :yellow) if debug?
end

    def self.info(text : String)
      log("\u2139 ", text, :blue)
    end

    def self.error(text : String)
      log("\u26A0\uFE0E ", text, :red)
    end

    def self.target(text : String)
      log("\u25BA ", text, :green)
    end

    def self.sub_info(text : String)
      log(" \u25CF ", text, :blue)
    end

    def self.sub_complete(text : String)
      sub_log("\u25CF ", true, text, :blue)
    end

    def self.found(text : String)
      sub_log("\u2718 ", false, text, :red)
    end

    def self.verbose(text : String)
      sub_log("\u279C ", false, text, :yellow) if verbose?
    end

    def self.verbose_ok(text : String)
      sub_log("\u2713 ", false, text, :green) if verbose?
    end
  end
end

================================================
FILE: src/deadfinder/runner.cr
================================================
require "http/client"
require "uri"
require "lexbor"

module Deadfinder
  # Fetches one target page, extracts its links and checks each of them
  # concurrently, recording dead links and (optionally) coverage counters
  # into the shared hashes handed in by the caller.
  class Runner
    # Link kind => {tag name, attribute holding the URL}.
    LINK_SELECTORS = {
      "anchor" => {"a", "href"},
      "script" => {"script", "src"},
      "link"   => {"link", "href"},
      "iframe" => {"iframe", "src"},
      "form"   => {"form", "action"},
      "object" => {"object", "data"},
      "embed"  => {"embed", "src"},
    }

    # Origin-form request target: path (default "/") plus optional query.
    private def request_path(uri : URI) : String
      path = uri.path.presence || "/"
      if q = uri.query.presence
        "#{path}?#{q}"
      else
        path
      end
    end

    # Request target for `uri`: the absolute URI when a plain-HTTP request
    # goes through a proxy, the origin-form path otherwise.
    private def request_target(uri : URI, options : Options) : String
      if HttpClient.proxy_configured?(options) && uri.scheme == "http"
        HttpClient.absolute_uri(uri)
      else
        request_path(uri)
      end
    end

    # Parse "Name: value" header strings. Accepts ":" or ": " as the
    # separator and trims both sides — keeps initial-request and worker
    # headers using the exact same semantics so users don't hit
    # depending-on-which-flag surprises.
    private def build_headers(raw : Array(String), user_agent : String) : HTTP::Headers
      headers = HTTP::Headers.new
      raw.each do |header|
        name, sep, value = header.partition(':')
        next if sep.empty?
        name = name.strip
        next if name.empty?
        headers[name] = value.strip
      end
      headers["User-Agent"] = user_agent
      headers
    end

    # Scans `target`: downloads the page, applies the match/ignore filters to
    # the discovered links, then checks every resolved URL with a pool of
    # worker fibers. Results are written into `output` / `coverage_data`
    # under `mutex`; `cache_set` de-duplicates URLs across targets.
    #
    # Any exception is logged together with the target and swallowed.
    def run(target : String, options : Options, output : Hash(String, Array(String)), coverage_data : Hash(String, TargetCoverage), cache_set : Hash(String, Bool), mutex : Mutex)
      Deadfinder::Logger.apply_options(options)
      headers = build_headers(options.headers, options.user_agent)

      uri = URI.parse(target)
      client = HttpClient.create(uri, options)
      response = begin
        client.get(request_target(uri, options), headers: headers)
      ensure
        # Release the connection even when the request itself fails.
        client.close
      end

      page = Lexbor::Parser.new(response.body)
      links = extract_links(page)

      if !options.match.empty?
        begin
          links = links.transform_values do |urls|
            urls.select { |url| UrlPatternMatcher.match?(url, options.match) }
          end
        rescue ex : ArgumentError
          Deadfinder::Logger.error "Invalid match pattern: #{ex.message}"
        end
      end

      if !options.ignore.empty?
        begin
          links = links.transform_values do |urls|
            urls.reject { |url| UrlPatternMatcher.ignore?(url, options.ignore) }
          end
        rescue ex : ArgumentError
          Deadfinder::Logger.error "Invalid ignore pattern: #{ex.message}"
        end
      end

      all_links = links.values.flatten.uniq
      total_links_count = all_links.size
      link_info = links.compact_map { |type, urls| "#{type}:#{urls.size}" if urls.size > 0 }.join(" / ")
      Deadfinder::Logger.sub_info "Discovered #{total_links_count} URLs, currently checking them. [#{link_info}]" unless link_info.empty?

      # Resolve all URLs
      resolved_urls = all_links.compact_map { |node| Deadfinder.generate_url(node, target) }

      # Channel-based concurrent workers
      jobs = Channel(String).new(1000)
      results = Channel(String).new(1000)

      # At least one worker, otherwise nothing would ever drain `jobs`.
      worker_count = Math.max(options.concurrency, 1)
      worker_count.times do |w|
        spawn do
          worker(w, jobs, results, target, options, output, coverage_data, cache_set, mutex)
        end
      end

      # Feed jobs from their own fiber so this fiber can drain `results`
      # concurrently. Enqueuing everything first would stall once both
      # bounded channels fill up (pages with a few thousand links).
      spawn do
        resolved_urls.each { |url| jobs.send(url) }
        jobs.close
      end

      resolved_urls.size.times { results.receive }

      # Log coverage summary
      if options.coverage
        mutex.synchronize do
          if data = coverage_data[target]?
            if data.total > 0
              percentage = ((data.dead.to_f / data.total) * 100).round(2)
              Deadfinder::Logger.sub_info "Coverage: #{data.dead}/#{data.total} URLs are dead links (#{percentage}%)"
            end
          end
        end
      end

      Deadfinder::Logger.sub_complete "Task completed"
    rescue ex
      Deadfinder::Logger.error "[#{ex}] #{target}"
    end

    # Worker loop: takes URLs from `jobs` until the channel is closed, checks
    # each one that no other worker has claimed, and acknowledges every job
    # (checked, skipped or failed) with exactly one message on `results`.
    def worker(id : Int32, jobs : Channel(String), results : Channel(String), target : String, options : Options, output : Hash(String, Array(String)), coverage_data : Hash(String, TargetCoverage), cache_set : Hash(String, Bool), mutex : Mutex)
      loop do
        url = jobs.receive? || break

        unless claim_url(url, cache_set, mutex)
          results.send(url)
          next
        end

        record_total(target, options, coverage_data, mutex)

        begin
          status_code = check_url(url, options)
          record_status(target, url, status_code, options, output, coverage_data, mutex)
        rescue ex
          Deadfinder::Logger.verbose "[#{ex}] #{url}" if options.verbose
          record_error(target, options, coverage_data, mutex)
        end

        results.send(url)
      end
    end

    # Returns true if this worker now owns `url` (first-time check),
    # false if another worker already claimed it.
    private def claim_url(url : String, cache_set : Hash(String, Bool), mutex : Mutex) : Bool
      mutex.synchronize do
        return false if cache_set[url]?
        cache_set[url] = true
        true
      end
    end

    # Issues a GET for `url` with the worker headers and returns the HTTP
    # status code. Raises on malformed URLs and on network/TLS errors.
    private def check_url(url : String, options : Options) : Int32
      uri = URI.parse(url)
      client = HttpClient.create(uri, options)
      begin
        headers = build_headers(options.worker_headers, options.user_agent)
        client.get(request_target(uri, options), headers: headers).status_code
      ensure
        # Close on the failure path too so timeouts do not leak sockets.
        client.close
      end
    end

    # Counts one tested URL for `target` (coverage mode only).
    private def record_total(target : String, options : Options, coverage_data : Hash(String, TargetCoverage), mutex : Mutex) : Nil
      return unless options.coverage
      mutex.synchronize do
        coverage_data[target] ||= TargetCoverage.new
        coverage_data[target].total += 1
      end
    end

    # Logs the outcome for `url` and records it. A link is dead when the
    # status is >= 400, or >= 300 when `include30x` is set.
    private def record_status(target : String, url : String, status_code : Int32, options : Options, output : Hash(String, Array(String)), coverage_data : Hash(String, TargetCoverage), mutex : Mutex) : Nil
      dead = status_code >= 400 || (status_code >= 300 && options.include30x)

      if dead
        Deadfinder::Logger.found "[#{status_code}] #{url}"
      else
        Deadfinder::Logger.verbose_ok "[#{status_code}] #{url}" if options.verbose
      end

      # Skip the mutex entirely on the common "alive + no coverage" path
      # so we don't serialize every live link on the cache-set mutex.
      return unless dead || options.coverage

      mutex.synchronize do
        if dead
          output[target] ||= [] of String
          output[target] << url
        end
        if options.coverage
          coverage = coverage_data[target]
          coverage.dead += 1 if dead
          key = status_code.to_s
          coverage.status_counts[key] = (coverage.status_counts[key]? || 0) + 1
        end
      end
    end

    # Counts a failed request as a dead link under the "error" status bucket
    # (coverage mode only).
    private def record_error(target : String, options : Options, coverage_data : Hash(String, TargetCoverage), mutex : Mutex) : Nil
      return unless options.coverage
      mutex.synchronize do
        coverage_data[target] ||= TargetCoverage.new
        coverage = coverage_data[target]
        coverage.dead += 1
        coverage.status_counts["error"] = (coverage.status_counts["error"]? || 0) + 1
      end
    end

    # Collects the non-empty URL attribute values for every selector in
    # LINK_SELECTORS, keyed by link kind.
    private def extract_links(page : Lexbor::Parser) : Hash(String, Array(String))
      links = {} of String => Array(String)
      LINK_SELECTORS.each do |type, selector_info|
        tag, attr = selector_info
        urls = [] of String
        page.css(tag).each do |element|
          if val = element.attribute_by(attr)
            urls << val unless val.empty?
          end
        end
        links[type] = urls
      end
      links
    end
  end
end

================================================
FILE: src/deadfinder/types.cr
================================================
module Deadfinder
  class Options
    property concurrency : Int32 = 50
    property timeout : Int32 = 10
    property output : String = ""
    property output_format : String = "json"
    property headers : Array(String) = [] of String
    property worker_headers : Array(String) = [] of String
    property silent : Bool = false
    property verbose : Bool = false
    property debug : Bool = false
    property include30x : Bool = false
    property proxy : String = ""
    property proxy_auth : String = ""
    property insecure : Bool = false
    property match : String = ""
    property ignore : String = ""
    property user_agent : String = "Mozilla/5.0 (compatible; DeadFinder/#{VERSION};)"
    property coverage : Bool = false
    property visualize : String = ""
    property limit : Int32 = 0
  end

  class TargetCoverage
    property total : Int32 = 0
    property dead : Int32 = 0
    property status_counts : Hash(String, Int32) = {} of String => Int32

    def initialize(@total = 0, @dead = 0, @status_counts = {} of String => Int32)
    end
  end

  struct CoverageTarget
    property total_tested : Int32
    property dead_links : Int32
    property coverage_percentage : Float64
    property status_counts : Hash(String, Int32)

    def initialize(@total_tested, @dead_links, @coverage_percentage, @status_counts)
    end
  end

  struct CoverageSummary
    property total_tested : Int32
    property total_dead : Int32
    property overall_coverage_percentage : Float64
    property overall_status_counts : Hash(String, Int32)

    def initialize(@total_tested, @total_dead, @overall_coverage_percentage, @overall_status_counts)
    end
end struct CoverageResult property targets : Hash(String, CoverageTarget) property summary : CoverageSummary def initialize(@targets, @summary) end end end ================================================ FILE: src/deadfinder/url_pattern_matcher.cr ================================================ module Deadfinder module UrlPatternMatcher MAX_PATTERN_LENGTH = 1024 # Inherits from ArgumentError so existing `rescue ArgumentError` # sites in the runner continue to catch bad patterns uniformly. class UnsafePatternError < ArgumentError end @@regex_cache = {} of String => Regex @@regex_cache_mutex = Mutex.new def self.match?(url : String, pattern : String) : Bool regex = compile(pattern) regex.matches?(url) end def self.ignore?(url : String, pattern : String) : Bool regex = compile(pattern) regex.matches?(url) end # Exposed for tests / diagnostics. def self.clear_cache : Nil @@regex_cache_mutex.synchronize { @@regex_cache.clear } end private def self.compile(pattern : String) : Regex if pattern.size > MAX_PATTERN_LENGTH raise UnsafePatternError.new("Pattern exceeds #{MAX_PATTERN_LENGTH} characters (got #{pattern.size})") end reject_catastrophic_backtracking!(pattern) @@regex_cache_mutex.synchronize do @@regex_cache[pattern] ||= Regex.new(pattern) end end # Conservative static check for the two classic ReDoS shapes: # (a+)+ , (a*)* , (a|a)* , (.+)* , etc. # Crystal's stdlib exposes no PCRE2 match-limit, and a fiber `timeout` # cannot interrupt a CPU-bound regex (fibers are cooperative), so we # reject the pattern up-front instead of pretending a timeout protects us. 
# # The `(?= x1 && py >= y1 end # Top-right (90..180).each do |angle| rad = angle * Math::PI / 180 cx = x2 - r cy = y1 + r px = (cx + r * Math.cos(rad)).to_i py = (cy + r * Math.sin(rad)).to_i canvas[px, py] = color if px <= x2 && py >= y1 end # Bottom-left (270..360).each do |angle| rad = angle * Math::PI / 180 cx = x1 + r cy = y2 - r px = (cx + r * Math.cos(rad)).to_i py = (cy + r * Math.sin(rad)).to_i canvas[px, py] = color if px >= x1 && py <= y2 end # Bottom-right (180..270).each do |angle| rad = angle * Math::PI / 180 cx = x2 - r cy = y2 - r px = (cx + r * Math.cos(rad)).to_i py = (cy + r * Math.sin(rad)).to_i canvas[px, py] = color if px <= x2 && py <= y2 end end end end ================================================ FILE: src/deadfinder.cr ================================================ require "uri" require "json" require "yaml" require "csv" require "xml" require "sarif" require "./deadfinder/version" require "./deadfinder/types" require "./deadfinder/utils" require "./deadfinder/logger" require "./deadfinder/url_pattern_matcher" require "./deadfinder/http_client" require "./deadfinder/runner" require "./deadfinder/visualizer" require "./deadfinder/completion" module Deadfinder MAX_SITEMAP_DEPTH = 5 @@output = {} of String => Array(String) @@coverage_data = {} of String => TargetCoverage @@cache_set = {} of String => Bool @@mutex = Mutex.new def self.output @@output end def self.coverage_data @@coverage_data end def self.cache_set @@cache_set end def self.mutex @@mutex end # Clears module-level accumulator state so back-to-back runs in the # same process (e.g. tests, embedded usage) start from a clean slate. 
def self.reset_state : Nil
  @@mutex.synchronize do
    @@output.clear
    @@coverage_data.clear
    @@cache_set.clear
  end
end

# Reads targets from STDIN (one per line) and scans each of them.
def self.run_pipe(options : Options)
  run_with_input(options) do
    lines = [] of String
    while line = STDIN.gets
      lines << line.chomp
    end
    lines
  end
end

# Reads targets from `filename` (one per line) and scans each of them.
def self.run_file(filename : String, options : Options)
  run_with_input(options) do
    File.read_lines(filename).map(&.chomp)
  end
end

# Scans a single page URL and writes the configured report.
def self.run_url(url : String, options : Options)
  Deadfinder::Logger.apply_options(options)
  run_with_target(url, options)
  gen_output(options)
end

# Scans every URL listed in the sitemap at `sitemap_url` (nested sitemap
# indexes included), honouring `options.limit`, then writes the report.
def self.run_sitemap(sitemap_url : String, options : Options)
  Deadfinder::Logger.apply_options(options)
  app = Runner.new
  urls = parse_sitemap(sitemap_url, options)
  urls = urls.first(options.limit) if options.limit > 0
  Deadfinder::Logger.info "Found #{urls.size} URLs from #{sitemap_url}"
  urls.each do |url|
    turl = generate_url(url, sitemap_url)
    run_with_target(turl, options, app) if turl
  end
  gen_output(options)
end

# Downloads the sitemap at `sitemap_url` and returns the text of every
# `<loc>` element, followed by the results of every nested sitemap.
#
# Recursion stops at `MAX_SITEMAP_DEPTH`, and a sitemap URL already present
# in `visited` is not fetched again. Any error while fetching or parsing is
# logged and yields whatever was collected so far; it is never raised.
#
# NOTE(review): "//loc" also matches the `<loc>` entries of a sitemap
# index, so sub-sitemap URLs are returned as page URLs too - confirm this
# is intended.
private def self.parse_sitemap(sitemap_url : String, options : Options, depth : Int32 = 0, visited : Set(String) = Set(String).new) : Array(String)
  urls = [] of String

  if depth >= MAX_SITEMAP_DEPTH
    Deadfinder::Logger.error "Sitemap depth limit (#{MAX_SITEMAP_DEPTH}) reached at #{sitemap_url}"
    return urls
  end

  if visited.includes?(sitemap_url)
    Deadfinder::Logger.error "Sitemap cycle detected at #{sitemap_url}"
    return urls
  end
  visited << sitemap_url

  begin
    doc = XML.parse(fetch_sitemap_body(sitemap_url, options))

    urls.concat(sitemap_loc_texts(doc, "//xmlns:loc", "//loc"))

    # Check for sitemap index (recursive sitemaps)
    sitemap_loc_texts(doc, "//xmlns:sitemap/xmlns:loc", "//sitemap/loc").each do |sub_sitemap|
      urls.concat(parse_sitemap(sub_sitemap, options, depth + 1, visited))
    end
  rescue ex
    Deadfinder::Logger.error "Failed to parse sitemap: #{ex.message}"
  end

  urls
end

# Fetches `sitemap_url` and returns the response body.
# The client is closed in an `ensure`, so it is released even when the
# request raises.
private def self.fetch_sitemap_body(sitemap_url : String, options : Options) : String
  uri = URI.parse(sitemap_url)
  client = HttpClient.create(uri, options)
  begin
    headers = HTTP::Headers.new
    headers["User-Agent"] = options.user_agent
    req_path = if HttpClient.proxy_configured?(options) && uri.scheme == "http"
                 HttpClient.absolute_uri(uri)
               else
                 path = uri.path.presence || "/"
                 uri.query.presence ? "#{path}?#{uri.query}" : path
               end
    client.get(req_path, headers: headers).body
  ensure
    client.close
  end
end

# Returns the stripped, non-empty text of the nodes selected by
# `namespaced_xpath` (evaluated with the sitemap 0.9 namespace bound to
# the `xmlns` prefix). When that yields nothing, `plain_xpath` is tried
# without any namespace.
private def self.sitemap_loc_texts(doc : XML::Node, namespaced_xpath : String, plain_xpath : String) : Array(String)
  namespaces = {"xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9"}
  texts = [] of String

  doc.xpath_nodes(namespaced_xpath, namespaces).each do |node|
    text = node.text.strip
    texts << text unless text.empty?
  end
  return texts unless texts.empty?

  doc.xpath_nodes(plain_xpath).each do |node|
    text = node.text.strip
    texts << text unless text.empty?
  end
  texts
end

# Shared driver for line-based inputs: the block supplies the targets,
# `options.limit` is applied, every target is scanned with one shared
# runner and the report is written afterwards.
private def self.run_with_input(options : Options, &block : -> Array(String))
  Deadfinder::Logger.apply_options(options)
  Deadfinder::Logger.info "Reading input"
  app = Runner.new
  targets = yield
  targets = targets.first(options.limit) if options.limit > 0
  targets.each do |target|
    run_with_target(target, options, app)
  end
  gen_output(options)
end

# Scans one target, accumulating results in the module-level state.
def self.run_with_target(target : String, options : Options, app : Runner = Runner.new)
  Deadfinder::Logger.target "Fetching #{target}"
  app.run(target, options, @@output, @@coverage_data, @@cache_set, @@mutex)
end

# Builds the coverage report from the accumulated per-target counters.
# Percentages are dead / total * 100 rounded to two decimals, and 0.0
# when nothing was tested.
def self.calculate_coverage : CoverageResult
  coverage_summary = {} of String => CoverageTarget
  total_all_tested = 0
  total_all_dead = 0
  overall_status_counts = {} of String => Int32

  @@coverage_data.each do |target, data|
    total = data.total
    dead = data.dead
    status_counts = data.status_counts
    coverage_percentage = total > 0 ? ((dead.to_f / total) * 100).round(2) : 0.0

    coverage_summary[target] = CoverageTarget.new(
      total_tested: total,
      dead_links: dead,
      coverage_percentage: coverage_percentage,
      # Copy so the report is detached from the live counters.
      status_counts: status_counts.dup
    )

    total_all_tested += total
    total_all_dead += dead
    status_counts.each do |code, count|
      overall_status_counts[code] = (overall_status_counts[code]? || 0) + count
    end
  end

  overall_coverage = total_all_tested > 0 ? ((total_all_dead.to_f / total_all_tested) * 100).round(2) : 0.0

  CoverageResult.new(
    targets: coverage_summary,
    summary: CoverageSummary.new(
      total_tested: total_all_tested,
      total_dead: total_all_dead,
      overall_coverage_percentage: overall_coverage,
      overall_status_counts: overall_status_counts
    )
  )
end

# Writes the report file (when `options.output` is set) in the requested
# format and renders the visualization (when `options.visualize` is set
# and coverage data exists). Coverage is included only when it is enabled
# and at least one target has a non-zero total.
def self.gen_output(options : Options)
  output_data = @@output
  format = options.output_format.downcase

  coverage_info : CoverageResult? = nil
  if options.coverage && !@@coverage_data.empty? && @@coverage_data.values.any? { |v| v.total > 0 }
    coverage_info = calculate_coverage
  end

  unless options.output.empty?
    content = case format
              when "yaml", "yml"
                generate_yaml(output_data, coverage_info)
              when "csv"
                generate_csv(output_data, coverage_info)
              when "toml"
                generate_toml(output_data, coverage_info)
              when "sarif"
                generate_sarif(output_data, coverage_info)
              else
                generate_json(output_data, coverage_info)
              end
    File.write(options.output, content)
  end

  if !options.visualize.empty? && coverage_info
    Visualizer.generate(coverage_info, options.visualize)
  end
end

# Renders the report as JSON. Without coverage the document is the plain
# target => dead-links object; with coverage that object moves under
# "dead_links" next to a "coverage" object.
private def self.generate_json(output_data : Hash(String, Array(String)), coverage_info : CoverageResult?) : String
  JSON.build(indent: " ") do |json|
    if coverage_info
      json.object do
        json.field "dead_links" do
          write_dead_links_json(json, output_data)
        end
        json.field "coverage" do
          coverage_to_json(json, coverage_info)
        end
      end
    else
      write_dead_links_json(json, output_data)
    end
  end
end

# Emits the target => [dead urls] object into `json`.
private def self.write_dead_links_json(json : JSON::Builder, output_data : Hash(String, Array(String))) : Nil
  json.object do
    output_data.each do |target, urls|
      json.field target do
        json.array do
          urls.each { |url| json.string url }
        end
      end
    end
  end
end

# Emits the coverage report ("targets" and "summary") into `json`.
private def self.coverage_to_json(json : JSON::Builder, coverage : CoverageResult)
  json.object do
    json.field "targets" do
      json.object do
        coverage.targets.each do |target, data|
          json.field target do
            json.object do
              json.field "total_tested", data.total_tested
              json.field "dead_links", data.dead_links
              json.field "coverage_percentage", data.coverage_percentage
              json.field "status_counts" do
                json.object do
                  data.status_counts.each do |code, count|
                    json.field code, count
                  end
                end
              end
            end
          end
        end
      end
    end
    json.field "summary" do
      json.object do
        json.field "total_tested", coverage.summary.total_tested
        json.field "total_dead", coverage.summary.total_dead
        json.field "overall_coverage_percentage", coverage.summary.overall_coverage_percentage
        json.field "overall_status_counts" do
          json.object do
            coverage.summary.overall_status_counts.each do |code, count|
              json.field code, count
            end
          end
        end
      end
    end
  end
end

# Renders the report as YAML, using the same layout as the JSON report.
private def self.generate_yaml(output_data : Hash(String, Array(String)), coverage_info : CoverageResult?) : String
  YAML.build do |yaml|
    yaml.mapping do
      if coverage_info
        yaml.scalar "dead_links"
        yaml.mapping do
          output_data.each do |target, urls|
            yaml.scalar target
            yaml.sequence do
              urls.each { |url| yaml.scalar url }
            end
          end
        end

        yaml.scalar "coverage"
        yaml.mapping do
          yaml.scalar "targets"
          yaml.mapping do
            coverage_info.targets.each do |target, data|
              yaml.scalar target
              yaml.mapping do
                yaml.scalar "total_tested"
                yaml.scalar data.total_tested
                yaml.scalar "dead_links"
                yaml.scalar data.dead_links
                yaml.scalar "coverage_percentage"
                yaml.scalar data.coverage_percentage
                yaml.scalar "status_counts"
                yaml.mapping do
                  data.status_counts.each do |code, count|
                    yaml.scalar code
                    yaml.scalar count
                  end
                end
              end
            end
          end

          yaml.scalar "summary"
          yaml.mapping do
            yaml.scalar "total_tested"
            yaml.scalar coverage_info.summary.total_tested
            yaml.scalar "total_dead"
            yaml.scalar coverage_info.summary.total_dead
            yaml.scalar "overall_coverage_percentage"
            yaml.scalar coverage_info.summary.overall_coverage_percentage
            yaml.scalar "overall_status_counts"
            yaml.mapping do
              coverage_info.summary.overall_status_counts.each do |code, count|
                yaml.scalar code
                yaml.scalar count
              end
            end
          end
        end
      else
        output_data.each do |target, urls|
          yaml.scalar target
          yaml.sequence do
            urls.each { |url| yaml.scalar url }
          end
        end
      end
    end
  end
end

# Renders the report as CSV: a "target,url" table, followed (when coverage
# is available) by a per-target coverage table and an overall summary.
private def self.generate_csv(output_data : Hash(String, Array(String)), coverage_info : CoverageResult?) : String
  CSV.build do |csv|
    csv.row "target", "url"
    output_data.each do |target, urls|
      urls.each { |url| csv.row target, url }
    end

    if coverage_info
      csv.row # Empty row separator
      csv.row "Coverage Report"
      csv.row "target", "total_tested", "dead_links", "coverage_percentage"
      coverage_info.targets.each do |target, data|
        csv.row target, data.total_tested, data.dead_links, "#{data.coverage_percentage}%"
      end
      csv.row # Empty row separator
      csv.row "Overall Summary"
      csv.row "total_tested", "total_dead", "overall_coverage_percentage"
      csv.row coverage_info.summary.total_tested, coverage_info.summary.total_dead, "#{coverage_info.summary.overall_coverage_percentage}%"
    end
  end
end

# Renders the report as TOML. Without coverage every target is a top-level
# key holding an array of dead URLs; with coverage those keys live in
# `[dead_links]` and the coverage figures in `[coverage.*]` tables.
private def self.generate_toml(output_data : Hash(String, Array(String)), coverage_info : CoverageResult?) : String
  lines = [] of String

  if coverage_info
    lines << "[dead_links]"
    output_data.each do |target, urls|
      lines << "#{toml_key(target)} = #{toml_array(urls)}"
    end
    lines << ""
    lines << "[coverage.targets]"
    coverage_info.targets.each do |target, data|
      lines << "[coverage.targets.#{toml_key(target)}]"
      lines << "total_tested = #{data.total_tested}"
      lines << "dead_links = #{data.dead_links}"
      lines << "coverage_percentage = #{data.coverage_percentage}"
      lines << "[coverage.targets.#{toml_key(target)}.status_counts]"
      data.status_counts.each do |code, count|
        lines << "#{toml_key(code)} = #{count}"
      end
    end
    lines << ""
    lines << "[coverage.summary]"
    lines << "total_tested = #{coverage_info.summary.total_tested}"
    lines << "total_dead = #{coverage_info.summary.total_dead}"
    lines << "overall_coverage_percentage = #{coverage_info.summary.overall_coverage_percentage}"
    lines << "[coverage.summary.overall_status_counts]"
    coverage_info.summary.overall_status_counts.each do |code, count|
      lines << "#{toml_key(code)} = #{count}"
    end
  else
    output_data.each do |target, urls|
      lines << "#{toml_key(target)} = #{toml_array(urls)}"
    end
  end

  lines.join("\n") + "\n"
end

# Produce a SARIF 2.1.0 report where each dead link is a `Result` with
# rule id "DEAD_LINK". The scanned target is attached as a related
# location so downstream tools (GitHub code scanning, editors) can link
# back to the page on which the broken URL was found.
private def self.generate_sarif(output_data : Hash(String, Array(String)), coverage_info : CoverageResult?) : String
  log = Sarif::Builder.build do |b|
    b.run("deadfinder", Deadfinder::VERSION) do |r|
      r.information_uri("https://github.com/hahwul/deadfinder")
      r.rule(
        "DEAD_LINK",
        name: "DeadLink",
        short_description: "Broken or unreachable link",
        full_description: "A link on the scanned page returned an HTTP error status or failed to resolve.",
        help_uri: "https://github.com/hahwul/deadfinder",
        level: Sarif::Level::Warning,
      )

      output_data.each do |target, urls|
        urls.each do |url|
          r.result do |rb|
            rb.message("Dead link detected: #{url} (found on #{target})")
            rb.rule_id("DEAD_LINK")
            rb.level(Sarif::Level::Warning)
            rb.location(uri: url)
            rb.related_location(uri: target, message_text: "Referenced from this page")
          end
        end
      end
    end
  end
  log.to_pretty_json
end

# Returns `key` as a TOML key: bare when it consists solely of
# `A-Z a-z 0-9 _ -`, otherwise as a quoted basic string.
private def self.toml_key(key : String) : String
  # \A / \z anchor the whole string; with ^ / $ a key that ends in a
  # newline would pass as a bare key and break the document.
  if key.matches?(/\A[a-zA-Z0-9_-]+\z/)
    key
  else
    toml_string(key)
  end
end

# Returns `arr` as a single-line TOML array of basic strings.
private def self.toml_array(arr : Array(String)) : String
  items = arr.map { |s| toml_string(s) }
  "[#{items.join(", ")}]"
end

# Returns `value` as a TOML basic string. Backslash, double quote and
# control characters are escaped so the output stays a valid one-line
# string even when a URL contains a newline or similar.
private def self.toml_string(value : String) : String
  String.build do |io|
    io << '"'
    value.each_char do |ch|
      case ch
      when '\\' then io << "\\\\"
      when '"'  then io << "\\\""
      when '\b' then io << "\\b"
      when '\t' then io << "\\t"
      when '\n' then io << "\\n"
      when '\f' then io << "\\f"
      when '\r' then io << "\\r"
      else
        if ch.ord < 0x20 || ch.ord == 0x7f
          io << "\\u" << ch.ord.to_s(16).upcase.rjust(4, '0')
        else
          io << ch
        end
      end
    end
    io << '"'
  end
end
end