Repository: becheran/mlc Branch: master Commit: 3f00e2b7e105 Files: 100 Total size: 204.0 KB Directory structure: gitextract_iwizgnk_/ ├── .cargo/ │ └── config.toml ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ ├── custom.md │ │ └── feature_request.md │ ├── dependabot.yml │ ├── instructions/ │ │ └── rust.instructions.md │ └── workflows/ │ ├── ci.yml │ └── major-release-tag.yml ├── .gitignore ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.toml ├── Dockerfile ├── GithubAction-Dockerfile ├── LICENSE ├── README.md ├── action.yml ├── benches/ │ ├── benchmark/ │ │ ├── html/ │ │ │ ├── many_links.html │ │ │ ├── no_links.html │ │ │ └── xhtml.xhtml │ │ └── markdown/ │ │ ├── HashLinks.md │ │ ├── ansi_encoded.md │ │ ├── binary_file.md │ │ ├── broken-local-link.md │ │ ├── deep/ │ │ │ └── deeper/ │ │ │ └── go_up.md │ │ ├── ignore_me.md │ │ ├── ignore_me_dir/ │ │ │ ├── ignore_me copy.md │ │ │ └── ignore_me.md │ │ ├── link_ignore_file_extension.md │ │ ├── many_links/ │ │ │ ├── many_links (another copy).md │ │ │ ├── many_links (copy).md │ │ │ └── many_links.md │ │ ├── many_links.md │ │ ├── md_file_endings/ │ │ │ ├── F3_with_umlaut.md │ │ │ ├── NotMardown.nm │ │ │ ├── f1.md │ │ │ ├── f10.text │ │ │ ├── f11.Rmd │ │ │ ├── f12.mkd │ │ │ ├── f2.MD │ │ │ ├── f4.markdown │ │ │ ├── f5.mkdown │ │ │ ├── f6.mkdn │ │ │ ├── f7.mdwn │ │ │ ├── f8.mdtxt │ │ │ ├── f9.mdtext │ │ │ └── notmd │ │ ├── no_links/ │ │ │ ├── no_links (3rd copy).md │ │ │ ├── no_links (4th copy).md │ │ │ ├── no_links (5th copy).md │ │ │ ├── no_links (6th copy).md │ │ │ ├── no_links (7th copy).md │ │ │ ├── no_links (another copy).md │ │ │ ├── no_links (copy).md │ │ │ └── no_links.md │ │ ├── ref_links.md │ │ ├── reference_link.md │ │ ├── repeate_same_link.md │ │ ├── script_and_comments.md │ │ └── withUmlaut_ö/ │ │ └── LinksWithUmläuts.md │ ├── benchmarks.rs │ ├── different_root/ │ │ ├── one/ │ │ │ └── two.md │ │ ├── one.md │ │ └── two.md │ └── throttle/ │ ├── different_host.md │ ├── 
same_host.md │ └── same_ip.md ├── entrypoint.sh ├── release.toml ├── src/ │ ├── cli.rs │ ├── file_traversal.rs │ ├── lib.rs │ ├── link_extractors/ │ │ ├── html_link_extractor.rs │ │ ├── ignore_comments.rs │ │ ├── link_extractor.rs │ │ ├── markdown_link_extractor.rs │ │ └── mod.rs │ ├── link_validator/ │ │ ├── file_system.rs │ │ ├── http.rs │ │ ├── link_type.rs │ │ ├── mail.rs │ │ └── mod.rs │ ├── logger.rs │ ├── main.rs │ └── markup.rs └── tests/ ├── end_to_end.rs ├── end_to_end_mock.rs ├── file_traversal.rs ├── files_option.rs ├── gitignore_recursive.rs ├── helper/ │ └── mod.rs ├── markdown_files.rs ├── symlink_test.rs ├── test_files/ │ ├── deep/ │ │ └── index.md │ ├── many_links.md │ ├── reference_links.md │ ├── repeat_links.md │ └── symlink_test/ │ └── original.md └── throttle.rs ================================================ FILE CONTENTS ================================================ ================================================ FILE: .cargo/config.toml ================================================ [target.x86_64-apple-darwin] linker = "x86_64-apple-darwin14-clang" ar = "x86_64-apple-darwin14-ar" ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: '' assignees: '' --- **Describe the bug** A clear and concise description of what the bug is. **To Reproduce** Steps to reproduce the behavior: 1. This md file '...' 2. This command '....' 3. See error **Expected behavior** A clear and concise description of what you expected to happen. **Desktop (please complete the following information):** - OS: [e.g. iOS] - Browser [e.g. chrome, safari] - Version [e.g. 22] **Additional context** Add any other context about the problem here. 
================================================ FILE: .github/ISSUE_TEMPLATE/custom.md ================================================ --- name: Custom issue template about: Describe this issue template's purpose here. title: '' labels: '' assignees: '' --- ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '' labels: '' assignees: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. **Additional context** Add any other context or screenshots about the feature request here. ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: "cargo" directory: "/" schedule: interval: "weekly" open-pull-requests-limit: 5 labels: - "dependencies" - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly" open-pull-requests-limit: 5 labels: - "dependencies" ================================================ FILE: .github/instructions/rust.instructions.md ================================================ --- description: 'Rust programming language coding conventions and best practices' applyTo: '**/*.rs' --- # Rust Coding Conventions and Best Practices Follow idiomatic Rust practices and community standards when writing Rust code. 
These instructions are based on [The Rust Book](https://doc.rust-lang.org/book/), [Rust API Guidelines](https://rust-lang.github.io/api-guidelines/), [RFC 430 naming conventions](https://github.com/rust-lang/rfcs/blob/master/text/0430-finalizing-naming-conventions.md), and the broader Rust community at [users.rust-lang.org](https://users.rust-lang.org). ## General Instructions - Always prioritize readability, safety, and maintainability. - Use strong typing and leverage Rust's ownership system for memory safety. - Break down complex functions into smaller, more manageable functions. - For algorithm-related code, include explanations of the approach used. - Write code with good maintainability practices, including comments on why certain design decisions were made. - Handle errors gracefully using `Result` and provide meaningful error messages. - For external dependencies, mention their usage and purpose in documentation. - Use consistent naming conventions following [RFC 430](https://github.com/rust-lang/rfcs/blob/master/text/0430-finalizing-naming-conventions.md). - Write idiomatic, safe, and efficient Rust code that follows the borrow checker's rules. - Ensure code compiles without warnings. ## Patterns to Follow - Use modules (`mod`) and public interfaces (`pub`) to encapsulate logic. - Handle errors properly using `?`, `match`, or `if let`. - Use `serde` for serialization and `thiserror` or `anyhow` for custom errors. - Implement traits to abstract services or external dependencies. - Structure async code using `async/await` and `tokio` or `async-std`. - Prefer enums over flags and states for type safety. - Use builders for complex object creation. - Split binary and library code (`main.rs` vs `lib.rs`) for testability and reuse. - Use `rayon` for data parallelism and CPU-bound tasks. - Use iterators instead of index-based loops as they're often faster and safer. - Use `&str` instead of `String` for function parameters when you don't need ownership. 
- Prefer borrowing and zero-copy operations to avoid unnecessary allocations. ### Ownership, Borrowing, and Lifetimes - Prefer borrowing (`&T`) over cloning unless ownership transfer is necessary. - Use `&mut T` when you need to modify borrowed data. - Explicitly annotate lifetimes when the compiler cannot infer them. - Use `Rc` for single-threaded reference counting and `Arc` for thread-safe reference counting. - Use `RefCell` for interior mutability in single-threaded contexts and `Mutex` or `RwLock` for multi-threaded contexts. ## Patterns to Avoid - Don't use `unwrap()` or `expect()` unless absolutely necessary—prefer proper error handling. - Avoid panics in library code—return `Result` instead. - Don't rely on global mutable state—use dependency injection or thread-safe containers. - Avoid deeply nested logic—refactor with functions or combinators. - Don't ignore warnings—treat them as errors during CI. - Avoid `unsafe` unless required and fully documented. - Don't overuse `clone()`, use borrowing instead of cloning unless ownership transfer is needed. - Avoid premature `collect()`, keep iterators lazy until you actually need the collection. - Avoid unnecessary allocations—prefer borrowing and zero-copy operations. ## Code Style and Formatting - Follow the Rust Style Guide and use `rustfmt` for automatic formatting. - Keep lines under 100 characters when possible. - Place function and struct documentation immediately before the item using `///`. - Use `cargo clippy` to catch common mistakes and enforce best practices. ## Error Handling - Use `Result` for recoverable errors and `panic!` only for unrecoverable errors. - Prefer `?` operator over `unwrap()` or `expect()` for error propagation. - Create custom error types using `thiserror` or implement `std::error::Error`. - Use `Option` for values that may or may not exist. - Provide meaningful error messages and context. - Error types should be meaningful and well-behaved (implement standard traits). 
- Validate function arguments and return appropriate errors for invalid input. ## API Design Guidelines ### Common Traits Implementation Eagerly implement common traits where appropriate: - `Copy`, `Clone`, `Eq`, `PartialEq`, `Ord`, `PartialOrd`, `Hash`, `Debug`, `Display`, `Default` - Use standard conversion traits: `From`, `AsRef`, `AsMut` - Collections should implement `FromIterator` and `Extend` - Note: `Send` and `Sync` are auto-implemented by the compiler when safe; avoid manual implementation unless using `unsafe` code ### Type Safety and Predictability - Use newtypes to provide static distinctions - Arguments should convey meaning through types; prefer specific types over generic `bool` parameters - Use `Option` appropriately for truly optional values - Functions with a clear receiver should be methods - Only smart pointers should implement `Deref` and `DerefMut` ### Future Proofing - Use sealed traits to protect against downstream implementations - Structs should have private fields - Functions should validate their arguments - All public types must implement `Debug` ## Testing and Documentation - Write comprehensive unit tests using `#[cfg(test)]` modules and `#[test]` annotations. - Use test modules alongside the code they test (`mod tests { ... }`). - Write integration tests in `tests/` directory with descriptive filenames. - Write clear and concise comments for each function, struct, enum, and complex logic. - Ensure functions have descriptive names and include comprehensive documentation. - Document all public APIs with rustdoc (`///` comments) following the [API Guidelines](https://rust-lang.github.io/api-guidelines/). - Use `#[doc(hidden)]` to hide implementation details from public documentation. - Document error conditions, panic scenarios, and safety considerations. - Examples should use `?` operator, not `unwrap()` or deprecated `try!` macro. ## Project Organization - Use semantic versioning in `Cargo.toml`. 
- Include comprehensive metadata: `description`, `license`, `repository`, `keywords`, `categories`. - Use feature flags for optional functionality. - Organize code into modules using `mod.rs` or named files. - Keep `main.rs` or `lib.rs` minimal - move logic to modules. ## Pre-Pull Request Requirements **IMPORTANT**: Before opening or updating a pull request, you MUST run the following commands and fix any issues: 1. **Format code with rustfmt**: ```bash cargo fmt ``` 2. **Apply clippy fixes**: ```bash cargo clippy --fix --all-targets --all-features --allow-dirty --allow-staged ``` 3. **Verify no warnings remain**: ```bash cargo clippy --all-targets --all-features -- -D warnings ``` These steps ensure code quality and consistency before the PR is opened for review. ## Quality Checklist Before publishing or reviewing Rust code, ensure: ### Core Requirements - [ ] **Naming**: Follows RFC 430 naming conventions - [ ] **Traits**: Implements `Debug`, `Clone`, `PartialEq` where appropriate - [ ] **Error Handling**: Uses `Result` and provides meaningful error types - [ ] **Documentation**: All public items have rustdoc comments with examples - [ ] **Testing**: Comprehensive test coverage including edge cases ### Safety and Quality - [ ] **Safety**: No unnecessary `unsafe` code, proper error handling - [ ] **Performance**: Efficient use of iterators, minimal allocations - [ ] **API Design**: Functions are predictable, flexible, and type-safe - [ ] **Future Proofing**: Private fields in structs, sealed traits where appropriate - [ ] **Tooling**: Code passes `cargo fmt`, `cargo clippy`, and `cargo test` ================================================ FILE: .github/workflows/ci.yml ================================================ name: Continuous Integration on: push: branches: ["master"] tags: - "v*" pull_request: branches: ["master"] workflow_dispatch: env: CARGO_TERM_COLOR: always BINARY_NAME: mlc RUSTFLAGS: "-Dwarnings" jobs: test_own_readme: runs-on: ubuntu-latest steps: 
- uses: actions/checkout@v6 - name: Cache uses: actions/cache@v5 with: path: | ~/.cargo/registry ~/.cargo/git target key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - name: Run run: cargo run -- ./README.md -d formatting: runs-on: ubuntu-latest permissions: contents: write steps: - uses: actions/checkout@v6 with: token: ${{ secrets.GITHUB_TOKEN }} ref: ${{ github.head_ref || github.ref }} - uses: actions-rust-lang/setup-rust-toolchain@v1 with: components: rustfmt, clippy - name: Run rustfmt run: cargo fmt - name: Run clippy with auto-fix run: cargo clippy --fix --all-targets --all-features --allow-dirty --allow-staged - name: Check for formatting changes id: check_changes run: | if [[ -n "$(git status --porcelain)" ]]; then echo "changes=true" >> $GITHUB_OUTPUT else echo "changes=false" >> $GITHUB_OUTPUT fi - name: Commit and push formatting and clippy changes if: steps.check_changes.outputs.changes == 'true' run: | git config --local user.email "github-actions[bot]@users.noreply.github.com" git config --local user.name "github-actions[bot]" git add . 
git commit -m "Auto-format code with rustfmt and clippy" git push origin ${{ github.head_ref || github.ref_name }} test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: actions-rust-lang/setup-rust-toolchain@v1 - run: cargo test --verbose build_linux: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: awalsh128/cache-apt-pkgs-action@v1 with: packages: musl-tools # provides musl-gcc version: 1.0 - name: "Get the Rust toolchain" uses: dtolnay/rust-toolchain@stable with: targets: x86_64-unknown-linux-musl components: rustfmt, clippy - name: Cache uses: actions/cache@v5 with: path: | ~/.cargo/registry ~/.cargo/git target key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - name: Build run: cargo build --release --verbose --target=x86_64-unknown-linux-musl - uses: actions/upload-artifact@v7 with: name: linux path: ./target/x86_64-unknown-linux-musl/release/${{ env.BINARY_NAME }} build_linux_arm64: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: "Get the Rust toolchain" uses: dtolnay/rust-toolchain@stable with: targets: aarch64-unknown-linux-musl - name: Install cross run: cargo install cross --git https://github.com/cross-rs/cross - name: Cache uses: actions/cache@v5 with: path: | ~/.cargo/registry ~/.cargo/git target key: ${{ runner.os }}-cargo-aarch64-${{ hashFiles('**/Cargo.lock') }} - name: Build run: cross build --release --verbose --target=aarch64-unknown-linux-musl - uses: actions/upload-artifact@v7 with: name: linux-arm64 path: ./target/aarch64-unknown-linux-musl/release/${{ env.BINARY_NAME }} build_windows: runs-on: windows-latest steps: - uses: actions/checkout@v6 - uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Build run: cargo build --verbose --release - uses: actions/upload-artifact@v7 with: name: windows path: ./target/release/${{ env.BINARY_NAME }}.exe build_osx: runs-on: macos-latest steps: - uses: actions/checkout@v6 - uses: actions-rust-lang/setup-rust-toolchain@v1 with: target: 
aarch64-apple-darwin - name: Build run: | cargo build --verbose --release --target aarch64-apple-darwin ls ./target - uses: actions/upload-artifact@v7 with: name: apple-darwin-arm64 path: target/aarch64-apple-darwin/release/${{ env.BINARY_NAME }} release_docker: runs-on: ubuntu-latest needs: [build_osx, build_windows, build_linux, build_linux_arm64, test] if: startsWith(github.ref, 'refs/tags/') steps: - uses: actions/checkout@v6 - name: Download artifact uses: actions/download-artifact@v8 with: name: linux path: ./target/release - name: Set up QEMU uses: docker/setup-qemu-action@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v4 - name: Set env run: | version=${GITHUB_REF#refs/*/} version=${version:1} echo "RELEASE_VERSION=$version" >> $GITHUB_ENV - run: echo Push docker image $RELEASE_VERSION - name: Login to Docker Hub uses: docker/login-action@v4 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_PW }} - name: Build and push uses: docker/build-push-action@v7 with: context: . 
push: true tags: becheran/mlc:latest,becheran/mlc:${{ env.RELEASE_VERSION }} release: runs-on: ubuntu-latest needs: [release_docker] if: startsWith(github.ref, 'refs/tags/') steps: - uses: actions/download-artifact@v8 with: name: linux path: mlc-x86_64-linux - uses: actions/download-artifact@v8 with: name: linux-arm64 path: mlc-aarch64-linux - uses: actions/download-artifact@v8 with: name: windows path: mlc-x86_64-windows - uses: actions/download-artifact@v8 with: name: apple-darwin-arm64 path: mlc-aarch64-apple-darwin - name: Rename files run: | ls ls mlc-x86_64-linux ls mlc-aarch64-linux ls mlc-aarch64-apple-darwin ls mlc-x86_64-windows mv ./mlc-x86_64-linux/mlc mlc rm -rd ./mlc-x86_64-linux mv ./mlc mlc-x86_64-linux mv ./mlc-aarch64-linux/mlc mlc rm -rd ./mlc-aarch64-linux mv ./mlc mlc-aarch64-linux mv ./mlc-aarch64-apple-darwin/mlc mlc rm -rd ./mlc-aarch64-apple-darwin mv ./mlc mlc-aarch64-apple-darwin mv ./mlc-x86_64-windows/mlc.exe mlc-x86_64-windows.exe rm -rd ./mlc-x86_64-windows ls - name: GitHub Release uses: softprops/action-gh-release@v3 with: generate_release_notes: true files: | mlc-x86_64-linux mlc-aarch64-linux mlc-aarch64-apple-darwin mlc-x86_64-windows.exe ================================================ FILE: .github/workflows/major-release-tag.yml ================================================ # Copyright (c) 2021 Vincent A. 
Cicirello # MIT License name: Update Major Release Tag on: release: types: [published] jobs: movetag: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Get major version num and update tag run: | VERSION=${GITHUB_REF#refs/tags/} MAJOR=${VERSION%%.*} git config --global user.name 'YOUR NAME HERE' git config --global user.email 'USERNAME@users.noreply.github.com' git tag -fa "${MAJOR}" -m 'Update major version tag' git push origin "${MAJOR}" --force ================================================ FILE: .gitignore ================================================ *~ .#* .DS_Store .cproject .hg/ .hgignore .idea *.iml __pycache__/ *.py[cod] *$py.class .project .settings/ .valgrindrc .vscode/ .favorites.json /*-*-*-*/ /*-*-*/ /Makefile /build /config.toml /dist/ /dl/ /doc /inst/ /llvm/ /mingw-build/ /nd/ /obj/ /rt/ /rustllvm/ /src/libcore/unicode/DerivedCoreProperties.txt /src/libcore/unicode/DerivedNormalizationProps.txt /src/libcore/unicode/PropList.txt /src/libcore/unicode/ReadMe.txt /src/libcore/unicode/Scripts.txt /src/libcore/unicode/SpecialCasing.txt /src/libcore/unicode/UnicodeData.txt /stage[0-9]+/ /target target/ /test/ /tmp/ tags tags.* TAGS TAGS.* \#* \#*\# config.mk config.stamp keywords.md lexer.ml mir_dump Session.vim src/etc/dl tmp.*.rs version.md version.ml version.texi #.cargo !src/vendor/** /src/target/ no_llvm_build .mlc.toml ================================================ FILE: CHANGELOG.md ================================================ # Changelog ## [Unreleased] - ReleaseDate ### Changed - Gitignore files in sub dirs are now also checked ## [1.2.0] - 2025-12-13 ### Added - Custom HTTP headers support for HTTP checks (`--http-headers` / `-H`) [#119](https://github.com/becheran/mlc/pull/119) ### Changed - GitHub Actions usage example now documents how to pass custom HTTP headers - Developer instructions now include pre-PR `cargo fmt` / `cargo clippy` steps [#120](https://github.com/becheran/mlc/pull/120) ### Fixed - GitHub-flavored 
markdown task list checkboxes are no longer detected as links [#121](https://github.com/becheran/mlc/pull/121) - Root directory configuration is validated to avoid crashes when `root-dir` does not exist ## [1.1.0] - 2025-12-11 ### Added - Support for ignore/disable comments to skip specific links or blocks in markup files [#114](https://github.com/becheran/mlc/pull/114) - `--files` option to specify individual files to check [#115](https://github.com/becheran/mlc/pull/115) - Severity column to CSV reports to distinguish errors from warnings [#109](https://github.com/becheran/mlc/pull/109) - ARM64 binary support for Linux ### Changed - Replace external URL dependencies in E2E tests with local mock servers [#118](https://github.com/becheran/mlc/pull/118) - CI workflow now auto-fixes and pushes formatting/clippy changes instead of failing [#116](https://github.com/becheran/mlc/pull/116) - Optimize URL comparison to avoid unnecessary cloning ### Fixed - False redirect warnings for URLs with fragments - Linux ARM64 build by using cross for proper musl cross-compilation - CSV file race condition by using unique file names for each test - Build status badge link in README ## [1.0.0] - 2025-07-07 ## [0.22.0] - 2025-05-29 - Add csv file output [#40](https://github.com/becheran/mlc/issues/40) ## [0.21.0] - 2025-02-08 - Fix do not log warnings #100 ## [0.20.0] - 2025-02-08 - Fix remove trailing slashes from OK messages - Feat add realistic browser accept headers - Feat warn if reference in markdown document is broken ## [0.19.2] - 2025-02-04 ## [0.19.1] - 2025-02-04 ## [0.19.0] - 2024-11-30 ## [0.18.0] - 2024-06-30 - Add `--gitignore` option #94 ## [0.17.2] - 2024-06-23 - Do not panic if ignore paths are not found #92 ## [0.17.1] - 2024-06-05 - Fix config ignore path from config toml interpreted correctly #78 - Changed make ignore directory much faster when traversing ## [0.17.0] - 2024-05-19 - Changed enhanced logging and do not crash if path can not be canonicalized - Added 
option to hide redirects #84 - Changed use ARM64 Mac OS - Fixed upgrade dependencies and added security fixes ## [0.16.3] - 2023-11-20 - Fixes issue with throttle parameter ## [0.16.2] - 2023-06-15 ## [0.16.1] - 2022-12-19 - Fixed Installation via `cargo install` failed #67 ## [0.16.0] - 2022-12-06 - Added config file - Added workflow command output for github actions #63 - Added format links in vs code console so that ctrl + left click opens the file at right location #60 - Changed report redirects as warnings, unless their destination errors #55 - Fixed wrong first line separator on windows #61 - Fixed set accept encoding headers #52 ## [0.15.4] - 2022-08-23 - Fix #54 column line index for files with CR + LF endings - Update external dependencies ## [0.15.3] - 2022-08-18 - Fix #53 broken docker container ## [0.15.2] - 2022-07-11 ## [0.15.1] - 2022-07-07 ## [0.15.0] - 2022-07-07 - Changed markdown parser to be CommonMark compatible - Changed column of detected link to start of tag instead of actual link - Fixed issue #35 detect link in headlines ## [0.14.3] - 2021-05-15 - Changed throttle for increased performance - Security upgraded external dependencies - Fixed #33 link not found near code block ## [0.14.2] - 2021-03-13 - Fixed broken path check if ../ were included on windows file systems ## [0.14.1] - 2021-03-07 ## [0.14.0] - 2020-11-11 - Fallback to GET requests if HEAD fails. See ## [0.13.12] - 2020-10-26 - Added GitHub action to README.md ## [0.13.11] - 2020-10-26 ## [0.13.9] - 2020-10-25 - Fix wrong count output for skipped links ## [0.13.8] - 2020-10-25 - Fix ignore-links - Add -i short for ignore-links argument - #23 - Add github action - Upgrade external dependencies ## [0.13.7] - 2020-09-16 - Fixed #24 - thanks to Alex Melville (Melvillian) for fixing the issue - Fixed #26 - add user-agent to requests ## [0.13.6] - 2020-08-31 - OSX builds ## [0.13.5] - 2020-08-30 - Fixed http requests to crates.io. 
Added header fields (#20) ## [0.13.4] - 2020-08-04 ## [0.13.3] - 2020-08-04 - Fixed https requests in docker container (#17) ## [0.13.2] - 2020-07-21 ## [0.13.1] - 2020-07-21 ## [0.13.0] - 2020-07-17 - Added `--throttle` command ## [0.12.0] - 2020-07-15 - Added `--match-file-extension` switch - Changed check links only once for speed improvement ## [0.11.2] - 2020-07-13 ### Changed - Improve fs checkup speed ## [0.11.1] - 2020-07-10 ### Fixed - Ignore path parameter ## [0.11.0] - 2020-07-08 ### Added - Ignore files and directories ## [0.10.5] - 2020-07-07 ### Fixed - Allow email address with special chars - Add html comment support to markdown files ## [0.10.4] - 2020-07-03 ### Changed - No error for unknown URL schemes ### Fixed - Ref links with hashtag are not classified as error - Case insensitive mail addresses ## [0.10.3] - 2020-07-02 ### Fixed - Link refs only allowed at beginning of line - Path separator for os ### Changed - Allow mails without mailto schema ## [0.10.2] - 2020-07-02 ## [0.10.1] - 2020-07-02 ## [0.10.0] - 2020-07-01 ### Added - Virtual root dir for easier local testing ## [0.9.3] - 2020-06-24 ## [0.9.2] - 2020-05-24 ## [0.9.1] - 2020-01-29 ### Fixed - Mailto URI path accepted without double slashes ## [0.9.0] - 2020-01-20 ### Changed - Faster execution with async tasks ### Fixed - Wildcard parser for excluded links ## [0.8.0] - 2020-01-11 ### Added - HTML support ### Fixed - No panic for not UTF-8 encoded files ## [0.7.0] - 2020-01-02 ### Added - Reference readme file - Ignore links option - No web link option for faster checks without following weblinks ## [0.6.4] - 2019-12-30 ### Changed - Retry with Get for status code 405 Method Not Allowed instead of error - Column number now points to the link directly instead of the markdown link beginning ### Fixed - Nested link support (Issue #1) ## [0.6.3] - 2019-12-29 ### Changed - Release binaries on GitHub releases instead of GitLab ## [0.6.2] - 2019-12-28 ### Removed - Remove pipeline badge from 
crates io ## [0.6.1] - 2019-12-28 ### Changed - Speedup for http links. Do create client only once - Move from GitLab to GitHub ## [0.6.0] - 2019-12-26 ### Added - Mail check support ## [0.5.1] - 2019-12-25 ### Fixed - Inline html link at start of line ## [0.5.0] - 2019-12-25 ### Added - Markup reference link support ## [0.4.2] - 2019-12-24 ### Changed - Description in readme ### Fixed - Typo ## [0.4.1] - 2019-12-23 ### Changed - Result output formatting ## [0.4.0] - 2019-12-23 ### Added - Change Log - Code block support in markdown files - More file markdown endings support (markdown, mkdn,...) ### Fixed - File extension separator (previously "somefilemd" was also taken as markdown file) ## [0.3.1] - 2019-12-21 ### Fixed - Code cleanup - Readme update ## [0.3.0] - 2019-12-19 ### Added - First version of markup link checker (previously mlc was another rust lib project) ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Contributor Covenant Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 
## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at becherarmin@gmail.com. 
All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq ================================================ FILE: CONTRIBUTING.md ================================================ # Contribution All contributions and comments welcome! Open an issue or create a Pull Request whenever you find a bug or have an idea to improve this crate. ================================================ FILE: Cargo.toml ================================================ [package] name = "mlc" version = "1.2.0" authors = ["Armin Becher "] edition = "2018" description = "The markup link checker (mlc) checks for broken links in markup files." 
keywords = [ "link-checker", "broken", "markup", "html", "markdown"] readme = "README.md" license = "MIT" repository = "https://github.com/becheran/mlc" [badges] maintenance = { status = "actively-developed" } is-it-maintained-open-issues = { repository = "becheran/mlc" } is-it-maintained-issue-resolution = { repository = "becheran/mlc" } [dependencies] clap = { version = "4.6.0", features = ["cargo"] } log = "0.4.29" fern = "0.7.1" walkdir = "2.5.0" regex = "1.12.3" lazy_static = "1.5.0" url = "2.5.4" colored = "3.1.1" async-std = "1.13.2" reqwest = {version="0.13.2", features = ["native-tls-vendored", "brotli", "gzip", "deflate"] } tokio = {version="1.51.1", features = ["rt-multi-thread", "macros", "time"] } futures = "0.3.32" wildmatch = "2.6.1" pulldown-cmark = "0.13.3" toml = "1.1.2" serde = { version = "1.0.219", features = ["derive"] } url-escape = "0.1.1" [dev-dependencies] ntest = "0.9.5" criterion = "0.8.2" mockito = "1.7.2" [[bench]] name = "benchmarks" harness = false ================================================ FILE: Dockerfile ================================================ FROM ubuntu:24.04 RUN apt-get update; apt-get install -y ca-certificates; update-ca-certificates RUN apt-get install git -y ADD ./target/release/mlc /bin/mlc RUN chmod +x /bin/mlc RUN PATH=$PATH:/bin/mlc ================================================ FILE: GithubAction-Dockerfile ================================================ FROM becheran/mlc:1.2.0 LABEL repository="https://github.com/becheran/mlc" COPY entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh COPY LICENSE README.md / ENTRYPOINT ["/entrypoint.sh"] ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2020 Armin Becher Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without 
limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # Markup Link Checker [![crates.io](https://img.shields.io/crates/v/mlc.svg?color=orange)](https://crates.io/crates/mlc) [![downloads](https://badgen.net/crates/d/mlc?color=blue)](https://crates.io/crates/mlc) [![build status](https://github.com/becheran/mlc/actions/workflows/ci.yml/badge.svg)](https://github.com/becheran/mlc/actions/workflows/ci.yml) [![license](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/license/mit) [![PRs welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/becheran/mlc/blob/master/CONTRIBUTING.md) ![image](./docs/mlc.gif) Check for broken links in markup files. Currently `html` and `markdown` files are supported. The Markup Link Checker can easily be integrated in your CI pipeline to prevent broken links in your markup docs. 
## Features * Find and check links in `markdown` and `html` files * Validates absolute and relative file paths and URLs * Support for ignore/disable comments to skip specific links or blocks * User friendly command line interface * Easy [CI pipeline integration](#ci-pipeline) * Very fast execution using [async rust](https://rust-lang.github.io/async-book/) * Efficient link resolving strategy which keeps the network load to a minimum * Throttle option to prevent *429 Too Many Requests* errors * Report broken links via GitHub workflow commands ## Install Locally There are different ways to install and use *mlc*. ### Cargo Use rust's package manager [cargo](https://doc.rust-lang.org/cargo/) to install *mlc* from [crates.io](https://crates.io/crates/mlc): ``` bash cargo install mlc ``` ### Download Binaries To download a compiled binary version of *mlc* go to [github releases](https://github.com/becheran/mlc/releases) and download the binaries compiled for: * **Linux**: x86_64 and aarch64 (arm64) * **macOS**: aarch64 (Apple Silicon) * **Windows**: x86_64 ### Arch Linux You can install from the [official repositories](https://archlinux.org/packages/extra/x86_64/markuplinkchecker/) using [pacman](https://wiki.archlinux.org/title/Pacman): ```bash pacman -S markuplinkchecker ``` ## CI Pipeline ### GitHub Actions Use *mlc* in GitHub using the *GitHub-Action* from the [Marketplace](https://github.com/marketplace/actions/markup-link-checker-mlc).
``` yaml - name: Markup Link Checker (mlc) uses: becheran/mlc@v1 ``` Use *mlc* command line arguments using the `with` argument: ``` yaml - name: Markup Link Checker (mlc) uses: becheran/mlc@v1 with: args: >- ./README.md -H "User-Agent: Mozilla/5.0" -H "Authorization: Bearer ${{ secrets.MY_TOKEN }}" ``` The action uses [GitHub workflow commands](https://docs.github.com/en/actions/reference/workflows-and-actions/workflow-commands) to highlight broken links: ![annotation](./docs/FailingAnnotation.PNG) ### Binary To integrate *mlc* in your CI pipeline running in a *linux x86_64 environment* you can add the following commands to download and execute it: ``` bash curl -L https://github.com/becheran/mlc/releases/download/v1.2.0/mlc-x86_64-linux -o mlc chmod +x mlc ./mlc ``` For **linux aarch64/arm64** environments, use: ``` bash curl -L https://github.com/becheran/mlc/releases/download/v1.2.0/mlc-aarch64-linux -o mlc chmod +x mlc ./mlc ``` For example take a look at the [ntest repo](https://github.com/becheran/ntest/blob/master/.github/workflows/ci.yml) which uses *mlc* in the CI pipeline. ## Docker Use the *mlc* docker image from the [docker hub](https://hub.docker.com/r/becheran/mlc) which includes *mlc*: ``` sh docker run becheran/mlc mlc ``` ## Usage Once you have *mlc* installed, it can be called from the command line. The following call will check all links in markup files found in the current folder and all subdirectories: ``` bash mlc ``` Another example is to call *mlc* on a certain directory or file: ``` bash mlc ./docs ``` To check only specific files, for example all `README.md` files in a monorepo: ```bash mlc --files "./README.md,./project1/README.md,./project2/README.md" ``` Alternatively you may want to ignore all files currently ignored by `git` (requires `git` binary to be found on $PATH) and set a root-dir for relative links: ```bash mlc --gitignore --root-dir .
``` Call *mlc* with the `--help` flag to display all available cli arguments: ``` bash mlc -h ``` The following arguments are available: | Argument | Short | Description | |------------------|-------|-------------| | `<directory>` | | Only positional argument. Path to directory which shall be checked with all sub-dirs. Can also be a specific filename which shall be checked. | | `--help` | `-h` | Print help | | `--debug` | `-d` | Show verbose debug information | | `--do-not-warn-for-redirect-to` | | Do not warn for links which redirect to the given URL. Allows the same link format as `--ignore-links`. For example, `--do-not-warn-for-redirect-to "http*://crates.io*"` will not warn for links which redirect to the `crates.io` website. | | `--offline` | `-o` | Do not check any web links. Renamed from `--no-web-links` which is still an alias for backwards compatibility | | `--match-file-extension` | `-e` | Set the flag, if the file extension shall be checked as well. For example the following markup link `[link](dir/file)` matches if for example a file called `file.md` exists in `dir`, but would fail when the `--match-file-extension` flag is set. | | `--version` | `-V` | Print current version of mlc | | `--ignore-path` | `-p` | Comma separated list of directories or files which shall be ignored. For example `--ignore-path "./ignore-me,./src"` will skip the `./ignore-me` and `./src` paths. | | `--gitignore` | `-g` | Ignore all files currently ignored by git (requires `git` binary to be available on $PATH). | | `--gituntracked` | `-u` | Ignore all files currently untracked by git (requires `git` binary to be available on $PATH). | | `--ignore-links` | `-i` | Comma separated list of links which shall be ignored. Use simple `?` and `*` wildcards. For example `--ignore-links "http*://crates.io*"` will skip all links to the crates.io website. See the [used lib](https://github.com/becheran/wildmatch) for more information. | | `--markup-types` | `-t` | Comma separated list of markup types which shall be checked.
Possible values: `md`, `html` | | `--root-dir` | `-r` | All links to the file system starting with a slash on linux or backslash on windows will use another virtual root dir. For example the link in a file `[link](/dir/other/file.md)` checked with the cli arg `--root-dir /env/another/dir` will let *mlc* check the existence of `/env/another/dir/dir/other/file.md`. | | `--throttle` | `-T` | Number of milliseconds to wait in between web requests to the same host. Default is zero which means no throttling. Set this if you need to slow down the web request frequency to avoid `429 - Too Many Requests` responses. For example with `--throttle 15`, between each http check to the same host, 15 ms will be waited. Note that this setting can slow down the link checker. | | `--csv` | | Path to csv file which contains all failed requests and warnings in the format `source,line,column,target,severity`. The severity column contains `ERR` for errors and `WARN` for warnings. | | `--files` | `-f` | Comma separated list of files which shall be checked. For example `--files "./README.md,./docs/README.md"` will check only the specified files. This is useful for checking specific files in a monorepo without having to exclude many directories. | | `--http-headers` | `-H` | Comma separated list of custom HTTP headers in the format `'Name: Value'`. This is useful for setting custom user agents or other headers required by specific websites. For example `--http-headers "User-Agent: Mozilla/5.0,X-Custom-Header: value"` will set both a custom user agent and an additional header. 
| ## Ignore Comments You can use HTML comments to disable link checking for specific lines or blocks in both markdown and HTML files: ### Disable for Current Line ```markdown [This link](http://broken-link.invalid) will be ignored <!-- mlc-disable-line --> ``` ### Disable for Next Line ```markdown <!-- mlc-disable-next-line --> [This link](http://broken-link.invalid) will be ignored ``` ### Disable/Enable Blocks ```markdown [This link](http://example.com) will be checked <!-- mlc-disable --> [This link](http://broken-link.invalid) will be ignored [This link](http://also-broken.invalid) will also be ignored <!-- mlc-enable --> [This link](http://example.org) will be checked again ``` If you use `<!-- mlc-disable -->` without a corresponding `<!-- mlc-enable -->`, all links from that point until the end of the file will be ignored. These comments work in both markdown and HTML files. All optional arguments which can be passed via the command line can also be configured via the `.mlc.toml` config file in the working directory: ``` toml # Print debug information to console debug = true # Do not warn for links which redirect to the given URL do-not-warn-for-redirect-to=["http*://crates.io*"] # Do not check web links offline = true # Check the exact file extension when searching for a file match-file-extension= true # List of files and directories which will be ignored ignore-path=["./ignore-me","./src"] # Ignore all files ignored by git gitignore = true # List of links which will be ignored ignore-links=["http://ignore-me.de/*","http://*.ignoresub-domain/*"] # List of markup types which shall be checked markup-types=["Markdown","Html"] # Wait time in milliseconds between http requests to the same host throttle= 100 # Path to the root folder used to resolve all relative paths root-dir="./" # Path to csv file which contains all failed requests and warnings csv="output.csv" # List of specific files to check files=["./README.md","./docs/README.md"] # Custom HTTP headers to send with web requests http-headers=["User-Agent: Mozilla/5.0","X-Custom-Header: value"] ``` ## Changelog Check out the [changelog
file](https://github.com/becheran/mlc/blob/master/CHANGELOG.md) to see the changes between different versions. ## License This project is licensed under the *MIT License* - see the [LICENSE file](https://github.com/becheran/mlc/blob/master/LICENSE) for more details. ================================================ FILE: action.yml ================================================ name: 'Markup Link Checker (mlc)' description: 'Check links in markup files' inputs: args: description: 'arguments' default: './' runs: using: 'docker' image: 'GithubAction-Dockerfile' branding: icon: 'link' color: 'green' ================================================ FILE: benches/benchmark/html/many_links.html ================================================

Hello, world!

bla bla Visit W3Schools.com! bla bla

bla bla Visit W3Schools.com! bla bla

multiline Visit W3Schools.com! bla bla

sdjklf slfkj ================================================ FILE: benches/benchmark/html/no_links.html ================================================ ================================================ FILE: benches/benchmark/html/xhtml.xhtml ================================================ Title of document Visit W3Schools.com! some content ================================================ FILE: benches/benchmark/markdown/HashLinks.md ================================================ # Chapter 1 [go to chapter 2](#chapter-2) [go to chapter 2-2](#####chapter-21) [go to chapter 2-2](#####chapter-22) [go to Other Page](./ref_links.md#ref-link-chapters) [go to Other Page](./ref_links.md###ref-link-chapters) # Chapter 2 ## Chapter 21 ## Chapter 22 [go to chapter 1](#####chapter-1) # Chapter 3 ================================================ FILE: benches/benchmark/markdown/ansi_encoded.md ================================================ # File containing some ansi char � ================================================ FILE: benches/benchmark/markdown/broken-local-link.md ================================================ # All broken links [broken](./doc/broken-local-link.doc) [ok](./binary_file.md) ================================================ FILE: benches/benchmark/markdown/deep/deeper/go_up.md ================================================ [furtherup](../../HashLinks.md) ================================================ FILE: benches/benchmark/markdown/ignore_me.md ================================================ [Broken](broken_Link) [Broken](broken_Link) [Broken](broken_Link) ================================================ FILE: benches/benchmark/markdown/ignore_me_dir/ignore_me copy.md ================================================ [Broken](broken_Link) [Broken](broken_Link) [Broken](broken_Link) ================================================ FILE: benches/benchmark/markdown/ignore_me_dir/ignore_me.md ================================================ 
[Broken](broken_Link) [Broken](broken_Link) [Broken](broken_Link) ================================================ FILE: benches/benchmark/markdown/link_ignore_file_extension.md ================================================ # Chapter 1 [ref](./ref_links) [ref](./no_links/no_links) ================================================ FILE: benches/benchmark/markdown/many_links/many_links (another copy).md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris [many_links](./many_links.md) [many_links](./many_links) ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. Acuto fuit, hinc nata caedit dolentes. Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. Somni hic Pergama saeviat vincta. Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? [many_links](./many_links.md) ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. 
[many_links](./many_links.md) characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; [many_links](./many_links.md) Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. [many_links](./many_links.md) ## Deos Atlas [many_links](./many_links.md) Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); [many_links](./many_links.md) Fecit interea **sub Melaneus**, veniente loco matre. Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. [many_links](./many_links.md) [many_links](./many_links.md) [many_links](./many_links.md) ================================================ FILE: benches/benchmark/markdown/many_links/many_links (copy).md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris [many_links](./many_links.md) ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. 
Acuto fuit, hinc nata caedit dolentes. Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. Somni hic Pergama saeviat vincta. Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? [many_links](./many_links.md) ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. [many_links](./many_links.md) characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; [many_links](./many_links.md) Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. [many_links](./many_links.md) ## Deos Atlas [many_links](./many_links.md) Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. 
ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); [many_links](./many_links.md) Fecit interea **sub Melaneus**, veniente loco matre. Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. [many_links](./many_links.md) [many_links](./many_links.md) [many_links](./many_links.md) ================================================ FILE: benches/benchmark/markdown/many_links/many_links.md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris [many_links](./many_links.md) [a Stack data structure](https://en.wikipedia.org/wiki/Stack_(abstract_data_type)) ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. Acuto fuit, hinc nata caedit dolentes. Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. Somni hic Pergama saeviat vincta. Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? [many_links](./many_links.md) ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. 
[many_links](./many_links.md) characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; [many_links](./many_links.md) Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. [many_links](./many_links.md) ## Deos Atlas [many_links](./many_links.md) Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); [many_links](./many_links.md) Fecit interea **sub Melaneus**, veniente loco matre. Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. 
[many_links](./many_links.md) [many_links](./many_links.md) [many_links](./many_links.md) * [option one] foo * [option two]: bar ================================================ FILE: benches/benchmark/markdown/many_links.md ================================================ # Many Links [local_file](many_links.md) [folder](./deep) [https_link](https://www.google.de/) [https_link2](https://www.google.de/?hl=de) [mail](mailto://test.mail@tester.com) [unkown_url](another://foobar) ================================================ FILE: benches/benchmark/markdown/md_file_endings/F3_with_umlaut.md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. Acuto fuit, hinc nata caedit dolentes. Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. Somni hic Pergama saeviat vincta. Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. 
characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. ## Deos Atlas Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); Fecit interea **sub Melaneus**, veniente [si ipsum](NoLInk) loco matre. Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. ================================================ FILE: benches/benchmark/markdown/md_file_endings/NotMardown.nm ================================================ ================================================ FILE: benches/benchmark/markdown/md_file_endings/f1.md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. Acuto fuit, hinc nata caedit dolentes. 
Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. Somni hic Pergama saeviat vincta. Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. ## Deos Atlas Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); Fecit interea **sub Melaneus**, veniente [si ipsum](NoLInk) loco matre. 
Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. ================================================ FILE: benches/benchmark/markdown/md_file_endings/f10.text ================================================ ================================================ FILE: benches/benchmark/markdown/md_file_endings/f11.Rmd ================================================ ================================================ FILE: benches/benchmark/markdown/md_file_endings/f12.mkd ================================================ ================================================ FILE: benches/benchmark/markdown/md_file_endings/f2.MD ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. Acuto fuit, hinc nata caedit dolentes. Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. Somni hic Pergama saeviat vincta. Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. 
characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. ## Deos Atlas Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); Fecit interea **sub Melaneus**, veniente [si ipsum](NoLInk) loco matre. Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. 
================================================ FILE: benches/benchmark/markdown/md_file_endings/f4.markdown ================================================ ================================================ FILE: benches/benchmark/markdown/md_file_endings/f5.mkdown ================================================ ================================================ FILE: benches/benchmark/markdown/md_file_endings/f6.mkdn ================================================ ================================================ FILE: benches/benchmark/markdown/md_file_endings/f7.mdwn ================================================ ================================================ FILE: benches/benchmark/markdown/md_file_endings/f8.mdtxt ================================================ ================================================ FILE: benches/benchmark/markdown/md_file_endings/f9.mdtext ================================================ ================================================ FILE: benches/benchmark/markdown/md_file_endings/notmd ================================================ Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. ================================================ FILE: benches/benchmark/markdown/no_links/no_links (3rd copy).md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. Acuto fuit, hinc nata caedit dolentes. Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. 
Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. Somni hic Pergama saeviat vincta. Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. ## Deos Atlas Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); Fecit interea **sub Melaneus**, veniente loco matre. Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. 
Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. ================================================ FILE: benches/benchmark/markdown/no_links/no_links (4th copy).md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. Acuto fuit, hinc nata caedit dolentes. Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. Somni hic Pergama saeviat vincta. Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. 
Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. ## Deos Atlas Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); Fecit interea **sub Melaneus**, veniente loco matre. Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. ================================================ FILE: benches/benchmark/markdown/no_links/no_links (5th copy).md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. Acuto fuit, hinc nata caedit dolentes. Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. Somni hic Pergama saeviat vincta. Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. 
characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. ## Deos Atlas Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); Fecit interea **sub Melaneus**, veniente loco matre. Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. ================================================ FILE: benches/benchmark/markdown/no_links/no_links (6th copy).md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. Acuto fuit, hinc nata caedit dolentes. Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. 
Somni hic Pergama saeviat vincta. Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. ## Deos Atlas Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); Fecit interea **sub Melaneus**, veniente loco matre. Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. 
================================================ FILE: benches/benchmark/markdown/no_links/no_links (7th copy).md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. Acuto fuit, hinc nata caedit dolentes. Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. Somni hic Pergama saeviat vincta. Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. 
## Deos Atlas Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); Fecit interea **sub Melaneus**, veniente loco matre. Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. ================================================ FILE: benches/benchmark/markdown/no_links/no_links (another copy).md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. Acuto fuit, hinc nata caedit dolentes. Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. Somni hic Pergama saeviat vincta. Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. 
characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. ## Deos Atlas Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); Fecit interea **sub Melaneus**, veniente loco matre. Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. ================================================ FILE: benches/benchmark/markdown/no_links/no_links (copy).md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. Acuto fuit, hinc nata caedit dolentes. Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. Somni hic Pergama saeviat vincta. 
Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. ## Deos Atlas Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); Fecit interea **sub Melaneus**, veniente loco matre. Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. 
================================================ FILE: benches/benchmark/markdown/no_links/no_links.md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris ## Siquid suis Anguem sola Lorem markdownum illum Hymenaee crudelius, sub magni, sic missa, sui quas dixit adde Othrys successor conspecta. Acuto fuit, hinc nata caedit dolentes. Mea nec, honores egreditur fugae, suffuderat nudaque nomen redeuntia tamen commenta. Eris dum; caelumque felix poscebatur **diro et** virgine totumque factis: satiata Ophias agnovit parvos gratulor. Spatio et hasta. Somni hic Pergama saeviat vincta. Quod fessa aethere stratoque, abluit hoc *erat*, vera sua protinus rati? Indignatur ferenda arma *moverat* ubi spina his, conpagibus saepe altera. Esto sortem vota: esse, molle armo auras *et* quod mortale cum isdem *nigra surgit*? ## Figuram ait credita auctus Stygiisque ventrem redimicula Et quemquam nec nostri, nil atque stabat unus. Defensamus numina! Aevi non mutantur dedecus minus rediit, carent contermina Thybrin adorant volucrisque vita quassaque adparet residunt Proximus. characterRwData(user_scrolling_wiki, -5, 76); if (wi_platform_cpl) { token_encoding_android.progressiveLeft = kbps_honeypot + motherboard; } if (uploadBus.iphone(ppl + 95, jquery - 436282)) { webmail.plug -= -3; log_expansion += mampProperty; } else { installer_error = koffice_wiki_upnp * analystMultitasking; whois_card_correction = rippingMemorySsid; } bar_pim += esports(33, hardSli * verticalBarcraft * systemAutoresponderFlowchart); var laser_vdsl = displayCaptcha + standaloneBrowser + character + 17; Domus nobis mihi, iam nec temperat: opes liceat volucri, tamen pars cruor nymphae feroxque. Et hiems audierat atque ora avia huic Sidone; ut non est nubes epota. Erat satos nec suo ut inhaesi ignes, est **fer praesens cum** genus. Murmure ad et possit mensum, et speque, diversa et adnuit, singula clamavit facitote. 
## Deos Atlas Hausit telluris et tandem inscius. Negaretur manu, scopulis fuit vulgique, invenit putes locuta. ppp *= mini(intranetMmsLte, system_repository_ldap) + gis + 17 / san + 1; vdu = dataIpad; pci(2); offline_hardening_cycle += telnet_flash_spyware + cybercrimeFormulaRate(37); Fecit interea **sub Melaneus**, veniente loco matre. Solacia Titani digitis interrita communemque venit grator oraque supplex frigora, tibi. Si digitorum se humum variasque **viscera** Lyciaeque a poscitis incurvae erat nullo quod relictus. ================================================ FILE: benches/benchmark/markdown/ref_links.md ================================================ [link1][1] another Link: [link2][foo] Use link again [link1 agin][1] LKJDF [1]: ./ref_links.md [foo]: ./ref_links.md This aint no link [boo]: ./not_existent.md # Ref Link Chapter ================================================ FILE: benches/benchmark/markdown/reference_link.md ================================================ # Contain reference style markdown links [I'm a reference-style link][Arbitrary case-insensitive reference text] [I'm a relative reference to a repository file](./many_links.md) [You can use numbers for reference-style link definitions][1] Or leave it empty and use the [link text itself]. [This is not a valid reference link][2] URLs and URLs in angle brackets will automatically get turned into links. or and sometimes example.com (but not on Github, for example). Some text to show that the reference links can follow later. 
[arbitrary case-insensitive reference text]: https://www.mozilla.org [1]: http://slashdot.org [link text itself]: https://www.google.com ================================================ FILE: benches/benchmark/markdown/repeate_same_link.md ================================================ # Chapter 1 [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) 
[Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) [Google](https://google.de) ================================================ FILE: benches/benchmark/markdown/script_and_comments.md ================================================ [this is a link](./script_and_comments.md) ``` js [this is not a link](./nowhere) ``` [this is a link](./script_and_comments.md) also not a link `[this is not a link](./nowhere)` ``` [this is not a link](./nowhere) ``` [this is a link](./script_and_comments.md) [this is a link](./script_and_comments.md) [this is a link](./script_and_comments.md) ================================================ FILE: benches/benchmark/markdown/withUmlaut_ö/LinksWithUmläuts.md ================================================ # Torrentur suum abstrahor quique Iuppiter rerum mediocris [many_links](./LinksWithUmläuts.md) ================================================ FILE: benches/benchmarks.rs ================================================ #[cfg(test)] #[macro_use] extern crate criterion; use criterion::Criterion; use mlc::markup::MarkupType; use mlc::{Config, OptionalConfig}; use std::fs; async fn end_to_end_benchmark() { let config = Config { directory: fs::canonicalize("./benches/benchmark/markdown/ignore_me_dir").unwrap(), optional: OptionalConfig { markup_types: Some(vec![MarkupType::Markdown]), ..Default::default() }, }; mlc::run(&config).await.unwrap(); } fn criterion_benchmark(c: &mut Criterion) { c.bench_function("End to end benchmark", |b| 
b.iter(end_to_end_benchmark)); } criterion_group! { name = benches; config = Criterion::default().sample_size(10); targets = criterion_benchmark } criterion_main!(benches); ================================================ FILE: benches/different_root/one/two.md ================================================ [one](\one.md) [two](/two.md) ================================================ FILE: benches/different_root/one.md ================================================ [one](/one.md) [two](/two.md) [two](/one/two.md) ================================================ FILE: benches/different_root/two.md ================================================ [one](/one.md) [two](\two.md) [two](/one/two.md) ================================================ FILE: benches/throttle/different_host.md ================================================ # Chapter 1 [fooRandomNotValidURLBla0](https://fooRandomNotValidURLBla0.de/f0) [fooRandomNotValidURLBla1](https://fooRandomNotValidURLBla1.de/f1) [fooRandomNotValidURLBla2](https://fooRandomNotValidURLBla2.de/f2) [fooRandomNotValidURLBla3](https://fooRandomNotValidURLBla3.de/f3) [fooRandomNotValidURLBla4](https://fooRandomNotValidURLBla4.de/f4) [fooRandomNotValidURLBla5](https://fooRandomNotValidURLBla5.de/f5) [fooRandomNotValidURLBla6](https://fooRandomNotValidURLBla6.de/f6) [fooRandomNotValidURLBla7](https://fooRandomNotValidURLBla7.de/f7) [fooRandomNotValidURLBla8](https://fooRandomNotValidURLBla8.de/f8) [fooRandomNotValidURLBla9](https://fooRandomNotValidURLBla9.de/f9) ================================================ FILE: benches/throttle/same_host.md ================================================ # Chapter 1 [foo](https://fooRandomNotValidURLBla0.de/f0) [foo](https://fooRandomNotValidURLBla0.de/f1) [foo](https://fooRandomNotValidURLBla0.de/f2) [foo](https://fooRandomNotValidURLBla0.de/f3) [foo](https://fooRandomNotValidURLBla0.de/f4) [foo](https://fooRandomNotValidURLBla0.de/f5) [foo](https://fooRandomNotValidURLBla0.de/f6) 
[foo](https://fooRandomNotValidURLBla0.de/f7) [foo](https://fooRandomNotValidURLBla0.de/f8) [foo](https://fooRandomNotValidURLBla0.de/f9) ================================================ FILE: benches/throttle/same_ip.md ================================================ # Chapter 1 [foo](https://127.0.0.1/f0) [foo](https://127.0.0.1/f1) [foo](https://127.0.0.1/f2) [foo](https://127.0.0.1/f3) [foo](https://127.0.0.1/f4) [foo](https://127.0.0.1/f5) [foo](https://127.0.0.1/f6) [foo](https://127.0.0.1/f7) [foo](https://127.0.0.1/f8) [foo](https://127.0.0.1/f9) ================================================ FILE: entrypoint.sh ================================================ #!/bin/bash mlc $* ================================================ FILE: release.toml ================================================ pre-release-replacements = [ {file="README.md", search="releases/download/v[0-9\\.-]+", replace="releases/download/v{{version}}"}, {file="CHANGELOG.md", search="Unreleased", replace="{{version}}"}, {file="CHANGELOG.md", search="ReleaseDate", replace="{{date}}"}, {file="CHANGELOG.md", search="", replace="\n\n## [Unreleased] - ReleaseDate"}, {file="GithubAction-Dockerfile", search="FROM becheran/mlc:[0-9\\.-]+", replace="FROM becheran/mlc:{{version}}"}, ] ================================================ FILE: src/cli.rs ================================================ use crate::markup::MarkupType; use crate::Config; use crate::OptionalConfig; use clap::Arg; use clap::ArgAction; use std::fs; use std::path::Path; use std::path::MAIN_SEPARATOR; const CONFIG_FILE_PATH: &str = "./.mlc.toml"; fn normalize_path_separators(path: &str) -> String { path.replace(['/', '\\'], std::path::MAIN_SEPARATOR_STR) } #[must_use] pub fn parse_args() -> Config { let mut opt: OptionalConfig = match fs::read_to_string(CONFIG_FILE_PATH) { Ok(content) => match toml::from_str(&content) { Ok(o) => o, Err(err) => panic!("Invalid TOML file {:?}", err), }, Err(_) => OptionalConfig::default(), }; 
if let Some(root_dir) = &opt.root_dir { if !root_dir.is_dir() { eprintln!("Root path {root_dir:?} must be an existing directory (from .mlc.toml)."); std::process::exit(1); } } let matches = command!() .arg( Arg::new("directory") .help("Check all links in given directory and subdirectory") .required(false) .index(1), ) .arg(arg!(-d --debug "Print debug information to console").required(false)) .arg( arg!(-o --offline "Do not check web links") .alias("no-web-links") .required(false), ) .arg( Arg::new("do-not-warn-for-redirect-to") .long("do-not-warn-for-redirect-to") .value_name("LINKS") .value_delimiter(',') .action(ArgAction::Append) .help("Comma separated list of links which will be ignored") .required(false), ) .arg( Arg::new("match-file-extension") .long("match-file-extension") .short('e') .action(ArgAction::SetTrue) .help("Check the exact file extension when searching for a file") .required(false), ) .arg( Arg::new("ignore-path") .long("ignore-path") .short('p') .help("Comma separated list of files and directories which will be ignored") .value_name("PATHS") .value_delimiter(',') .action(ArgAction::Append) .required(false), ) .arg( Arg::new("ignore-links") .long("ignore-links") .short('i') .value_name("LINKS") .value_delimiter(',') .action(ArgAction::Append) .help("Comma separated list of links which will be ignored") .required(false), ) .arg( Arg::new("markup-types") .long("markup-types") .short('t') .value_name("TYPES") .help("Comma separated list of markup types which shall be checked") .action(ArgAction::Append) .value_delimiter(',') .required(false), ) .arg( Arg::new("throttle") .long("throttle") .short('T') .value_name("DELAY-MS") .help("Wait time in milliseconds between http request to the same host") .action(ArgAction::Append) .required(false), ) .arg( Arg::new("root-dir") .long("root-dir") .short('r') .value_name("DIR") .help("Path to the root folder used to resolve all relative paths") .required(false), ) .arg( Arg::new("gitignore") .long("gitignore") 
.short('g') .value_name("GIT") .help("Ignore all files ignored by git") .action(ArgAction::SetTrue) .required(false), ) .arg( Arg::new("csv") .long("csv") .value_name("CSV_FILE") .help("set the output file for the CSV report") .required(false), ) .arg( Arg::new("gituntracked") .long("gituntracked") .short('u') .value_name("GITUNTRACKED") .help("Ignore all files untracked by git") .action(ArgAction::SetTrue) .required(false), ) .arg( Arg::new("files") .long("files") .short('f') .help("Comma separated list of files which shall be checked") .value_name("FILES") .value_delimiter(',') .action(ArgAction::Append) .required(false), ) .arg( Arg::new("http-headers") .long("http-headers") .short('H') .help("Comma separated list of custom HTTP headers in the format 'Name: Value'. For example 'User-Agent: Mozilla/5.0'") .value_name("HEADERS") .value_delimiter(',') .action(ArgAction::Append) .required(false), ) .get_matches(); let default_dir = format!(".{}", &MAIN_SEPARATOR); let dir_string = matches .get_one::("directory") .unwrap_or(&default_dir); let directory = normalize_path_separators(dir_string) .parse() .expect("failed to parse path"); if matches.get_flag("debug") { opt.debug = Some(true); } if let Some(do_not_warn_for_redirect_to) = matches.get_many::("do-not-warn-for-redirect-to") { opt.do_not_warn_for_redirect_to = Some(do_not_warn_for_redirect_to.map(|x| x.to_string()).collect()); } if let Some(throttle_str) = matches.get_one::("throttle") { let throttle = throttle_str.parse::().unwrap(); opt.throttle = Some(throttle); } if let Some(f) = matches.get_one::("csv") { opt.csv_file = Some(Path::new(&normalize_path_separators(f)).to_path_buf()); } if let Some(markup_types) = matches.get_many::("markup-types") { opt.markup_types = Some( markup_types .map(|v| v.as_str().parse().expect("invalid markup type")) .collect(), ); } if opt.markup_types.is_none() { opt.markup_types = Some(vec![MarkupType::Markdown, MarkupType::Html]); } if matches.get_flag("offline") { opt.offline = 
Some(true); } if matches.get_flag("match-file-extension") { opt.match_file_extension = Some(true) } if let Some(ignore_links) = matches.get_many::("ignore-links") { opt.ignore_links = Some(ignore_links.map(|x| x.to_string()).collect()); } if let Some(ignore_path) = matches.get_many::("ignore-path") { let mut paths: Vec<_> = ignore_path.map(|x| Path::new(x).to_path_buf()).collect(); for p in paths.iter_mut() { match fs::canonicalize(&p) { Ok(canonical_path) => { *p = canonical_path; } Err(e) => { println!("⚠ Warn: Ignore path {p:?} not found. {e:?}."); } }; } opt.ignore_path = Some(paths); } if matches.get_flag("gitignore") { opt.gitignore = Some(true); } if matches.get_flag("gituntracked") { opt.gituntracked = Some(true); } if let Some(files) = matches.get_many::("files") { let mut file_paths: Vec<_> = files .map(|x| Path::new(&normalize_path_separators(x)).to_path_buf()) .collect(); for p in file_paths.iter_mut() { match fs::canonicalize(&p) { Ok(canonical_path) => { *p = canonical_path; } Err(e) => { println!("⚠ Warn: File path {p:?} not found. {e:?}."); } }; } opt.files = Some(file_paths); } if let Some(http_headers) = matches.get_many::("http-headers") { opt.http_headers = Some(http_headers.map(|x| x.to_string()).collect()); } if let Some(root_dir) = matches.get_one::("root-dir") { let root_path = Path::new(&normalize_path_separators(root_dir)).to_path_buf(); if !root_path.is_dir() { eprintln!("Root path {root_path:?} must be a directory!"); std::process::exit(1); } opt.root_dir = Some(root_path) } Config { directory, optional: opt, } } ================================================ FILE: src/file_traversal.rs ================================================ extern crate walkdir; use crate::markup::{MarkupFile, MarkupType}; use crate::Config; use std::collections::HashSet; use std::fs; use std::path::PathBuf; use walkdir::WalkDir; /// Checks if a file path has already been seen and adds it to the set if not. 
/// Returns true if the file should be skipped (already seen), false otherwise. fn should_skip_file(seen_paths: &mut HashSet, abs_path: PathBuf, f_name: &str) -> bool { if seen_paths.contains(&abs_path) { debug!( "Skip file {f_name}, already checked via canonical path: {:?}", abs_path ); true } else { seen_paths.insert(abs_path); false } } pub fn find(config: &Config, result: &mut Vec) { let mut seen_paths: HashSet = HashSet::new(); let markup_types = match &config.optional.markup_types { Some(t) => t, None => panic!("Bug! markup_types must be set"), }; // If specific files are provided, process only those files if let Some(files) = &config.optional.files { info!("Checking specific files: {files:?}"); for file_path in files { if !file_path.exists() { warn!("File path '{file_path:?}' does not exist."); continue; } if !file_path.is_file() { warn!("Path '{file_path:?}' is not a file."); continue; } let f_name = file_path .file_name() .map(|n| n.to_string_lossy().to_string()) .unwrap_or_default(); debug!("Check file: '{f_name}'"); if let Some(markup_type) = markup_type(&f_name, markup_types) { let abs_path = match fs::canonicalize(file_path) { Ok(abs_path) => abs_path, Err(e) => { warn!("Path '{file_path:?}' not able to canonicalize path. 
'{e}'"); continue; } }; let ignore = match &config.optional.ignore_path { Some(p) => p.iter().any(|ignore_path| ignore_path == &abs_path), None => false, }; if ignore { debug!("Ignore file {f_name}, because it is in the ignore path list."); } else if !should_skip_file(&mut seen_paths, abs_path, &f_name) { let file = MarkupFile { markup_type, path: file_path.to_string_lossy().to_string(), }; debug!("Found file: {file:?}."); result.push(file); } } else { warn!("File '{f_name}' does not match any supported markup type."); } } return; } // Otherwise, use directory traversal let root = &config.directory; info!("Search for files of markup types '{markup_types:?}' in directory '{root:?}'"); for entry in WalkDir::new(root) .follow_links(false) .into_iter() .filter_entry(|e| { !(e.file_type().is_dir() && config.optional.ignore_path.as_ref().is_some_and(|x| { x.iter().any(|f| { let ignore = f.is_dir() && e.path() .canonicalize() .unwrap_or_default() .starts_with(fs::canonicalize(f).unwrap_or_default()); if ignore { info!("Ignore directory: '{f:?}'"); } ignore }) })) }) .filter_map(Result::ok) .filter(|e| !e.file_type().is_dir()) { let f_name = entry.file_name().to_string_lossy(); debug!("Check file: '{f_name}'"); if let Some(markup_type) = markup_type(&f_name, markup_types) { let path = entry.path(); let abs_path = match fs::canonicalize(path) { Ok(abs_path) => abs_path, Err(e) => { warn!("Path '{path:?}' not able to canonicalize path. 
'{e}'"); continue; } }; let ignore = match &config.optional.ignore_path { Some(p) => p.iter().any(|ignore_path| ignore_path == &abs_path), None => false, }; if ignore { debug!("Ignore file {f_name}, because it is in the ignore path list."); } else if !should_skip_file(&mut seen_paths, abs_path, &f_name) { let file = MarkupFile { markup_type, path: path.to_string_lossy().to_string(), }; debug!("Found file: {file:?}."); result.push(file); } } } } fn markup_type(file: &str, markup_types: &[MarkupType]) -> Option { let file_low = file.to_lowercase(); for markup_type in markup_types { let extensions = markup_type.file_extensions(); for ext in extensions { let mut ext_low = String::from("."); ext_low.push_str(&ext.to_lowercase()); if file_low.ends_with(&ext_low) { return Some(*markup_type); } } } None } ================================================ FILE: src/lib.rs ================================================ #[macro_use] extern crate log; #[macro_use] extern crate clap; #[macro_use] extern crate lazy_static; use crate::link_extractors::link_extractor::MarkupLink; use crate::link_validator::link_type::get_link_type; use crate::link_validator::link_type::LinkType; use crate::link_validator::resolve_target_link; use crate::markup::MarkupFile; use link_extractors::link_extractor::BrokenExtractedLink; use serde::Deserialize; use std::collections::HashMap; use std::env; use std::fmt; use std::fs; use std::io::Write; use std::path::Path; use std::path::PathBuf; use std::process::Command; use std::sync::Arc; use std::vec; use tokio::sync::Mutex; use tokio::time::{sleep_until, Duration, Instant}; pub mod cli; pub mod file_traversal; pub mod link_extractors; pub mod link_validator; pub mod logger; pub mod markup; pub use colored::*; pub use wildmatch::WildMatch; use futures::{stream, StreamExt}; use link_validator::LinkCheckResult; use url::Url; const PARALLEL_REQUESTS: usize = 20; #[derive(Default, Debug, Deserialize)] pub struct OptionalConfig { pub debug: Option, 
#[serde(rename(deserialize = "do-not-warn-for-redirect-to"))] pub do_not_warn_for_redirect_to: Option>, #[serde(rename(deserialize = "markup-types"))] pub markup_types: Option>, pub offline: Option, #[serde(rename(deserialize = "match-file-extension"))] pub match_file_extension: Option, #[serde(rename(deserialize = "ignore-links"))] pub ignore_links: Option>, #[serde(rename(deserialize = "ignore-path"))] pub ignore_path: Option>, #[serde(rename(deserialize = "root-dir"))] pub root_dir: Option, #[serde(rename(deserialize = "csv"))] pub csv_file: Option, #[serde(rename(deserialize = "gitignore"))] pub gitignore: Option, #[serde(rename(deserialize = "gituntracked"))] pub gituntracked: Option, pub throttle: Option, #[serde(rename(deserialize = "files"))] pub files: Option>, #[serde(rename(deserialize = "http-headers"))] pub http_headers: Option>, } #[derive(Default, Debug, Deserialize)] pub struct Config { pub directory: PathBuf, pub optional: OptionalConfig, } impl fmt::Display for Config { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let ignore_str: Vec = match &self.optional.ignore_links { Some(s) => s.iter().map(|m| m.to_string()).collect(), None => vec![], }; let root_dir_str = match &self.optional.root_dir { Some(p) => p.to_str().unwrap_or(""), None => "", }; let ignore_path_str: Vec = match &self.optional.ignore_path { Some(p) => p.iter().map(|m| m.to_str().unwrap().to_string()).collect(), None => vec![], }; let csv_file_str: Vec = match &self.optional.csv_file { Some(p) => p.iter().map(|m| m.to_str().unwrap().to_string()).collect(), None => vec![], }; let markup_types_str: Vec = match &self.optional.markup_types { Some(p) => p.iter().map(|m| format!["{m:?}"]).collect(), None => vec![], }; let files_str: Vec = match &self.optional.files { Some(p) => p.iter().map(|m| m.to_str().unwrap().to_string()).collect(), None => vec![], }; let http_headers_str: Vec = match &self.optional.http_headers { Some(h) => h.clone(), None => vec![], }; write!( f, " Debug: 
{:?} Dir: {} DoNotWarnForRedirectTo: {:?} Types: {:?} Offline: {} MatchExt: {} RootDir: {} Gitignore: {} Gituntracked: {} IgnoreLinks: {} IgnorePath: {:?} Throttle: {} ms CSVFile: {:?} Files: {:?} HttpHeaders: {:?}", self.optional.debug.unwrap_or(false), self.directory.to_str().unwrap_or_default(), self.optional.do_not_warn_for_redirect_to, markup_types_str, self.optional.offline.unwrap_or_default(), self.optional.match_file_extension.unwrap_or_default(), root_dir_str, self.optional.gitignore.unwrap_or_default(), self.optional.gituntracked.unwrap_or_default(), ignore_str.join(","), ignore_path_str, self.optional.throttle.unwrap_or(0), csv_file_str, files_str, http_headers_str ) } } #[derive(Debug, Clone)] struct FinalResult { target: Target, result_code: LinkCheckResult, } #[derive(Hash, PartialEq, Eq, Clone, Debug)] struct Target { target: String, link_type: LinkType, } fn find_all_links(config: &Config) -> Vec> { let mut files: Vec = Vec::new(); file_traversal::find(config, &mut files); let mut links = vec![]; for file in files { links.append(&mut link_extractors::link_extractor::find_links(&file)); } links } fn git_repo_root(scan_root: &Path) -> Option { let output = Command::new("git") .arg("-C") .arg(scan_root) .args(["rev-parse", "--show-toplevel"]) .output() .ok()?; if !output.status.success() { debug!( "git rev-parse failed: {}", String::from_utf8_lossy(&output.stderr) ); return None; } let root = String::from_utf8(output.stdout).ok()?; let root = root.trim(); if root.is_empty() { None } else { Some(PathBuf::from(root)) } } fn scan_root_dir(config: &Config) -> &Path { let p = config.directory.as_path(); if p.is_file() { p.parent().unwrap_or_else(|| Path::new(".")) } else { p } } fn find_git_ignored_files(config: &Config) -> Option> { let scan_root = scan_root_dir(config); let repo_root = git_repo_root(scan_root)?; // Limit ls-files to the scanned subtree so nested .gitignore files are respected // and we don't accidentally base results on the caller's 
current working directory. let output = Command::new("git") .arg("-C") .arg(&repo_root) .args([ "ls-files", "--ignored", "--others", "--exclude-standard", "--", ".", ]) .current_dir(scan_root) .output() .ok()?; if output.status.success() { let ignored_files = String::from_utf8(output.stdout) .ok()? .lines() .filter(|line| line.ends_with(".md") || line.ends_with(".html")) .filter_map(|line| { let rel = line.trim(); let full = repo_root.join(rel); fs::canonicalize(full).ok() }) .collect::>(); Some(ignored_files) } else { eprintln!( "git ls-files command failed: {}", String::from_utf8_lossy(&output.stderr) ); None } } fn find_git_untracked_files(config: &Config) -> Option> { let scan_root = scan_root_dir(config); let repo_root = git_repo_root(scan_root)?; let output = Command::new("git") .arg("-C") .arg(&repo_root) .args(["ls-files", "--others", "--exclude-standard", "--", "."]) .current_dir(scan_root) .output() .ok()?; if output.status.success() { let untracked_files = String::from_utf8(output.stdout) .ok()? 
.lines() .filter(|line| line.ends_with(".md") || line.ends_with(".html")) .filter_map(|line| { let rel = line.trim(); let full = repo_root.join(rel); fs::canonicalize(full).ok() }) .collect::>(); Some(untracked_files) } else { eprintln!( "git ls-files command failed: {}", String::from_utf8_lossy(&output.stderr) ); None } } fn print_helper( link: &MarkupLink, status_code: &colored::ColoredString, msg: &str, error_channel: bool, ) { let mut link_str = format!("[{:^4}] {}", status_code, link.source_str()); if !msg.is_empty() { link_str += &format!(" - {msg}"); } if error_channel { eprintln!("{link_str}"); } else { println!("{link_str}"); } } fn print_result(result: &FinalResult, map: &HashMap>) { for link in &map[&result.target] { match &result.result_code { LinkCheckResult::Ok => { print_helper(link, &"OK".green(), "", false); } LinkCheckResult::NotImplemented(msg) | LinkCheckResult::Warning(msg) => { print_helper(link, &"Warn".yellow(), msg, false); } LinkCheckResult::Ignored(msg) => { print_helper(link, &"Skip".green(), msg, false); } LinkCheckResult::Failed(msg) => { print_helper(link, &"Err".red(), msg, true); } } } } pub async fn run(config: &Config) -> Result<(), ()> { let links = find_all_links(config); let mut link_target_groups: HashMap> = HashMap::new(); let mut skipped = 0; let ignore_links: Vec = match &config.optional.ignore_links { Some(s) => s.iter().map(|m| WildMatch::new(m)).collect(), None => vec![], }; let gitignored_files: Option> = if config.optional.gitignore.is_some() { let files = find_git_ignored_files(config); debug!("Found gitignored files: {files:?}"); files } else { None }; let is_gitignore_enabled = gitignored_files.is_some(); let gituntracked_files: Option> = if config.optional.gituntracked.is_some() { let files = find_git_untracked_files(config); debug!("Found gituntracked files: {files:?}"); files } else { None }; let is_gituntracked_enabled = gituntracked_files.is_some(); let mut broken_references: Vec = vec![]; for link in &links { 
match link { Ok(link) => { let canonical_link_source = match fs::canonicalize(&link.source) { Ok(path) => path, Err(e) => { warn!( "Failed to canonicalize link source: {}. Error: {:?}", link.source, e ); continue; } }; if is_gitignore_enabled { if let Some(ref gif) = gitignored_files { if gif.iter().any(|path| path == &canonical_link_source) { print_helper( link, &"Skip".green(), "Ignore link because it is ignored by git.", false, ); skipped += 1; continue; } } } if is_gituntracked_enabled { if let Some(ref gif) = gituntracked_files { if gif.iter().any(|path| path == &canonical_link_source) { print_helper( link, &"Skip".green(), "Ignore link because it is untracked by git.", false, ); skipped += 1; continue; } } } if ignore_links.iter().any(|m| m.matches(&link.target)) { print_helper( link, &"Skip".green(), "Ignore link because of ignore-links option.", false, ); skipped += 1; continue; } let link_type = get_link_type(&link.target); let target = resolve_target_link(link, &link_type, config).await; let t = Target { target, link_type }; match link_target_groups.get_mut(&t) { Some(v) => v.push(link.clone()), None => { link_target_groups.insert(t, vec![link.clone()]); } } } Err(broken_reference) => { broken_references.push(broken_reference.clone()); } } } let do_not_warn_for_redirect_to: Arc> = Arc::new(match &config.optional.do_not_warn_for_redirect_to { Some(s) => s.iter().map(|m| WildMatch::new(m)).collect(), None => vec![], }); // Parse HTTP headers from config let http_headers: Arc> = Arc::new(match &config.optional.http_headers { Some(headers) => headers .iter() .filter_map(|h| { let parts: Vec<&str> = h.splitn(2, ':').collect(); if parts.len() == 2 { Some((parts[0].trim().to_string(), parts[1].trim().to_string())) } else { warn!("Invalid HTTP header format (expected 'Name: Value'): {}", h); None } }) .collect(), None => vec![], }); info!("Custom HTTP headers: {:?}", http_headers); let throttle = config.optional.throttle.unwrap_or_default() > 0; info!("Throttle 
HTTP requests to same host: {throttle:?}"); let waits = Arc::new(Mutex::new(HashMap::new())); // See also http://patshaughnessy.net/2020/1/20/downloading-100000-files-using-async-rust let mut buffered_stream = stream::iter(link_target_groups.keys()) .map(|target| { let waits = waits.clone(); let do_not_warn_for_redirect_to = Arc::clone(&do_not_warn_for_redirect_to); let http_headers = Arc::clone(&http_headers); async move { if throttle && target.link_type == LinkType::Http { let parsed = match Url::parse(&target.target) { Ok(parsed) => parsed, Err(error) => { return FinalResult { target: target.clone(), result_code: LinkCheckResult::Failed(format!( "Could not parse URL type. Err: {error:?}" )), } } }; let host = match parsed.host_str() { Some(host) => host.to_string(), None => { return FinalResult { target: target.clone(), result_code: LinkCheckResult::Failed( "Failed to determine host".to_string(), ), } } }; let mut waits = waits.lock().await; let mut wait_until: Option = None; let next_wait = match waits.get(&host) { Some(old) => { wait_until = Some(*old); *old + Duration::from_millis( config.optional.throttle.unwrap_or_default().into(), ) } None => { Instant::now() + Duration::from_millis( config.optional.throttle.unwrap_or_default().into(), ) } }; waits.insert(host, next_wait); drop(waits); if let Some(deadline) = wait_until { sleep_until(deadline).await; } } let result_code = link_validator::check( &target.target, &target.link_type, config, &do_not_warn_for_redirect_to, &http_headers, ) .await; FinalResult { target: target.clone(), result_code, } } }) .buffer_unordered(PARALLEL_REQUESTS); let mut oks = 0; let mut warnings = 0; let mut errors = vec![]; let mut warning_results = vec![]; let is_github_runner_env = env::var("GITHUB_ENV").is_ok(); if is_github_runner_env { info!("Running in github environment. 
Print errors and warnings as workflow commands"); } let mut process_result = |result: FinalResult| match &result.result_code { LinkCheckResult::Ok => { oks += link_target_groups[&result.target].len(); } LinkCheckResult::NotImplemented(msg) | LinkCheckResult::Warning(msg) => { warnings += link_target_groups[&result.target].len(); warning_results.push(result.clone()); if is_github_runner_env { for link in &link_target_groups[&result.target] { println!( "::warning file={},line={},col={},title=link checker warning::{}. {}", link.source, link.line, link.column, result.target.target, msg ); } } } LinkCheckResult::Ignored(_) => { skipped += link_target_groups[&result.target].len(); } LinkCheckResult::Failed(msg) => { errors.push(result.clone()); if is_github_runner_env { for link in &link_target_groups[&result.target] { println!( "::error file={},line={},col={},title=broken link::{}. {}", link.source, link.line, link.column, result.target.target, msg ); } } } }; while let Some(result) = buffered_stream.next().await { print_result(&result, &link_target_groups); process_result(result); } for broken_ref in &broken_references { warnings += 1; println!( "[{:^4}] {}:{}:{} => {} - {}", &"Warn".yellow(), broken_ref.source, broken_ref.line, broken_ref.column, broken_ref.reference, broken_ref.error ); } println!(); let error_sum: usize = errors .iter() .map(|e| link_target_groups[&e.target].len()) .sum(); let sum = skipped + error_sum + warnings + oks; println!("Result ({sum} links):"); println!(); println!("OK {oks}"); println!("Skipped {skipped}"); println!("Warnings {warnings}"); println!("Errors {error_sum}"); println!(); // Prepare CSV file if needed let mut csv_file = if let Some(csv_path) = &config.optional.csv_file { info!("Write CSV file: {}", csv_path.display()); let mut file = fs::File::create(csv_path).unwrap(); writeln!(file, "source,line,column,target,severity").unwrap(); Some(file) } else { None }; // Helper function to write warnings to CSV let write_warnings_to_csv 
= |csv_file: &mut Option| { if let Some(ref mut file) = csv_file { // Write link-based warnings for res in &warning_results { for link in &link_target_groups[&res.target] { writeln!( file, "{},{},{},{},WARN", link.source, link.line, link.column, link.target ) .unwrap(); } } // Write broken reference warnings for broken_ref in &broken_references { writeln!( file, "{},{},{},{},WARN", broken_ref.source, broken_ref.line, broken_ref.column, broken_ref.reference ) .unwrap(); } } }; if errors.is_empty() { write_warnings_to_csv(&mut csv_file); Ok(()) } else { println!(); println!("The following links could not be resolved:"); println!(); for res in &errors { for link in &link_target_groups[&res.target] { println!("{}", link.source_str()); if let Some(ref mut file) = csv_file { writeln!( file, "{},{},{},{},ERR", link.source, link.line, link.column, link.target ) .unwrap(); } } } write_warnings_to_csv(&mut csv_file); Err(()) } } ================================================ FILE: src/link_extractors/html_link_extractor.rs ================================================ use crate::link_extractors::link_extractor::LinkExtractor; use crate::link_extractors::link_extractor::MarkupLink; use crate::link_validator::link_type::get_link_type; use crate::link_validator::link_type::LinkType; use super::ignore_comments::IgnoreRegions; use super::link_extractor::BrokenExtractedLink; pub struct HtmlLinkExtractor(); #[derive(Clone, Copy, Debug)] enum ParserState { Text, Comment, Anchor, EqualSign, Link, } impl LinkExtractor for HtmlLinkExtractor { fn find_links(&self, text: &str) -> Vec> { let mut result: Vec> = Vec::new(); let mut state: ParserState = ParserState::Text; let mut link_column = 0; let mut link_line = 0; let ignore_regions = IgnoreRegions::from_text(text); for (line, line_str) in text.lines().enumerate() { let line_chars: Vec = line_str.chars().collect(); let mut column: usize = 0; while line_chars.get(column).is_some() { match state { ParserState::Comment => { if 
line_chars.get(column) == Some(&'-') && line_chars.get(column + 1) == Some(&'-') && line_chars.get(column + 2) == Some(&'>') { column += 2; state = ParserState::Text; } } ParserState::Text => { link_column = column; link_line = line; if line_chars.get(column) == Some(&'<') && line_chars.get(column + 1) == Some(&'!') && line_chars.get(column + 2) == Some(&'-') && line_chars.get(column + 3) == Some(&'-') { column += 3; state = ParserState::Comment; } else if line_chars.get(column) == Some(&'<') && line_chars.get(column + 1) == Some(&'a') { column += 1; state = ParserState::Anchor; } } ParserState::Anchor => { if line_chars.get(column) == Some(&'h') && line_chars.get(column + 1) == Some(&'r') && line_chars.get(column + 2) == Some(&'e') && line_chars.get(column + 3) == Some(&'f') { column += 3; state = ParserState::EqualSign; } } ParserState::EqualSign => { match line_chars.get(column) { Some(x) if x.is_whitespace() => {} Some(x) if x == &'=' => state = ParserState::Link, Some(_) => state = ParserState::Anchor, None => {} }; } ParserState::Link => { match line_chars.get(column) { Some(x) if !x.is_whitespace() && x != &'"' => { let start_col = column; while line_chars.get(column).is_some() && !line_chars[column].is_whitespace() && line_chars[column] != '"' { column += 1; } while let Some(c) = line_chars.get(column) { if c == &'"' { break; } column += 1; } let mut link = (line_chars[start_col..column]).iter().collect::(); if get_link_type(&link) == LinkType::FileSystem { link = url_escape::decode(link.as_str()).to_string(); }; // Check if this line should be ignored let line_num = link_line + 1; // Convert to 1-indexed if !ignore_regions.is_line_ignored(line_num) { result.push(Ok(MarkupLink { column: link_column + 1, line: line_num, target: link.to_string(), source: "".to_string(), })); } state = ParserState::Text; } Some(_) | None => {} }; } } column += 1; } } result } } #[cfg(test)] mod tests { use super::*; use ntest::test_case; #[test] fn no_link() { let le = 
HtmlLinkExtractor(); let input = "]This is not a no link

Bla

attribute."; let result = le.find_links(input); assert!(result.is_empty()); } #[test] fn commented() { let le = HtmlLinkExtractor(); let input = "df "; let result = le.find_links(input); assert!(result.is_empty()); } #[test] fn space() { let le = HtmlLinkExtractor(); let result = le.find_links("blah foo."); let expected = Ok(MarkupLink { target: "some file.html".to_string(), line: 1, column: 6, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test] fn url_encoded_path() { let le = HtmlLinkExtractor(); let result = le.find_links("blah foo."); let expected = Ok(MarkupLink { target: "some file.html".to_string(), line: 1, column: 6, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test_case("Visit W3Schools.com!", 1, 1)] #[test_case( "\nVisit W3Schools.com!\n", 1, 1 )] #[test_case( "Visit W3Schools.com!", 1, 1 )] #[test_case( "Visit W3Schools.com!", 1, 15 )] fn links(input: &str, line: usize, column: usize) { let le = HtmlLinkExtractor(); let result = le.find_links(input); let expected = Ok(MarkupLink { target: "https://www.w3schools.com".to_string(), line, column, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test] fn ignore_disable_line() { let le = HtmlLinkExtractor(); let input = " link"; let result = le.find_links(input); assert!(result.is_empty()); } #[test] fn ignore_disable_next_line() { let le = HtmlLinkExtractor(); let input = "\nlink"; let result = le.find_links(input); assert!(result.is_empty()); } #[test] fn ignore_disable_block() { let le = HtmlLinkExtractor(); let input = "\nlink1\n\nlink2"; let result = le.find_links(input); assert_eq!(1, result.len()); assert_eq!(result[0].as_ref().unwrap().target, "http://example.com/"); assert_eq!(result[0].as_ref().unwrap().line, 4); } #[test] fn ignore_multiple_blocks() { let le = HtmlLinkExtractor(); let input = "1\n\n2\n\n3"; let result = le.find_links(input); assert_eq!(2, result.len()); assert_eq!(result[0].as_ref().unwrap().target, "http://a.com/"); 
/// Line regions of a markup document in which link checking is switched off
/// by ignore comments.
#[derive(Debug, Clone)]
pub struct IgnoreRegions {
    /// Lines that should be ignored (1-indexed)
    ignored_lines: HashSet<usize>,
    /// Ranges of lines that should be ignored (1-indexed, inclusive)
    ignored_ranges: Vec<(usize, usize)>,
}

impl IgnoreRegions {
    // NOTE(review): the directive literals were empty strings in the reviewed
    // text (an empty pattern matches every line, so every line acted as every
    // directive). The spellings below are reconstructed from the names and
    // expectations of the unit tests of this module - confirm them against the
    // project documentation before merging.
    /// Opens an ignored block; the block includes the directive line itself.
    const DISABLE: &'static str = "<!-- mlc-disable -->";
    /// Closes an ignored block; the directive line is still part of the block.
    const ENABLE: &'static str = "<!-- mlc-enable -->";
    /// Ignores the line the directive is written on.
    const DISABLE_LINE: &'static str = "<!-- mlc-disable-line -->";
    /// Ignores the line following the directive.
    const DISABLE_NEXT_LINE: &'static str = "<!-- mlc-disable-next-line -->";

    /// Create a new IgnoreRegions from text content
    ///
    /// A block that is opened but never closed extends to the last line of
    /// `text`. Opening a block while one is already open and closing a block
    /// while none is open have no effect.
    pub fn from_text(text: &str) -> Self {
        let mut ignored_lines = HashSet::new();
        let mut ignored_ranges = Vec::new();
        // Start line of the block that is currently open, if any.
        let mut open_block_start: Option<usize> = None;
        let mut total_lines = 0;

        for (line_idx, line) in text.lines().enumerate() {
            let line_num = line_idx + 1; // 1-indexed
            total_lines = line_num;

            // Check for disable/enable blocks. A line carrying the disable
            // directive is never treated as an enable line.
            if line.contains(Self::DISABLE) {
                open_block_start.get_or_insert(line_num);
            } else if line.contains(Self::ENABLE) {
                if let Some(start) = open_block_start.take() {
                    ignored_ranges.push((start, line_num));
                }
            }

            // Check for single-line ignores
            if line.contains(Self::DISABLE_LINE) {
                ignored_lines.insert(line_num);
            }

            // Check for next-line ignore
            if line.contains(Self::DISABLE_NEXT_LINE) {
                ignored_lines.insert(line_num + 1);
            }
        }

        // A block can only be open if at least one line was read, so
        // `total_lines` is at least the start line here.
        if let Some(start) = open_block_start {
            ignored_ranges.push((start, total_lines));
        }

        Self {
            ignored_lines,
            ignored_ranges,
        }
    }

    /// Check if a given line number (1-indexed) should be ignored
    pub fn is_line_ignored(&self, line: usize) -> bool {
        self.ignored_lines.contains(&line)
            || self
                .ignored_ranges
                .iter()
                .any(|&(start, end)| (start..=end).contains(&line))
    }
}
assert!(regions.is_line_ignored(6)); assert!(regions.is_line_ignored(7)); assert!(regions.is_line_ignored(8)); assert!(!regions.is_line_ignored(9)); } #[test] fn mixed_ignore_types() { let text = "Line 1\n Line 2\n\nLine 4\n\nLine 6\n\nLine 8"; let regions = IgnoreRegions::from_text(text); assert!(!regions.is_line_ignored(1)); assert!(regions.is_line_ignored(2)); // disable-line assert!(!regions.is_line_ignored(3)); assert!(regions.is_line_ignored(4)); // disable-next-line assert!(regions.is_line_ignored(5)); // disable block start assert!(regions.is_line_ignored(6)); // disable block assert!(regions.is_line_ignored(7)); // disable block end (enable) assert!(!regions.is_line_ignored(8)); } } ================================================ FILE: src/link_extractors/link_extractor.rs ================================================ use super::html_link_extractor::HtmlLinkExtractor; use super::markdown_link_extractor::MarkdownLinkExtractor; use crate::markup::{MarkupFile, MarkupType}; use std::env; use std::fmt; use std::fs; /// Link found in markup files #[derive(Eq, PartialEq, Clone)] pub struct MarkupLink { /// The source file of the link pub source: String, /// The target the link points to pub target: String, /// The line number were the link was found pub line: usize, /// The column number were the link was found pub column: usize, } /// Broken link found in document #[derive(Eq, PartialEq, Clone, Debug)] pub struct BrokenExtractedLink { /// The error message pub error: String, /// The source pub source: String, /// The target pub reference: String, /// The line number were the link was found pub line: usize, /// The column number were the link was found pub column: usize, } impl fmt::Debug for MarkupLink { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "{} => {} (line {}, column {})", self.source, self.target, self.line, self.column ) } } impl MarkupLink { pub fn source_str(&self) -> String { lazy_static! 
{ static ref IS_VS_CODE_TERMINAL: bool = env::var("TERM_PROGRAM") == Ok("vscode".to_string()); } if *IS_VS_CODE_TERMINAL { format! {"{}:{}:{} => {}", self.source, self.line, self.column, self.target} } else { format! {"{} ({}, {}) => {}", self.source, self.line, self.column, self.target} } } } #[must_use] pub fn find_links(file: &MarkupFile) -> Vec> { let path = &file.path; let link_extractor = link_extractor_factory(file.markup_type); info!("Scan file at path '{path}' for links."); match fs::read_to_string(path) { Ok(text) => { let mut links = link_extractor.find_links(&text); for l in &mut links { match l { Ok(link) => { link.source = path.to_string(); } Err(broken_link) => { broken_link.source = path.to_string(); } } } links } Err(e) => { warn!("File '{path}'. IO Error: \"{e}\". Check your file encoding."); vec![] } } } fn link_extractor_factory(markup_type: MarkupType) -> Box { match markup_type { MarkupType::Markdown => Box::new(MarkdownLinkExtractor()), MarkupType::Html => Box::new(HtmlLinkExtractor()), } } pub trait LinkExtractor { fn find_links(&self, text: &str) -> Vec>; } ================================================ FILE: src/link_extractors/markdown_link_extractor.rs ================================================ use super::html_link_extractor::HtmlLinkExtractor; use super::ignore_comments::IgnoreRegions; use super::link_extractor::BrokenExtractedLink; use crate::link_extractors::link_extractor::LinkExtractor; use crate::link_extractors::link_extractor::MarkupLink; use pulldown_cmark::{BrokenLink, Event, Options, Parser, Tag}; pub struct MarkdownLinkExtractor(); impl LinkExtractor for MarkdownLinkExtractor { fn find_links(&self, text: &str) -> Vec> { use std::cell::RefCell; let result: RefCell>> = RefCell::new(Vec::new()); let html_extractor = HtmlLinkExtractor(); let converter = LineColumnConverter::new(text); let ignore_regions = IgnoreRegions::from_text(text); let callback = &mut |broken_link: BrokenLink| { let line_col = 
/// Translates byte offsets into one-based `(line, column)` positions of a text.
///
/// Columns are counted in bytes, not in characters.
struct LineColumnConverter {
    /// Byte offset at which each line starts. The first entry is always `0`.
    line_starts: Vec<usize>,
}

impl LineColumnConverter {
    /// Records the start offset of every line in `text`. Lines are terminated by `\n`.
    fn new(text: &str) -> Self {
        let line_starts = std::iter::once(0)
            .chain(
                text.match_indices('\n')
                    .map(|(newline_idx, _)| newline_idx + 1),
            )
            .collect();
        Self { line_starts }
    }

    /// Returns the one-based `(line, column)` of the byte offset `idx`.
    ///
    /// The `\n` terminating a line counts as the last column of that line. Offsets past the end
    /// of the text are reported relative to the start of the last line.
    fn line_column_from_idx(&self, idx: usize) -> (usize, usize) {
        // Number of lines starting at or before `idx`. This is at least one because the first
        // line starts at offset 0, so `line - 1` is always a valid index.
        let line = self.line_starts.partition_point(|&start| start <= idx);
        let column = idx - self.line_starts[line - 1] + 1;
        (line, column)
    }
}
{ source: "".to_string(), reference: "link".to_string(), line: 1, column: 15, error: "Markdown reference not found".to_string(), }); assert_eq!(vec![expected], result); } #[test] fn inline_code() { let le = MarkdownLinkExtractor(); let input = " `[code](http://example.net/)`, no link!."; let result = le.find_links(input); assert!(result.is_empty()); } #[test] fn link_near_inline_code() { let le = MarkdownLinkExtractor(); let input = " `bug` [code](http://example.net/), link!."; let result = le.find_links(input); let expected = Ok(MarkupLink { target: "http://example.net/".to_string(), line: 1, column: 8, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test] fn link_very_near_inline_code() { let le = MarkdownLinkExtractor(); let input = "`bug`[code](http://example.net/)"; let result = le.find_links(input); let expected = Ok(MarkupLink { target: "http://example.net/".to_string(), line: 1, column: 6, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test] fn code_block() { let le = MarkdownLinkExtractor(); let input = " ``` js\n[code](http://example.net/)```, no link!."; let result = le.find_links(input); assert!(result.is_empty()); } #[test] fn html_code_block() { let le = MarkdownLinkExtractor(); let input = ", no link!."; let result = le.find_links(input); assert!(result.is_empty()); } #[test] fn escaped_code_block() { let le = MarkdownLinkExtractor(); let input = " klsdjf \\`[escape](http://example.net/)\\`, no link!."; let result = le.find_links(input); let expected = Ok(MarkupLink { target: "http://example.net/".to_string(), line: 1, column: 13, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test] fn link_in_code_block() { let le = MarkdownLinkExtractor(); let input = "```\n[only code](http://example.net/)\n```."; let result = le.find_links(input); assert!(result.is_empty()); } #[test] fn image_reference() { let le = MarkdownLinkExtractor(); let link_str = "http://example.net/"; let input = 
format!("\n\nBla ![This is an image link]({link_str})"); let result = le.find_links(&input); let expected = Ok(MarkupLink { target: link_str.to_string(), line: 3, column: 5, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test] fn link_no_title() { let le = MarkdownLinkExtractor(); let link_str = "http://example.net/"; let input = format!("[This link]({link_str}) has no title attribute."); let result = le.find_links(&input); let expected = Ok(MarkupLink { target: link_str.to_string(), line: 1, column: 1, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test] fn link_with_title() { let le = MarkdownLinkExtractor(); let link_str = "http://example.net/"; let input = format!("\n123[This is a link]({link_str} \"with title\") oh yea."); let result = le.find_links(&input); let expected = Ok(MarkupLink { target: link_str.to_string(), line: 2, column: 4, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test_case("", 1)] // TODO GitHub Link style support //#[test_case("This is a short link http://example.net/", 22)] //#[test_case("http://example.net/", 1)] #[test_case("This is a short link ", 22)] fn inline_link(input: &str, column: usize) { let le = MarkdownLinkExtractor(); let result = le.find_links(input); let expected = Ok(MarkupLink { target: "http://example.net/".to_string(), line: 1, column, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test_case( " target=\"_blank\">Visit W3Schools!", test_name = "html_link_with_target" )] #[test_case( " link text", test_name = "html_link_no_target" )] fn html_link(input: &str) { let le = MarkdownLinkExtractor(); let result = le.find_links(input); let expected = Ok(MarkupLink { target: "http://example.net/".to_string(), line: 1, column: 1, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test] fn html_link_ident() { let le = MarkdownLinkExtractor(); let result = le.find_links("123 link text"); let expected = Ok(MarkupLink { target: 
"http://example.net/".to_string(), line: 1, column: 4, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test] fn html_link_new_line() { let le = MarkdownLinkExtractor(); let result = le.find_links("\n123 link text"); let expected = Ok(MarkupLink { target: "http://example.net/".to_string(), line: 2, column: 4, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test] fn raw_html_issue_31() { let le = MarkdownLinkExtractor(); let result = le.find_links("Some text link text more text."); let expected = Ok(MarkupLink { target: "some_url".to_string(), line: 1, column: 11, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test] fn referenced_link() { let le = MarkdownLinkExtractor(); let link_str = "http://example.net/"; let input = format!( "This is [an example][arbitrary case-insensitive reference text] reference-style link.\n\n[Arbitrary CASE-insensitive reference text]: {link_str}" ); let result = le.find_links(&input); let expected = Ok(MarkupLink { target: link_str.to_string(), line: 1, column: 9, source: "".to_string(), }); assert_eq!(vec![expected], result); } #[test] fn referenced_link_tag_only() { let le = MarkdownLinkExtractor(); let link_str = "http://example.net/"; let input = format!("Foo Bar\n\n[Arbitrary CASE-insensitive reference text]: {link_str}"); let result = le.find_links(&input); assert_eq!(0, result.len()); } #[test] fn referenced_link_no_tag_only() { let le = MarkdownLinkExtractor(); let input = "[link][reference]"; let result = le.find_links(input); assert_eq!(1, result.len()); } #[test] fn ignore_disable_line() { let le = MarkdownLinkExtractor(); let input = " [link](http://example.net/)"; let result = le.find_links(input); assert!(result.is_empty()); } #[test] fn ignore_disable_next_line() { let le = MarkdownLinkExtractor(); let input = "\n[link](http://example.net/)"; let result = le.find_links(input); assert!(result.is_empty()); } #[test] fn ignore_disable_block() { let le = 
MarkdownLinkExtractor(); let input = "\n[link1](http://example.net/)\n\n[link2](http://example.com/)"; let result = le.find_links(input); assert_eq!(1, result.len()); assert_eq!(result[0].as_ref().unwrap().target, "http://example.com/"); assert_eq!(result[0].as_ref().unwrap().line, 4); } #[test] fn ignore_multiple_blocks() { let le = MarkdownLinkExtractor(); let input = "[link1](http://a.com/)\n\n[link2](http://b.com/)\n\n[link3](http://c.com/)\n\n[link4](http://d.com/)\n\n[link5](http://e.com/)"; let result = le.find_links(input); assert_eq!(3, result.len()); assert_eq!(result[0].as_ref().unwrap().target, "http://a.com/"); assert_eq!(result[1].as_ref().unwrap().target, "http://c.com/"); assert_eq!(result[2].as_ref().unwrap().target, "http://e.com/"); } #[test] fn ignore_html_link_in_markdown() { let le = MarkdownLinkExtractor(); let input = "\nlink"; let result = le.find_links(input); assert!(result.is_empty()); } #[test] fn ignore_mixed_types() { let le = MarkdownLinkExtractor(); let input = "[link1](http://a.com/)\n [link2](http://b.com/)\n[link3](http://c.com/)"; let result = le.find_links(input); assert_eq!(2, result.len()); assert_eq!(result[0].as_ref().unwrap().target, "http://a.com/"); assert_eq!(result[1].as_ref().unwrap().target, "http://c.com/"); } #[test] fn gfm_checkbox_not_link() { let le = MarkdownLinkExtractor(); let input = "- [x] checked task\n- [ ] unchecked task"; let result = le.find_links(input); // GitHub-flavored markdown task list checkboxes should NOT be treated as links assert!( result.is_empty(), "Task list checkboxes should not be detected as links: {:?}", result ); } #[test] fn gfm_checkbox_with_link() { let le = MarkdownLinkExtractor(); let input = "- [x] [actual link](http://example.com/)\n- [ ] unchecked task"; let result = le.find_links(input); // Only the actual link should be detected, not the checkboxes assert_eq!(1, result.len()); assert_eq!(result[0].as_ref().unwrap().target, "http://example.com/"); } } 
================================================ FILE: src/link_extractors/mod.rs ================================================ mod html_link_extractor; mod ignore_comments; pub mod link_extractor; mod markdown_link_extractor; ================================================ FILE: src/link_validator/file_system.rs ================================================ use crate::link_validator::LinkCheckResult; use crate::Config; use async_std::fs::canonicalize; use async_std::path::Path; use async_std::path::PathBuf; use std::path::MAIN_SEPARATOR; use walkdir::WalkDir; pub async fn check_filesystem(target: &str, config: &Config) -> LinkCheckResult { let target = Path::new(target); debug!("Absolute target path {target:?}"); if target.exists().await { LinkCheckResult::Ok } else if !config.optional.match_file_extension.unwrap_or_default() && target.extension().is_none() { // Check if file exists ignoring the file extension let target_file_name = match target.file_name() { Some(s) => s, None => return LinkCheckResult::Failed("Target path not found.".to_string()), }; let target_parent = match target.parent() { Some(s) => s, None => return LinkCheckResult::Failed("Target parent not found.".to_string()), }; debug!("Check if file ignoring the extension exists."); if target_parent.exists().await { debug!("Parent {target_parent:?} exists. 
Search dir for file ignoring the extension."); for entry in WalkDir::new(target_parent) .follow_links(false) .max_depth(1) .into_iter() .filter_map(Result::ok) .filter(|e| !e.file_type().is_dir()) { let mut file_on_system = entry.into_path(); file_on_system.set_extension(""); match file_on_system.file_name() { Some(file_name) => { if target_file_name == file_name { info!("Found file {file_on_system:?}"); return LinkCheckResult::Ok; } } None => { return LinkCheckResult::Failed("Target filename not found.".to_string()) } } } LinkCheckResult::Failed("Target not found.".to_string()) } else { LinkCheckResult::Failed("Target not found.".to_string()) } } else { LinkCheckResult::Failed("Target filename not found.".to_string()) } } pub async fn resolve_target_link(source: &str, target: &str, config: &Config) -> String { let mut normalized_link = target.replace(['/', '\\'], std::path::MAIN_SEPARATOR_STR); if let Some(idx) = normalized_link.find('#') { info!( "Strip everything after #. The chapter part '{}' is not checked.", &normalized_link[idx..] ); normalized_link = normalized_link[..idx].to_string(); } let mut fs_link_target = Path::new(&normalized_link).to_path_buf(); if normalized_link.starts_with(MAIN_SEPARATOR) && config.optional.root_dir.is_some() { match canonicalize(&config.optional.root_dir.as_ref().unwrap()).await { Ok(new_root) => fs_link_target = new_root.join(Path::new(&normalized_link[1..])), Err(e) => panic!( "Root path could not be converted to an absolute path. Does the directory exit? 
{}", e ), } } debug!("Check file system link target {target:?}"); let abs_path = absolute_target_path(source, &fs_link_target) .await .to_str() .expect("Could not resolve target path") .to_string(); // Remove verbatim path identifier which causes trouble on windows when using ../../ in paths abs_path .strip_prefix("\\\\?\\") .unwrap_or(&abs_path) .to_string() } async fn absolute_target_path(source: &str, target: &PathBuf) -> PathBuf { let abs_source = canonicalize(source).await.expect("Expected path to exist."); if target.is_relative() { let root = format!("{MAIN_SEPARATOR}"); let parent = abs_source.parent().unwrap_or_else(|| Path::new(&root)); let new_target = match target.strip_prefix(format!(".{MAIN_SEPARATOR}")) { Ok(t) => t, Err(_) => target, }; parent.join(new_target) } else { target.clone() } } #[cfg(test)] mod test { use super::*; #[tokio::test] async fn remove_dot() { let source = Path::new(file!()) .parent() .unwrap() .parent() .unwrap() .parent() .unwrap() .join("benches") .join("benchmark"); let target = Path::new("./script_and_comments.md").to_path_buf(); let path = absolute_target_path(source.to_str().unwrap(), &target).await; let path_str = path.to_str().unwrap().to_string(); println!("{path_str:?}"); assert_eq!(path_str.matches('.').count(), 1); } } ================================================ FILE: src/link_validator/http.rs ================================================ use crate::link_validator::LinkCheckResult; use reqwest::header::ACCEPT; use reqwest::header::USER_AGENT; use reqwest::Client; use reqwest::Method; use reqwest::Request; use reqwest::StatusCode; use wildmatch::WildMatch; const BROWSER_ACCEPT_HEADER: &str = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"; pub async fn check_http( target: &str, do_not_warn_for_redirect_to: &[WildMatch], http_headers: &[(String, String)], ) -> LinkCheckResult { debug!("Check http link target {target:?}"); let url = reqwest::Url::parse(target).expect("URL of unknown type"); 
match http_request(&url, do_not_warn_for_redirect_to, http_headers).await { Ok(response) => response, Err(error_msg) => LinkCheckResult::Failed(format!("Http(s) request failed. {error_msg}")), } } fn new_request(method: Method, url: &reqwest::Url, http_headers: &[(String, String)]) -> Request { let mut req = Request::new(method, url.clone()); let headers = req.headers_mut(); headers.insert(ACCEPT, BROWSER_ACCEPT_HEADER.parse().unwrap()); // Set default user agent if no custom User-Agent is provided let has_custom_user_agent = http_headers .iter() .any(|(k, _)| k.to_lowercase() == "user-agent"); if !has_custom_user_agent { headers.insert(USER_AGENT, "mlc (github.com/becheran/mlc)".parse().unwrap()); } // Apply custom headers for (key, value) in http_headers { if let (Ok(header_name), Ok(header_value)) = ( reqwest::header::HeaderName::from_bytes(key.as_bytes()), reqwest::header::HeaderValue::from_str(value), ) { headers.insert(header_name, header_value); } else { warn!("Invalid HTTP header: {}: {}", key, value); } } req } async fn http_request( url: &reqwest::Url, do_not_warn_for_redirect_to: &[WildMatch], http_headers: &[(String, String)], ) -> reqwest::Result { lazy_static! { static ref CLIENT: Client = reqwest::Client::builder() .brotli(true) .gzip(true) .deflate(true) .build() .expect("Bug! 
failed to build client"); } fn status_to_string(status: StatusCode) -> String { format!( "{} - {}", status.as_str(), status.canonical_reason().unwrap_or("Unknown reason") ) } let response = CLIENT .execute(new_request(Method::HEAD, url, http_headers)) .await?; let check_redirect = |response_url: &reqwest::Url| -> reqwest::Result { // Compare URLs ignoring fragments since fragments are not sent to the server // and the response URL will never have them let urls_match = url.scheme() == response_url.scheme() && url.host() == response_url.host() && url.port() == response_url.port() && url.path() == response_url.path() && url.query() == response_url.query(); if urls_match || do_not_warn_for_redirect_to .iter() .any(|x| x.matches(response_url.as_ref())) { Ok(LinkCheckResult::Ok) } else { Ok(LinkCheckResult::Warning( "Request was redirected to ".to_string() + response_url.as_ref(), )) } }; let status = response.status(); if status.is_success() || status.is_redirection() { check_redirect(response.url()) } else { debug!("Got the status code {status:?}. 
Retry with get-request."); let get_request = new_request(Method::GET, url, http_headers); let response = CLIENT.execute(get_request).await?; let status = response.status(); if status.is_success() || status.is_redirection() { check_redirect(response.url()) } else { Ok(LinkCheckResult::Failed(status_to_string(status))) } } } #[cfg(test)] mod test { use super::*; #[tokio::test] async fn check_http_is_available() { let mut server = mockito::Server::new_async().await; server .mock("GET", "/") .with_status(200) .create_async() .await; let result = check_http(&server.url(), &[], &[]).await; assert_eq!(result, LinkCheckResult::Ok); } #[tokio::test] async fn check_http_fail() { let mut server = mockito::Server::new_async().await; server .mock("GET", "/") .with_status(500) .create_async() .await; let result = check_http(&server.url(), &[], &[]).await; assert_eq!( result, LinkCheckResult::Failed("500 - Internal Server Error".to_string()) ); } #[tokio::test] async fn check_http_is_redirection() { let mut redirect_server = mockito::Server::new_async().await; redirect_server .mock("GET", "/") .with_status(200) .create_async() .await; let mut server = mockito::Server::new_async().await; server .mock("GET", "/") .with_status(301) .with_header("Location", &redirect_server.url()) .create_async() .await; let result = check_http(&server.url(), &[], &[]).await; assert_eq!( result, LinkCheckResult::Warning(format!( "Request was redirected to {}/", &redirect_server.url() )) ); } #[tokio::test] async fn check_http_redirection_do_not_warn_if_ignored() { let mut redirect_server = mockito::Server::new_async().await; redirect_server .mock("GET", "/") .with_status(200) .create_async() .await; let mut server = mockito::Server::new_async().await; server .mock("GET", "/") .with_status(301) .with_header("Location", &redirect_server.url()) .create_async() .await; let result = check_http( &server.url(), &[WildMatch::new(&format!("{}*", &redirect_server.url()))], &[], ) .await; assert_eq!(result, 
LinkCheckResult::Ok); } #[tokio::test] async fn check_http_redirection_do_not_warn_if_ignored_star_pattern() { let mut redirect_server = mockito::Server::new_async().await; redirect_server .mock("GET", "/") .with_status(200) .create_async() .await; let mut server = mockito::Server::new_async().await; server .mock("GET", "/") .with_status(301) .with_header("Location", &redirect_server.url()) .create_async() .await; let result = check_http(&server.url(), &[WildMatch::new("*")], &[]).await; assert_eq!(result, LinkCheckResult::Ok); } #[tokio::test] async fn check_http_redirection_do_warn_if_ignored_mismatch() { let mut redirect_server = mockito::Server::new_async().await; redirect_server .mock("GET", "/") .with_status(200) .create_async() .await; let mut server = mockito::Server::new_async().await; server .mock("GET", "/") .with_status(301) .with_header("Location", &redirect_server.url()) .create_async() .await; let result = check_http( &server.url(), &[WildMatch::new("http://is-mismatched.com/*")], &[], ) .await; assert_eq!( result, LinkCheckResult::Warning(format!( "Request was redirected to {}/", &redirect_server.url() )) ); } #[tokio::test] async fn check_http_is_redirection_failure() { let mut redirect_server = mockito::Server::new_async().await; redirect_server .mock("GET", "/") .with_status(403) .create_async() .await; let mut server = mockito::Server::new_async().await; server .mock("GET", "/") .with_status(301) .with_header("Location", &redirect_server.url()) .create_async() .await; let result = check_http(&server.url(), &[], &[]).await; assert_eq!( result, LinkCheckResult::Failed("403 - Forbidden".to_string()) ); } #[tokio::test] async fn check_http_with_fragment_no_warning() { let mut server = mockito::Server::new_async().await; server .mock("GET", "/page") .with_status(200) .create_async() .await; // The URL with a fragment should not produce a redirect warning // because the fragment is not sent to the server let url_with_fragment = 
format!("{}/page#anchor", server.url()); let result = check_http(&url_with_fragment, &[], &[]).await; assert_eq!(result, LinkCheckResult::Ok); } #[tokio::test] async fn check_http_with_fragment_real_redirect_warns() { let mut redirect_server = mockito::Server::new_async().await; redirect_server .mock("GET", "/other-page") .with_status(200) .create_async() .await; let mut server = mockito::Server::new_async().await; server .mock("GET", "/page") .with_status(301) .with_header("Location", &format!("{}/other-page", redirect_server.url())) .create_async() .await; // A real redirect to a different page should still produce a warning // even if the original URL had a fragment let url_with_fragment = format!("{}/page#anchor", server.url()); let result = check_http(&url_with_fragment, &[], &[]).await; assert_eq!( result, LinkCheckResult::Warning(format!( "Request was redirected to {}/other-page", &redirect_server.url() )) ); } #[tokio::test] async fn check_http_with_custom_headers() { let mut server = mockito::Server::new_async().await; server .mock("GET", "/") .match_header("user-agent", "CustomAgent/1.0") .match_header("x-custom-header", "test-value") .with_status(200) .create_async() .await; let custom_headers = vec![ ("User-Agent".to_string(), "CustomAgent/1.0".to_string()), ("X-Custom-Header".to_string(), "test-value".to_string()), ]; let result = check_http(&server.url(), &[], &custom_headers).await; assert_eq!(result, LinkCheckResult::Ok); } } ================================================ FILE: src/link_validator/link_type.rs ================================================ extern crate url; use self::url::Url; use regex::Regex; #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] pub enum LinkType { Http, Ftp, Mail, FileSystem, UnknownUrlSchema, Unknown, } #[must_use] pub fn get_link_type(link: &str) -> LinkType { lazy_static! 
{ static ref FILE_SYSTEM_REGEX: Regex = Regex::new(r"^(([[:alpha:]]:(\\|/))|(..?(\\|/))|((\\\\?|//?))).*").unwrap(); } if FILE_SYSTEM_REGEX.is_match(link) || !link.contains(':') { return if link.contains('@') { LinkType::Mail } else { LinkType::FileSystem }; } if let Ok(url) = Url::parse(link) { let scheme = url.scheme(); debug!("Link {link} is a URL type with scheme {scheme}"); return match scheme { "http" | "https" => LinkType::Http, "ftp" | "ftps" => LinkType::Ftp, "mailto" => LinkType::Mail, "file" => LinkType::FileSystem, _ => LinkType::UnknownUrlSchema, }; } LinkType::UnknownUrlSchema } #[cfg(test)] mod tests { use super::*; use ntest::test_case; fn test_link(link: &str, expected_type: &LinkType) { let link_type = get_link_type(link); assert_eq!(link_type, *expected_type); } #[test_case("https://doc.rust-lang.org.html")] #[test_case("http://www.website.php")] fn http_link_types(link: &str) { test_link(link, &LinkType::Http); } #[test_case("ftp://mueller:12345@ftp.downloading.ch")] fn ftp_link_types(ftp: &str) { test_link(ftp, &LinkType::Ftp); } #[test_case("F:/fake/windows/paths")] #[test_case("\\\\smb}\\paths")] #[test_case("C:\\traditional\\paths")] #[test_case("\\file.ext")] #[test_case("file:///some/path/")] #[test_case("path")] #[test_case("./file.ext")] #[test_case(".\\file.md")] #[test_case("../upper_dir.md")] #[test_case("..\\upper_dir.mdc")] #[test_case("D:\\Program Files(x86)\\file.log")] #[test_case("D:\\Program Files(x86)\\folder\\file.log")] fn test_file_system_link_types(link: &str) { test_link(link, &LinkType::FileSystem); } } ================================================ FILE: src/link_validator/mail.rs ================================================ use crate::link_validator::LinkCheckResult; use regex::Regex; pub fn check_mail(target: &str) -> LinkCheckResult { debug!("Check mail target {target:?}"); let mut mail = target; if let Some(stripped) = target.strip_prefix("mailto://") { mail = stripped; } else if let Some(stripped) = 
/// Outcome of checking a single link.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum LinkCheckResult {
    /// The link target was checked and accepted.
    Ok,
    /// The check failed; the message describes the reason.
    Failed(String),
    /// The target was accepted but something noteworthy happened, e.g. the HTTP request was
    /// redirected to another URL.
    Warning(String),
    /// The link was deliberately not checked, e.g. a web link while the offline option is set.
    Ignored(String),
    /// Links of this type cannot be checked (FTP links and unknown link types or URL schemes).
    NotImplemented(String),
}
async fn resolve_target_link( link: &MarkupLink, link_type: &LinkType, config: &Config, ) -> String { if link_type == &LinkType::FileSystem { file_system::resolve_target_link(&link.source, &link.target, config).await } else { link.target.to_string() } } pub async fn check( link_target: &str, link_type: &LinkType, config: &Config, do_not_warn_for_redirect_to: &[WildMatch], http_headers: &[(String, String)], ) -> LinkCheckResult { info!("Check link {}.", &link_target); match link_type { LinkType::Ftp => LinkCheckResult::NotImplemented(format!( "Link type '{:?}' is not supported yet...", &link_target )), LinkType::UnknownUrlSchema | LinkType::Unknown => LinkCheckResult::NotImplemented( "Link type is not implemented yet and cannot be checked.".to_string(), ), LinkType::Mail => check_mail(link_target), LinkType::Http => { if config.optional.offline.unwrap_or_default() { LinkCheckResult::Ignored("Ignore web link because of the offline flag.".to_string()) } else { check_http(link_target, do_not_warn_for_redirect_to, http_headers).await } } LinkType::FileSystem => check_filesystem(link_target, config).await, } } ================================================ FILE: src/logger.rs ================================================ use std::time::SystemTime; pub fn init(log_level: log::LevelFilter) -> Result<(), fern::InitError> { fern::Dispatch::new() .format(|out, message, record| { out.finish(format_args!( "\x1B[{}m[{} {} {}] {}\x1B[0m", match record.level() { log::Level::Error => "31", // Red log::Level::Warn => "33", // Yellow log::Level::Info => "32", // Green log::Level::Debug => "34", // Blue log::Level::Trace => "37", // White }, SystemTime::now() .duration_since(SystemTime::UNIX_EPOCH) .unwrap() .as_secs(), record.level(), record.target(), message )) }) .level(log_level) .chain(std::io::stdout()) .apply()?; debug!("Initialized logging"); Ok(()) } ================================================ FILE: src/main.rs ================================================ 
#[macro_use]
extern crate log;

use mlc::cli;
use mlc::logger;
use std::process;

#[macro_use]
extern crate clap;

/// Prints the boxed banner with the tool name and crate version to stdout.
fn print_header() {
    let width = 60;
    // Width available between the two `+` border characters.
    let inner = width - 2;
    let header = format!("markup link checker - mlc v{:}", crate_version!());

    println!();
    println!("{:+<1$}", "", width);
    println!("+{: <1$}+", "", inner);
    println!("+{: ^1$}+", header, inner);
    println!("+{: <1$}+", "", inner);
    println!("{:+<1$}", "", width);
    println!();
}

/// Entry point: parses the CLI arguments, initializes logging and runs the
/// link check. The process exits with status 1 if `mlc::run` reports an error
/// and with status 0 otherwise.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    print_header();
    let config = cli::parse_args();
    let log_level = match config.optional.debug {
        Some(true) => log::LevelFilter::Debug,
        _ => log::LevelFilter::Error,
    };
    logger::init(log_level)?;
    info!("Config: {}", &config);
    if mlc::run(&config).await.is_err() {
        process::exit(1);
    } else {
        process::exit(0);
    }
}

================================================
FILE: src/markup.rs
================================================
use serde::Deserialize;
use std::str::FromStr;

/// A markup file found on disk together with its detected markup type.
#[derive(Debug)]
pub struct MarkupFile {
    pub markup_type: MarkupType,
    pub path: String,
}

/// The markup languages that can be checked.
#[derive(Debug, Clone, Copy, Deserialize)]
pub enum MarkupType {
    Markdown,
    Html,
}

impl FromStr for MarkupType {
    type Err = ();

    /// Parses the short names `"md"` and `"html"`; any other input is an error.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "md" => Ok(MarkupType::Markdown),
            "html" => Ok(MarkupType::Html),
            _ => Err(()),
        }
    }
}

impl MarkupType {
    /// Returns the lower-case file extensions associated with this markup type.
    #[must_use]
    pub fn file_extensions(&self) -> Vec<String> {
        let extensions: &[&str] = match self {
            MarkupType::Markdown => &[
                "md", "markdown", "mkdown", "mkdn", "mkd", "mdwn", "mdtxt", "mdtext", "text",
                "rmd",
            ],
            MarkupType::Html => &["htm", "html", "xhtml"],
        };
        extensions.iter().map(|ext| (*ext).to_string()).collect()
    }
}

================================================
FILE: tests/end_to_end.rs
================================================
#[cfg(test)]
mod helper;

use helper::benches_dir;
use mlc::markup::MarkupType;
use
mlc::Config; use mlc::OptionalConfig; use std::fs; use std::path::MAIN_SEPARATOR; #[tokio::test] async fn end_to_end() { let config = Config { directory: benches_dir().join("benchmark"), optional: OptionalConfig { debug: None, do_not_warn_for_redirect_to: None, markup_types: Some(vec![MarkupType::Markdown]), offline: Some(true), // Use offline mode to avoid checking external URLs match_file_extension: None, throttle: None, ignore_links: Some(vec!["./doc/broken-local-link.doc".to_string()]), ignore_path: Some(vec![ fs::canonicalize("benches/benchmark/markdown/ignore_me.md").unwrap(), fs::canonicalize("./benches/benchmark/markdown/ignore_me_dir").unwrap(), ]), root_dir: None, gitignore: None, gituntracked: None, csv_file: None, files: None, http_headers: None, }, }; if let Err(e) = mlc::run(&config).await { panic!("Test failed. {:?}", e); } } #[tokio::test] async fn end_to_end_different_root() { let test_files = benches_dir().join("different_root"); let csv_output = std::env::temp_dir().join("mlc_test_different_root.csv"); let config = Config { directory: test_files.clone(), optional: OptionalConfig { debug: Some(true), do_not_warn_for_redirect_to: None, markup_types: Some(vec![MarkupType::Markdown]), offline: None, match_file_extension: None, ignore_links: None, ignore_path: None, throttle: None, root_dir: Some(test_files), gitignore: None, gituntracked: None, csv_file: Some(csv_output.clone()), files: None, http_headers: None, }, }; if let Err(e) = mlc::run(&config).await { panic!("Test with custom root failed. 
{:?}", e); } else { // Check if the CSV file was created, but is empty except for the header let content = fs::read_to_string(csv_output).unwrap(); let lines: Vec<&str> = content.lines().collect(); assert_eq!(lines.len(), 1); assert_eq!(lines[0], "source,line,column,target,severity"); } } #[tokio::test] async fn end_to_end_write_csv_file() { let csv_output = std::env::temp_dir().join("mlc_test_write_csv.csv"); let config = Config { directory: benches_dir().join("benchmark/markdown/ignore_me.md"), optional: OptionalConfig { debug: None, do_not_warn_for_redirect_to: None, markup_types: Some(vec![MarkupType::Markdown]), offline: None, match_file_extension: None, throttle: None, ignore_links: None, ignore_path: None, root_dir: None, gitignore: None, gituntracked: None, csv_file: Some(csv_output.clone()), files: None, http_headers: None, }, }; if (mlc::run(&config).await).is_err() { let content = fs::read_to_string(csv_output).unwrap(); let lines: Vec<&str> = content.lines().collect(); assert_eq!(lines.len(), 4); assert_eq!(lines[0], "source,line,column,target,severity"); for (i, line) in lines.iter().enumerate().skip(1) { assert_eq!( line, &format!( "benches{MAIN_SEPARATOR}benchmark/markdown/ignore_me.md,{i},1,broken_Link,ERR", ) ); } } else { panic!("Should have detected errors"); } } #[tokio::test] async fn end_to_end_csv_include_warnings() { let csv_output = std::env::temp_dir().join("mlc_test_csv_warnings.csv"); let config = Config { directory: benches_dir().join("benchmark/markdown/ref_links.md"), optional: OptionalConfig { debug: None, do_not_warn_for_redirect_to: None, markup_types: Some(vec![MarkupType::Markdown]), offline: Some(true), // Use offline mode to avoid actual HTTP calls match_file_extension: None, throttle: None, ignore_links: None, ignore_path: None, root_dir: None, gitignore: None, gituntracked: None, csv_file: Some(csv_output.clone()), files: None, http_headers: None, }, }; // Run the check - should succeed because we're offline let result = 
mlc::run(&config).await;

    // Check that CSV was created
    assert!(csv_output.exists(), "CSV file should exist");
    let content = fs::read_to_string(&csv_output).unwrap();
    let lines: Vec<&str> = content.lines().collect();

    // Should have header and warning entries
    assert!(
        lines.len() > 1,
        "CSV should have header and warning entries"
    );
    assert_eq!(lines[0], "source,line,column,target,severity");

    // Verify that warning entries are present - the ref_links.md file has several broken markdown references
    // Check that all lines after header have the expected CSV format with severity column
    for line in lines.iter().skip(1) {
        let parts: Vec<&str> = line.split(',').collect();
        assert_eq!(
            parts.len(),
            5,
            "Each CSV line should have 5 columns including severity"
        );
        assert!(
            parts[0].contains("ref_links.md"),
            "Source should be ref_links.md"
        );
        assert_eq!(parts[4], "WARN", "Severity should be WARN for warnings");
    }

    // Verify specific warnings are captured (broken markdown references)
    assert!(
        content.contains(",WARN"),
        "CSV should contain WARN severity"
    );

    // Clean up
    let _ = fs::remove_file(csv_output);

    // Also verify the test would pass
    assert!(result.is_ok(), "Should succeed with warnings only");
}

================================================
FILE: tests/end_to_end_mock.rs
================================================
#[cfg(test)]
mod helper;

use helper::benches_dir;
use mlc::markup::MarkupType;
use mlc::Config;
use mlc::OptionalConfig;
use mockito::ServerGuard;
use std::fs;
use std::path::PathBuf;

/// Starts eight mock HTTP servers that answer `HEAD /` and `GET /` with status 200.
async fn setup_mock_servers() -> Vec<ServerGuard> {
    let mut servers = Vec::new();

    // Create multiple mock servers
    for _ in 0..8 {
        let mut server = mockito::Server::new_async().await;
        server
            .mock("HEAD", "/")
            .with_status(200)
            .create_async()
            .await;
        server
            .mock("GET", "/")
            .with_status(200)
            .create_async()
            .await;
        servers.push(server);
    }

    servers
}

/// Replaces every `MOCK_SERVER_URL_<n>` placeholder (1-based) in `content`
/// with the URL of the n-th server.
fn replace_mock_urls(content: &str, servers: &[ServerGuard]) -> String {
    servers
        .iter()
        .enumerate()
        .fold(content.to_string(), |text, (i, server)| {
            let placeholder = format!("MOCK_SERVER_URL_{}", i + 1);
            text.replace(&placeholder, &server.url())
        })
}

/// Path of the `tests/test_files` fixture directory.
fn test_files_dir() -> PathBuf {
    benches_dir()
        .parent()
        .unwrap()
        .join("tests")
        .join("test_files")
}

/// All links point at mock servers answering 200, so the run must succeed.
#[tokio::test]
async fn end_to_end_with_mock_servers() {
    // Set up mock servers
    let servers = setup_mock_servers().await;

    // Create temporary directory for test files with replaced URLs
    let temp_dir = std::env::temp_dir().join("mlc_test_mock_servers");
    if temp_dir.exists() {
        fs::remove_dir_all(&temp_dir).unwrap();
    }
    fs::create_dir_all(&temp_dir).unwrap();
    fs::create_dir_all(temp_dir.join("deep")).unwrap();

    // Copy and replace URLs in test files
    let test_files = test_files_dir();
    for name in ["reference_links.md", "many_links.md", "repeat_links.md"] {
        let content = fs::read_to_string(test_files.join(name)).unwrap();
        let updated_content = replace_mock_urls(&content, &servers);
        fs::write(temp_dir.join(name), updated_content).unwrap();
    }

    // Deep directory file
    let content = fs::read_to_string(test_files.join("deep/index.md")).unwrap();
    fs::write(temp_dir.join("deep/index.md"), content).unwrap();

    // Run mlc with the temporary directory
    let config = Config {
        directory: temp_dir.clone(),
        optional: OptionalConfig {
            debug: Some(true),
            do_not_warn_for_redirect_to: None,
            markup_types: Some(vec![MarkupType::Markdown]),
            offline: None,
            match_file_extension: None,
            throttle: None,
            ignore_links: Some(vec![
                // Only ignore non-http links that are expected to be unsupported
                "mailto://*".to_string(),
                "another://*".to_string(),
            ]),
            ignore_path: None,
            root_dir: None,
            gitignore: None,
            gituntracked: None,
            csv_file: None,
            files: None,
            http_headers: None,
        },
    };

    // Run the link checker - should succeed because all mock servers return 200
    if let Err(e) = mlc::run(&config).await {
        panic!("Test failed with mock servers. {:?}", e);
    }

    // Clean up
    fs::remove_dir_all(&temp_dir).unwrap();
}

/// A link to a server answering 404 must make the run fail.
#[tokio::test]
async fn end_to_end_with_mock_server_failure() {
    // Set up a mock server that returns 404
    let mut server = mockito::Server::new_async().await;
    server
        .mock("HEAD", "/")
        .with_status(404)
        .create_async()
        .await;
    server
        .mock("GET", "/")
        .with_status(404)
        .create_async()
        .await;

    // Create temporary directory for test files
    let temp_dir = std::env::temp_dir().join("mlc_test_mock_failure");
    if temp_dir.exists() {
        fs::remove_dir_all(&temp_dir).unwrap();
    }
    fs::create_dir_all(&temp_dir).unwrap();

    // Create a simple test file with a broken link
    let content = format!("[Broken Link]({})", server.url());
    fs::write(temp_dir.join("broken.md"), content).unwrap();

    let config = Config {
        directory: temp_dir.clone(),
        optional: OptionalConfig {
            debug: Some(true),
            do_not_warn_for_redirect_to: None,
            markup_types: Some(vec![MarkupType::Markdown]),
            offline: None,
            match_file_extension: None,
            throttle: None,
            ignore_links: None,
            ignore_path: None,
            root_dir: None,
            gitignore: None,
            gituntracked: None,
            csv_file: None,
            files: None,
            http_headers: None,
        },
    };

    // Run the link checker - should fail because server returns 404
    if mlc::run(&config).await.is_ok() {
        panic!("Test should have failed due to 404 response from mock server");
    }

    // Clean up
    fs::remove_dir_all(&temp_dir).unwrap();
}

/// A link that is redirected (301) to a working target only produces a
/// warning, so the run must still succeed.
#[tokio::test]
async fn end_to_end_with_mock_server_redirect() {
    // Set up redirect and target mock servers
    let mut target_server = mockito::Server::new_async().await;
    target_server
        .mock("HEAD", "/")
        .with_status(200)
        .create_async()
        .await;
    target_server
        .mock("GET", "/")
        .with_status(200)
        .create_async()
        .await;

    let mut
redirect_server = mockito::Server::new_async().await; redirect_server .mock("HEAD", "/") .with_status(301) .with_header("Location", &target_server.url()) .create_async() .await; redirect_server .mock("GET", "/") .with_status(301) .with_header("Location", &target_server.url()) .create_async() .await; // Create temporary directory for test files let temp_dir = std::env::temp_dir().join("mlc_test_mock_redirect"); if temp_dir.exists() { fs::remove_dir_all(&temp_dir).unwrap(); } fs::create_dir_all(&temp_dir).unwrap(); // Create a test file with a redirect let content = format!("[Redirect Link]({})", redirect_server.url()); fs::write(temp_dir.join("redirect.md"), content).unwrap(); let config = Config { directory: temp_dir.clone(), optional: OptionalConfig { debug: Some(true), do_not_warn_for_redirect_to: None, markup_types: Some(vec![MarkupType::Markdown]), offline: None, match_file_extension: None, throttle: None, ignore_links: None, ignore_path: None, root_dir: None, gitignore: None, gituntracked: None, csv_file: None, files: None, http_headers: None, }, }; // Run the link checker - should succeed but with warnings // The run should succeed even with redirect warnings let result = mlc::run(&config).await; assert!( result.is_ok(), "Test should succeed even with redirect warnings" ); // Clean up fs::remove_dir_all(&temp_dir).unwrap(); } ================================================ FILE: tests/file_traversal.rs ================================================ #[cfg(test)] use mlc::file_traversal; use mlc::markup::{MarkupFile, MarkupType}; use mlc::Config; use mlc::OptionalConfig; use std::path::Path; #[test] fn find_markdown_files() { let path = Path::new("./benches/benchmark/markdown/md_file_endings").to_path_buf(); let config: Config = Config { directory: path, optional: OptionalConfig { markup_types: Some(vec![MarkupType::Markdown]), ..Default::default() }, }; let mut result: Vec = Vec::new(); file_traversal::find(&config, &mut result); assert_eq!(result.len(), 
12); } #[test] fn empty_folder() { let path = Path::new("./benches/benchmark/markdown/empty").to_path_buf(); let config: Config = Config { directory: path, optional: OptionalConfig { markup_types: Some(vec![MarkupType::Markdown]), ..Default::default() }, }; let mut result: Vec = Vec::new(); file_traversal::find(&config, &mut result); assert!(result.is_empty()); } ================================================ FILE: tests/files_option.rs ================================================ use mlc::file_traversal; use mlc::markup::{MarkupFile, MarkupType}; use mlc::Config; use mlc::OptionalConfig; use std::path::{Path, PathBuf}; #[test] fn find_specific_files() { let file1 = Path::new("./README.md").to_path_buf(); let file2 = Path::new("./CHANGELOG.md").to_path_buf(); let config: Config = Config { directory: PathBuf::from("."), optional: OptionalConfig { markup_types: Some(vec![MarkupType::Markdown]), files: Some(vec![file1, file2]), ..Default::default() }, }; let mut result: Vec = Vec::new(); file_traversal::find(&config, &mut result); assert_eq!(result.len(), 2); assert!(result.iter().any(|f| f.path.contains("README.md"))); assert!(result.iter().any(|f| f.path.contains("CHANGELOG.md"))); } #[test] fn find_single_file() { let file1 = Path::new("./README.md").to_path_buf(); let config: Config = Config { directory: PathBuf::from("."), optional: OptionalConfig { markup_types: Some(vec![MarkupType::Markdown]), files: Some(vec![file1]), ..Default::default() }, }; let mut result: Vec = Vec::new(); file_traversal::find(&config, &mut result); assert_eq!(result.len(), 1); assert!(result[0].path.contains("README.md")); } #[test] fn find_files_ignores_non_matching_types() { // Test with a markdown file but only HTML markup type configured let file1 = Path::new("./README.md").to_path_buf(); let config: Config = Config { directory: PathBuf::from("."), optional: OptionalConfig { markup_types: Some(vec![MarkupType::Html]), files: Some(vec![file1]), ..Default::default() }, }; let 
mut result: Vec = Vec::new(); file_traversal::find(&config, &mut result); // Should not find any files since README.md is markdown, not HTML assert_eq!(result.len(), 0); } #[test] fn find_files_with_ignore_path() { let file1 = Path::new("./README.md").to_path_buf(); let ignore_file = std::fs::canonicalize(Path::new("./README.md")).unwrap(); let config: Config = Config { directory: PathBuf::from("."), optional: OptionalConfig { markup_types: Some(vec![MarkupType::Markdown]), files: Some(vec![file1]), ignore_path: Some(vec![ignore_file]), ..Default::default() }, }; let mut result: Vec = Vec::new(); file_traversal::find(&config, &mut result); // Should be empty because the file is in ignore_path assert_eq!(result.len(), 0); } ================================================ FILE: tests/gitignore_recursive.rs ================================================ use mlc::markup::MarkupType; use mlc::Config; use mlc::OptionalConfig; use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; struct TempDir { path: PathBuf, } impl TempDir { fn new(name: &str) -> Self { let mut path = std::env::temp_dir(); let unique = format!( "mlc_test_{name}_{}_{}", std::process::id(), std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .unwrap_or_default() .as_nanos() ); path.push(unique); fs::create_dir_all(&path).expect("failed to create temp dir"); Self { path } } } impl Drop for TempDir { fn drop(&mut self) { let _ = fs::remove_dir_all(&self.path); } } fn git_available() -> bool { Command::new("git") .arg("--version") .output() .map(|o| o.status.success()) .unwrap_or(false) } fn run_git(repo: &Path, args: &[&str]) { let status = Command::new("git") .current_dir(repo) .args(args) .status() .expect("failed to run git"); assert!(status.success(), "git command failed: git {:?}", args); } #[tokio::test] async fn gitignore_is_recursive_nested_gitignore_is_respected() { if !git_available() { panic!("Failing test: git executable must be available"); } let repo = 
TempDir::new("gitignore_recursive"); // Create nested structure let docs_dir = repo.path.join("docs"); fs::create_dir_all(&docs_dir).expect("failed to create docs dir"); // Nested .gitignore ignores only ignored.md (not configured in root .gitignore) fs::write(docs_dir.join(".gitignore"), "ignored.md\n") .expect("failed to write nested .gitignore"); // Tracked files (should be checked) fs::write(docs_dir.join("ok_target.md"), "# ok\n").expect("failed to write ok_target.md"); fs::write(docs_dir.join("checked.md"), "[ok](./ok_target.md)\n") .expect("failed to write checked.md"); // Ignored file contains a broken link; if this file is (incorrectly) checked, mlc should fail. fs::write(docs_dir.join("ignored.md"), "[broken](./missing.md)\n") .expect("failed to write ignored.md"); // Initialize git repo and commit tracked files. run_git(&repo.path, &["init"]); run_git(&repo.path, &["config", "user.email", "test@example.com"]); run_git(&repo.path, &["config", "user.name", "mlc test"]); run_git( &repo.path, &[ "add", "docs/.gitignore", "docs/ok_target.md", "docs/checked.md", ], ); run_git(&repo.path, &["commit", "-m", "test fixtures"]); let config = Config { directory: repo.path.clone(), optional: OptionalConfig { debug: None, do_not_warn_for_redirect_to: None, markup_types: Some(vec![MarkupType::Markdown]), offline: Some(true), match_file_extension: None, ignore_links: None, ignore_path: None, root_dir: None, gitignore: Some(true), gituntracked: None, csv_file: None, throttle: None, files: None, http_headers: None, }, }; let result = mlc::run(&config).await; assert!( result.is_ok(), "Expected ok because ignored.md should be ignored by nested .gitignore" ); } ================================================ FILE: tests/helper/mod.rs ================================================ #[cfg(test)] use std::path::{Path, PathBuf}; pub fn benches_dir() -> PathBuf { Path::new(file!()) .parent() .unwrap() .parent() .unwrap() .parent() .unwrap() .join("benches") } 
================================================ FILE: tests/markdown_files.rs ================================================ #[cfg(test)] use mlc::link_extractors::link_extractor::find_links; use mlc::markup::{MarkupFile, MarkupType}; #[test] fn no_links() { let path = "./benches/benchmark/markdown/no_links/no_links.md".to_string(); let file = MarkupFile { path, markup_type: MarkupType::Markdown, }; let result = find_links(&file); assert!(result.is_empty()); } #[test] fn some_links() { let path = "./benches/benchmark/markdown/many_links/many_links.md".to_string(); let file = MarkupFile { path, markup_type: MarkupType::Markdown, }; let result = find_links(&file); assert_eq!(result.len(), 12); } ================================================ FILE: tests/symlink_test.rs ================================================ #[cfg(test)] use mlc::file_traversal; use mlc::markup::{MarkupFile, MarkupType}; use mlc::Config; use mlc::OptionalConfig; use std::path::Path; #[test] fn test_symlink_dedupe() { let path = Path::new("./tests/test_files/symlink_test").to_path_buf(); let config: Config = Config { directory: path, optional: OptionalConfig { markup_types: Some(vec![MarkupType::Markdown]), ..Default::default() }, }; let mut result: Vec = Vec::new(); file_traversal::find(&config, &mut result); // Should find only 1 file (not 2) since symlink.md points to original.md assert_eq!( result.len(), 1, "Expected to find only 1 file, but found {}: {:?}", result.len(), result ); } ================================================ FILE: tests/test_files/deep/index.md ================================================ # Deep file Some content here. 
================================================
FILE: tests/test_files/many_links.md
================================================
# Many Links

[local_file](many_links.md)
[folder](./deep)
[https_link](MOCK_SERVER_URL_6)
[https_link2](MOCK_SERVER_URL_7)
[mail](mailto://test.mail@tester.com)
[unknown_url](another://foobar)

================================================
FILE: tests/test_files/reference_links.md
================================================
# Contain reference style markdown links

[I'm a reference-style link][Arbitrary case-insensitive reference text]

[I'm a relative reference to a repository file](./many_links.md)

[You can use numbers for reference-style link definitions][1]

Or leave it empty and use the [link text itself].

[This is not a valid reference link][2]

URLs and URLs in angle brackets will automatically get turned into links.
<MOCK_SERVER_URL_1> or <MOCK_SERVER_URL_2> and sometimes
example.com (but not on Github, for example).

Some text to show that the reference links can follow later.

[arbitrary case-insensitive reference text]: MOCK_SERVER_URL_3
[1]: MOCK_SERVER_URL_4
[link text itself]: MOCK_SERVER_URL_5

================================================
FILE: tests/test_files/repeat_links.md
================================================
# Chapter 1

[Mock1](MOCK_SERVER_URL_8)
[Mock2](MOCK_SERVER_URL_8)
[Mock3](MOCK_SERVER_URL_8)

================================================
FILE: tests/test_files/symlink_test/original.md
================================================
# Test File

This is a test markdown file.
[Valid Link](https://www.rust-lang.org/)

================================================
FILE: tests/throttle.rs
================================================
#[cfg(test)]
mod helper;

use helper::benches_dir;
use mlc::{markup::MarkupType, Config, OptionalConfig};
use std::time::{Duration, Instant};

const TEST_THROTTLE_MS: u32 = 100;
const TEST_URLS: u32 = 10;
// Lower bound for a fully throttled run: one pause between each pair of
// consecutive requests.
const THROTTLED_TIME_MS: u64 = (TEST_THROTTLE_MS as u64) * ((TEST_URLS as u64) - 1);

/// Checks the given fixture from `benches/throttle` with throttling enabled
/// and returns how long the run took. The outcome of the run is ignored.
async fn timed_throttled_run(fixture: &str) -> Duration {
    let config = Config {
        directory: benches_dir().join("throttle").join(fixture),
        optional: OptionalConfig {
            throttle: Some(TEST_THROTTLE_MS),
            markup_types: Some(vec![MarkupType::Markdown]),
            ..Default::default()
        },
    };

    let started = Instant::now();
    let _ = mlc::run(&config).await;
    started.elapsed()
}

/// Requests to different hosts finish faster than a fully throttled run.
#[tokio::test]
async fn throttle_different_hosts() {
    let elapsed = timed_throttled_run("different_host.md").await;
    assert!(elapsed < Duration::from_millis(THROTTLED_TIME_MS))
}

/// Requests to the same host take longer than the fully throttled lower bound.
#[tokio::test]
async fn throttle_same_hosts() {
    let elapsed = timed_throttled_run("same_host.md").await;
    assert!(elapsed > Duration::from_millis(THROTTLED_TIME_MS))
}

/// Requests from the `same_ip.md` fixture take longer than the fully throttled lower bound.
#[tokio::test]
async fn throttle_same_ip() {
    let elapsed = timed_throttled_run("same_ip.md").await;
    assert!(elapsed > Duration::from_millis(THROTTLED_TIME_MS))
}