Repository: Y2Z/monolith Branch: master Commit: 8702e66fed5b Files: 103 Total size: 338.1 KB Directory structure: gitextract_1w8c2ho6/ ├── .actor/ │ ├── Dockerfile │ ├── README.md │ ├── actor.json │ ├── bin/ │ │ └── actor.sh │ ├── dataset_schema.json │ └── input_schema.json ├── .dockerignore ├── .github/ │ ├── FUNDING.yml │ └── workflows/ │ ├── build_gnu_linux.yml │ ├── build_macos.yml │ ├── build_windows.yml │ ├── cd.yml │ ├── ci-netbsd.yml │ └── ci.yml ├── .gitignore ├── Cargo.toml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── assets/ │ └── icon/ │ └── icon.blend ├── dist/ │ └── run-in-container.sh ├── monolith.nuspec ├── snap/ │ └── snapcraft.yaml ├── src/ │ ├── cache.rs │ ├── cookies.rs │ ├── core.rs │ ├── css.rs │ ├── gui.rs │ ├── html.rs │ ├── js.rs │ ├── lib.rs │ ├── main.rs │ ├── session.rs │ └── url.rs └── tests/ ├── _data_/ │ ├── basic/ │ │ ├── local-file.html │ │ ├── local-script.js │ │ └── local-style.css │ ├── css/ │ │ ├── index.html │ │ └── style.css │ ├── import-css-via-data-url/ │ │ ├── index.html │ │ └── style.css │ ├── integrity/ │ │ ├── index.html │ │ ├── script.js │ │ └── style.css │ ├── noscript/ │ │ ├── index.html │ │ ├── nested.html │ │ └── script.html │ ├── svg/ │ │ ├── image.html │ │ ├── index.html │ │ └── svg.html │ └── unusual_encodings/ │ ├── gb2312.html │ └── iso-8859-1.html ├── cli/ │ ├── base_url.rs │ ├── basic.rs │ ├── data_url.rs │ ├── local_files.rs │ ├── mod.rs │ ├── noscript.rs │ └── unusual_encodings.rs ├── cookies/ │ ├── cookie/ │ │ ├── is_expired.rs │ │ ├── matches_url.rs │ │ └── mod.rs │ ├── mod.rs │ └── parse_cookie_file_contents.rs ├── core/ │ ├── detect_media_type.rs │ ├── format_output_path.rs │ ├── mod.rs │ ├── options.rs │ └── parse_content_type.rs ├── css/ │ ├── embed_css.rs │ ├── is_image_url_prop.rs │ └── mod.rs ├── html/ │ ├── add_favicon.rs │ ├── check_integrity.rs │ ├── compose_csp.rs │ ├── create_metadata_tag.rs │ ├── embed_srcset.rs │ ├── get_base_url.rs │ ├── get_charset.rs │ ├── get_node_attr.rs │ 
├── get_node_name.rs │ ├── has_favicon.rs │ ├── is_favicon.rs │ ├── mod.rs │ ├── parse_link_type.rs │ ├── parse_srcset.rs │ ├── serialize_document.rs │ ├── set_node_attr.rs │ └── walk.rs ├── js/ │ ├── attr_is_event_handler.rs │ └── mod.rs ├── mod.rs ├── session/ │ ├── mod.rs │ └── retrieve_asset.rs └── url/ ├── clean_url.rs ├── create_data_url.rs ├── domain_is_within_domain.rs ├── get_referer_url.rs ├── is_url_and_has_protocol.rs ├── mod.rs ├── parse_data_url.rs └── resolve_url.rs ================================================ FILE CONTENTS ================================================ ================================================ FILE: .actor/Dockerfile ================================================ FROM node:alpine RUN apk --no-cache add curl bash git monolith jq RUN npm -g install apify-cli COPY .actor .actor CMD ./.actor/bin/actor.sh ================================================ FILE: .actor/README.md ================================================ # Monolith Actor on Apify [![Monolith Actor](https://apify.com/actor-badge?actor=snshn/monolith)](https://apify.com/snshn/monolith?fpr=snshn) This Actor wraps [Monolith](https://crates.io/crates/monolith) to crawl a web page URL and bundle the entire content in a single HTML file, without installing and running the tool locally. ## What are Actors? [Actors](https://docs.apify.com/platform/actors?fpr=snshn) are serverless microservices running on the [Apify Platform](https://apify.com/?fpr=snshn). They are based on the [Actor SDK](https://docs.apify.com/sdk/js?fpr=snshn) and can be found in the [Apify Store](https://apify.com/store?fpr=snshn). Learn more about Actors in the [Apify Whitepaper](https://whitepaper.actor?fpr=snshn). ## Usage ### Apify Console 1. Go to the Apify Actor page 2. Click "Run" 3. In the input form, fill in **URL(s)** to crawl and bundle 4. 
The Actor will run and: - save the bundled HTML files in the run's default key-value store - save the links to the KVS with original URL and monolith process exit status to the dataset ### Apify CLI ```bash apify call snshn/monolith --input='{ "urls": ["https://news.ycombinator.com/"] }' ``` ### Using Apify API ```bash curl --request POST \ --url "https://api.apify.com/v2/acts/snshn~monolith/runs" \ --header 'Content-Type: application/json' \ --header 'Authorization: Bearer YOUR_API_TOKEN' \ --data '{ "urls": ["https://news.ycombinator.com/"] }' ``` ## Input Parameters The Actor accepts JSON input with the following structure: | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `urls` | array | Yes | - | List of URLs to monolith | | `urls[]` | string | Yes | - | URL to monolith | ### Example Input ```json { "urls": ["https://news.ycombinator.com/"] } ``` ## Output The Actor provides two types of outputs: the bundled HTML files in the run's default key-value store, and one dataset record per processed URL. ### Dataset Record | Field | Type | Required | Description | |-------|------|----------|-------------| | `url` | string | Yes | The original start URL for the monolith process | | `kvsUrl` | string | Yes | A link to the Apify key-value store object where the monolithic HTML is available for download | | `status` | string | Yes | Exit status of the monolith process | ### Example Dataset Item (JSON) ```json { "url": "https://news.ycombinator.com/", "kvsUrl": "https://api.apify.com/v2/key-value-stores/JRFLHRy9DOtdKGpdm/records/https___news.ycombinator.com_", "status": "0" } ``` ## Performance & Resources - **Memory Requirements**: - Minimum: 4168 MB RAM - **Processing Time**: - 30s per complex page like [bbc.co.uk](https://bbc.co.uk) For more help, check the [Monolith Project documentation](https://github.com/Y2Z/monolith) or raise an issue in the [Actor page detail](https://apify.com/snshn/monolith?fpr=snshn) on Apify. 
================================================ FILE: .actor/actor.json ================================================ { "actorSpecification": 1, "name": "monolith", "version": "0.0", "buildTag": "latest", "environmentVariables": {}, "dockerFile": "./Dockerfile", "dockerContext": "../", "input": "./input_schema.json", "storages": { "dataset": "./dataset_schema.json" } } ================================================ FILE: .actor/bin/actor.sh ================================================ #!/bin/bash # Bundles every URL from the Actor input with monolith, stores the HTML in the default key-value store and pushes one result record per URL to the dataset #pwd #find ./storage apify actor:get-input > /dev/null INPUT=`apify actor:get-input | jq -r .urls[] | xargs echo` echo "INPUT: $INPUT" for url in $INPUT; do # support for local usage # sanitize url to a safe *nix filename - replace non-alphanumeric characters # https://stackoverflow.com/questions/9847288/is-it-possible-to-use-in-a-filename # https://serverfault.com/questions/348482/how-to-remove-invalid-characters-from-filenames safe_filename=`echo "$url" | sed -e 's/[^A-Za-z0-9._-]/_/g'` echo "Monolith-ing $url to key $safe_filename" monolith "$url" | apify actor:set-value "$safe_filename" --contentType=text/html # capture monolith's own exit status immediately: a plain $? reports the last pipeline stage, and after any later assignment it is always 0 result=${PIPESTATUS[0]} kvs_url="https://api.apify.com/v2/key-value-stores/${APIFY_DEFAULT_KEY_VALUE_STORE_ID}/records/${safe_filename}" 
echo "Pushing result item to the datastore" echo "{\"url\":\"${url}\",\"status\":\"${result}\", \"kvsUrl\":\"${kvs_url}\"}" | apify actor:push-data done exit 0 ================================================ FILE: .actor/dataset_schema.json ================================================ { "actorSpecification": 1, "fields":{ "title": "Monolith actor dataset", "description": "This is actor dataset schema", "type": "object", "schemaVersion": 1, "properties": { "kvsUrl": { "title": "Object URL", "type": "string", "description": "A link to the Apify key-value store object where the monolithic HTML is available" }, "status": { "title": "Exit status", "type": "string", "description": "Exit status of the monolith process" }, "url": { "title": "URL", "type": "string", "description": "The original start URL for the monolith process" } }, "required": [ "kvsUrl", "status", "url" ] }, "views": { "overview": { "title": "Overview", "transformation": { "fields": [ "url", "kvsUrl", "status" ] }, "display": { "component": "table", "url": { "label": "Page URL" }, "kvsUrl": { "label": "KVS URL" }, "status": { "label": "Status" } } } } } ================================================ FILE: .actor/input_schema.json ================================================ { "title": "Monolith actor input", "description": "This is actor input schema", "type": "object", "schemaVersion": 1, "properties": { "urls": { "title": "Urls", "type": "array", "description": "A list of URLs of pages to bundle into a single HTML document", "editor": "stringList", "prefill": ["http://www.google.com"] } }, "required": [ "urls" ] } ================================================ FILE: .dockerignore ================================================ /target/ ================================================ FILE: .github/FUNDING.yml ================================================ # These are supported funding model platforms github: snshn ================================================ FILE: 
.github/workflows/build_gnu_linux.yml ================================================ name: GNU/Linux on: push: branches: [ master ] paths-ignore: - 'assets/' - 'dist/' - 'snap/' - 'Dockerfile' - 'LICENSE' - 'Makefile' - 'monolith.nuspec' - 'README.md' jobs: build: strategy: matrix: os: - ubuntu-latest rust: - stable runs-on: ${{ matrix.os }} steps: - run: git config --global core.autocrlf false - uses: actions/checkout@v2 - name: Build run: cargo build --all --locked --verbose ================================================ FILE: .github/workflows/build_macos.yml ================================================ name: macOS on: push: branches: [ master ] paths-ignore: - 'assets/' - 'dist/' - 'snap/' - 'Dockerfile' - 'LICENSE' - 'Makefile' - 'monolith.nuspec' - 'README.md' jobs: build: strategy: matrix: os: - macos-latest rust: - stable runs-on: ${{ matrix.os }} steps: - run: git config --global core.autocrlf false - uses: actions/checkout@v2 - name: Build run: cargo build --all --locked --verbose ================================================ FILE: .github/workflows/build_windows.yml ================================================ name: Windows on: push: branches: [ master ] paths-ignore: - 'assets/' - 'dist/' - 'snap/' - 'Dockerfile' - 'LICENSE' - 'Makefile' - 'monolith.nuspec' - 'README.md' jobs: build: strategy: matrix: os: - windows-latest rust: - stable runs-on: ${{ matrix.os }} steps: - run: git config --global core.autocrlf false - uses: actions/checkout@v2 - name: Build run: cargo build --all --locked --verbose ================================================ FILE: .github/workflows/cd.yml ================================================ # CD GitHub Actions workflow for monolith name: CD on: release: types: - created jobs: gnu_linux_aarch64: runs-on: ubuntu-20.04 steps: - name: Checkout the repository uses: actions/checkout@v4 - name: Prepare cross-platform environment run: | sudo mkdir /cross-build sudo touch /etc/apt/sources.list.d/arm64.list echo 
"deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ focal main" | sudo tee -a /etc/apt/sources.list.d/arm64.list sudo apt-get update sudo apt-get install -y gcc-aarch64-linux-gnu libc6-arm64-cross libc6-dev-arm64-cross sudo apt-get download libssl1.1:arm64 libssl-dev:arm64 sudo dpkg -x libssl1.1*.deb /cross-build sudo dpkg -x libssl-dev*.deb /cross-build rustup target add aarch64-unknown-linux-gnu echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV echo "OPENSSL_INCLUDE_DIR=/cross-build/usr/include/aarch64-linux-gnu" >> $GITHUB_ENV echo "OPENSSL_LIB_DIR=/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV echo "RUSTFLAGS=-C linker=aarch64-linux-gnu-gcc -L/usr/aarch64-linux-gnu/lib -L/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV - name: Build the executable run: cargo build --release --target=aarch64-unknown-linux-gnu --no-default-features --features cli - name: Attach artifact to the release uses: Shopify/upload-to-release@v2.0.0 with: name: monolith-gnu-linux-aarch64 path: target/aarch64-unknown-linux-gnu/release/monolith repo-token: ${{ secrets.GITHUB_TOKEN }} gnu_linux_armhf: runs-on: ubuntu-20.04 steps: - name: Checkout the repository uses: actions/checkout@v4 - name: Prepare cross-platform environment run: | sudo mkdir /cross-build sudo touch /etc/apt/sources.list.d/armhf.list echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ focal main" | sudo tee -a /etc/apt/sources.list.d/armhf.list sudo apt-get update sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross sudo apt-get download libssl1.1:armhf libssl-dev:armhf sudo dpkg -x libssl1.1*.deb /cross-build sudo dpkg -x libssl-dev*.deb /cross-build rustup target add arm-unknown-linux-gnueabihf echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV echo "OPENSSL_INCLUDE_DIR=/cross-build/usr/include/arm-linux-gnueabihf" >> $GITHUB_ENV echo 
"OPENSSL_LIB_DIR=/cross-build/usr/lib/arm-linux-gnueabihf" >> $GITHUB_ENV echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV echo "RUSTFLAGS=-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build/usr/lib/arm-linux-gnueabihf -L/cross-build/lib/arm-linux-gnueabihf" >> $GITHUB_ENV - name: Build the executable run: cargo build --release --target=arm-unknown-linux-gnueabihf --no-default-features --features cli - name: Attach artifact to the release uses: Shopify/upload-to-release@v2.0.0 with: name: monolith-gnu-linux-armhf path: target/arm-unknown-linux-gnueabihf/release/monolith repo-token: ${{ secrets.GITHUB_TOKEN }} gnu_linux_x86_64: runs-on: ubuntu-20.04 steps: - name: Checkout the repository uses: actions/checkout@v4 - name: Build the executable run: cargo build --release - uses: Shopify/upload-to-release@v2.0.0 with: name: monolith-gnu-linux-x86_64 path: target/release/monolith repo-token: ${{ secrets.GITHUB_TOKEN }} windows: runs-on: windows-2019 steps: - run: git config --global core.autocrlf false - name: Checkout the repository uses: actions/checkout@v4 - name: Build the executable run: cargo build --release - uses: Shopify/upload-to-release@v2.0.0 with: name: monolith.exe path: target\release\monolith.exe repo-token: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/ci-netbsd.yml ================================================ # CI NetBSD GitHub Actions workflow for monolith name: CI (NetBSD) on: pull_request: branches: [ master ] paths-ignore: - 'assets/' - 'dist/' - 'snap/' - 'Dockerfile' - 'LICENSE' - 'Makefile' - 'monolith.nuspec' - 'README.md' jobs: build_and_test: runs-on: ubuntu-latest name: Build and test (netbsd) steps: - name: "Checkout repository" uses: actions/checkout@v4 - name: Test in NetBSD uses: vmactions/netbsd-vm@v1 with: usesh: true prepare: | /usr/sbin/pkg_add cwrappers gmake mktools pkgconf rust run: | cargo build --all --locked --verbose --no-default-features 
--features cli cargo test --all --locked --verbose --no-default-features --features cli ================================================ FILE: .github/workflows/ci.yml ================================================ # CI GitHub Actions workflow for monolith name: CI on: pull_request: branches: [ master ] paths-ignore: - 'assets/' - 'dist/' - 'snap/' - 'Dockerfile' - 'LICENSE' - 'Makefile' - 'monolith.nuspec' - 'README.md' jobs: build_and_test: name: Build and test strategy: matrix: os: - ubuntu-latest - macos-latest - windows-latest runs-on: ${{ matrix.os }} steps: - run: git config --global core.autocrlf false - name: "Checkout repository" uses: actions/checkout@v4 - name: Build run: cargo build --all --locked --verbose - name: Run tests run: cargo test --all --locked --verbose - name: Check code formatting run: | rustup component add rustfmt cargo fmt --all -- --check ================================================ FILE: .gitignore ================================================ # Generated by Cargo # will have compiled files and executables /target/ # These are backup files generated by rustfmt **/*.rs.bk # Added by Apify CLI storage node_modules .venv ================================================ FILE: Cargo.toml ================================================ [package] name = "monolith" version = "2.11.0" authors = [ "Sunshine ", "Mahdi Robatipoor ", "Emmanuel Delaborde ", "Emi Simpson ", "rhysd ", "Andriy Rakhnin ", ] edition = "2021" description = "CLI tool and library for saving web pages as a single HTML file" homepage = "https://github.com/Y2Z/monolith" repository = "https://github.com/Y2Z/monolith" readme = "README.md" keywords = ["web", "http", "html", "download", "command-line"] categories = ["command-line-utilities", "web-programming"] include = ["src/*.rs", "Cargo.toml"] license = "CC0-1.0" [dependencies] atty = "=0.2.14" # Used for highlighting network errors base64 = "=0.22.1" # Used for integrity attributes chrono = "=0.4.41" # Used for 
formatting timestamps clap = { version = "=4.5.37", features = [ "derive", ], optional = true } # Used for processing CLI arguments cssparser = "=0.35.0" # Used for dealing with CSS directories = { version = "=6.0.0", optional = true } # Used for GUI druid = { version = "=0.8.3", optional = true } # Used for GUI encoding_rs = "=0.8.35" # Used for parsing and converting document charsets html5ever = "=0.29.1" # Used for all things DOM markup5ever_rcdom = "=0.5.0-unofficial" # Used for manipulating DOM percent-encoding = "=2.3.1" # Used for encoding URLs sha2 = "=0.10.9" # Used for calculating checksums during integrity checks redb = "=2.4.0" # Used for on-disk caching of remote assets tempfile = { version = "=3.19.1", optional = true } # Used for on-disk caching of remote assets url = "=2.5.4" # Used for parsing URLs openssl = "=0.10.72" # Used for static linking of the OpenSSL library # Used for unwrapping NOSCRIPT [dependencies.regex] version = "=1.11.1" default-features = false features = ["std", "perf-dfa", "unicode-perl"] # Used for making network requests [dependencies.reqwest] version = "=0.12.15" default-features = false features = ["default-tls", "blocking", "gzip", "brotli", "deflate"] [dev-dependencies] assert_cmd = "=2.0.17" [features] default = ["cli", "vendored-openssl"] cli = ["clap", "tempfile"] # Build a CLI tool that includes main() function gui = [ "directories", "druid", "tempfile", ] # Build a GUI executable that includes main() function vendored-openssl = [ "openssl/vendored", ] # Compile and statically link a copy of OpenSSL [lib] name = "monolith" path = "src/lib.rs" [[bin]] name = "monolith" path = "src/main.rs" required-features = ["cli"] [[bin]] name = "monolith-gui" path = "src/gui.rs" required-features = ["gui"] ================================================ FILE: Dockerfile ================================================ FROM clux/muslrust:stable as builder RUN curl -L -o monolith.tar.gz $(curl -s 
https://api.github.com/repos/y2z/monolith/releases/latest \ | grep "tarball_url.*\"," \ | cut -d '"' -f 4) RUN tar xfz monolith.tar.gz \ && mv Y2Z-monolith-* monolith \ && rm monolith.tar.gz WORKDIR monolith/ RUN make install FROM alpine # Runtime stage: install OpenSSL, then drop the apk index cache (the glob must stay unquoted, otherwise the shell passes a literal "*" and nothing is removed) RUN apk update && \ apk add --no-cache openssl && \ rm -rf /var/cache/apk/* COPY --from=builder /root/.cargo/bin/monolith /usr/bin/monolith WORKDIR /tmp ENTRYPOINT ["/usr/bin/monolith"] ================================================ FILE: LICENSE ================================================ Creative Commons Legal Code CC0 1.0 Universal CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER. Statement of Purpose The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. 
These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; ii. moral rights retained by the original author(s) and/or performer(s); iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; v. rights protecting the extraction, dissemination, use and reuse of data in a Work; vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 2. Waiver. 
To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). 
The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. 4. Limitations and Disclaimers. a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. 
================================================ FILE: Makefile ================================================ # Makefile for monolith # Each target is declared .PHONY under its own name so that a same-named file can never mask it all: build build-gui .PHONY: all build: @cargo build --locked .PHONY: build build-gui: @cargo build --locked --bin monolith-gui --features="gui" .PHONY: build-gui clean: @cargo clean .PHONY: clean format: @cargo fmt --all -- .PHONY: format format-check: @cargo fmt --all -- --check .PHONY: format-check install: @cargo install --force --locked --path . .PHONY: install lint: @cargo clippy --fix --allow-dirty --allow-staged # @cargo fix --allow-dirty --allow-staged .PHONY: lint lint-check: @cargo clippy -- .PHONY: lint-check test: build @cargo test --locked .PHONY: test uninstall: @cargo uninstall .PHONY: uninstall update-lock-file: @cargo update .PHONY: update-lock-file ================================================ FILE: README.md ================================================ [![monolith build status on GNU/Linux](https://github.com/Y2Z/monolith/workflows/GNU%2FLinux/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux) [![monolith build status on macOS](https://github.com/Y2Z/monolith/workflows/macOS/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS) [![monolith build status on Windows](https://github.com/Y2Z/monolith/workflows/Windows/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows) [![Monolith Actor on Apify](https://apify.com/actor-badge?actor=snshn/monolith)](https://apify.com/snshn/monolith?fpr=snshn) ``` _____ _____________ __________ ___________________ ___ | \ / \ | | | | | | | \/ __ \| __ | | ___ ___ |__| | | | | | | | | | | | | | | |\ /| |__| |__| |___| | | | | __ | | | \__/ | |\ | | | | | | | |___| |__________| \___________________| |___| |___| |___| ``` A data hoarder’s dream come true: bundle any web page into a single HTML file. 
You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive. Unlike the conventional “Save page as”, `monolith` not only saves the target document, it embeds CSS, image, and JavaScript assets **all at once**, producing a single HTML5 document that is a joy to store and share. If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available. --------------------------------------------------- ## Installation #### Using [Cargo](https://crates.io/crates/monolith) (cross-platform) ```console cargo install monolith ``` #### Via [Homebrew](https://formulae.brew.sh/formula/monolith) (macOS and GNU/Linux) ```console brew install monolith ``` #### Via [Chocolatey](https://community.chocolatey.org/packages/monolith) (Windows) ```console choco install monolith ``` #### Via [Scoop](https://scoop.sh/#/apps?q=monolith) (Windows) ```console scoop install main/monolith ``` #### Via [Winget](https://winstall.app/apps/Y2Z.Monolith) (Windows) ```console winget install --id=Y2Z.Monolith -e ``` #### Via [MacPorts](https://ports.macports.org/port/monolith/summary) (macOS) ```console sudo port install monolith ``` #### Using [Snapcraft](https://snapcraft.io/monolith) (GNU/Linux) ```console snap install monolith ``` #### Using [Guix](https://packages.guix.gnu.org/packages/monolith) (GNU/Linux) ```console guix install monolith ``` #### Using [NixPkgs](https://search.nixos.org/packages?channel=unstable&show=monolith&query=monolith) ```console nix-env -iA nixpkgs.monolith ``` #### Using [Flox](https://flox.dev) ```console flox install monolith ``` #### Using [Pacman](https://archlinux.org/packages/extra/x86_64/monolith) (Arch Linux) ```console pacman -S monolith ``` #### Using [aports](https://pkgs.alpinelinux.org/packages?name=monolith) (Alpine Linux) ```console 
apk add monolith ``` #### Using [XBPS Package Manager](https://voidlinux.org/packages/?q=monolith) (Void Linux) ```console xbps-install -S monolith ``` #### Using [FreeBSD packages](https://svnweb.freebsd.org/ports/head/www/monolith/) (FreeBSD) ```console pkg install monolith ``` #### Using [FreeBSD ports](https://www.freshports.org/www/monolith/) (FreeBSD) ```console cd /usr/ports/www/monolith/ make install clean ``` #### Using [pkgsrc](https://pkgsrc.se/www/monolith) (NetBSD, OpenBSD, Haiku, etc) ```console cd /usr/pkgsrc/www/monolith make install clean ``` #### Using [containers](https://www.docker.com/) ```console docker build -t y2z/monolith . sudo install -b dist/run-in-container.sh /usr/local/bin/monolith ``` #### From [source](https://github.com/Y2Z/monolith) Dependencies: `libssl`, `cargo`
Install cargo (GNU/Linux) Check if cargo is installed ```console cargo -v ``` If cargo is not already installed, install and add it to your existing ```$PATH``` (paraphrasing the [official installation instructions](https://doc.rust-lang.org/cargo/getting-started/installation.html)): ```console curl https://sh.rustup.rs -sSf | sh . "$HOME/.cargo/env" ``` Proceed with installing from source:
```console git clone https://github.com/Y2Z/monolith.git cd monolith make install ``` #### Using [pre-built binaries](https://github.com/Y2Z/monolith/releases) (Windows, ARM-based devices, etc) Every release contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standard CPU architecture. --------------------------------------------------- ## Usage ```console monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o %title%.%timestamp%.html ``` ```console cat some-site-page.html | monolith -aIiFfcMv -b https://some.site/ - > some-site-page-with-assets.html ``` --------------------------------------------------- ## Options - `-a`: Exclude audio sources - `-b`: Use `custom base URL` - `-B`: Forbid retrieving assets from specified domain(s) - `-c`: Exclude CSS - `-C`: Read cookies from `file` - `-d`: Allow retrieving assets only from specified `domain(s)` - `-e`: Ignore network errors - `-E`: Save document using `custom encoding` - `-f`: Omit frames - `-F`: Exclude web fonts - `-h`: Print help information - `-i`: Remove images - `-I`: Isolate the document - `-j`: Exclude JavaScript - `-k`: Accept invalid X.509 (TLS) certificates - `-m`: Output in MHTML format instead of HTML - `-M`: Don't add timestamp and URL information - `-n`: Extract contents of NOSCRIPT elements - `-o`: Write output to `file` (use “-” for STDOUT) - `-q`: Be quiet - `-t`: Adjust `network request timeout` - `-u`: Provide `custom User-Agent` - `-v`: Exclude videos - `-V`: Print version number --------------------------------------------------- ## Whitelisting and blacklisting domains Options `-d` and `-B` provide control over what domains can be used to retrieve assets from, e.g.: ```console monolith -I -d example.com -d www.example.com https://example.com -o example-only.html ``` ```console monolith -I -B -d .googleusercontent.com -d googleanalytics.com -d .google.com https://example.com -o example-no-ads.html ``` 
--------------------------------------------------- ## Dynamic content Monolith doesn't feature a JavaScript engine, hence websites that retrieve and display data after initial load may require usage of additional tools. For example, Chromium (Chrome) can be used to act as a pre-processor for such pages: ```console chromium --headless --window-size=1920,1080 --run-all-compositor-stages-before-draw --virtual-time-budget=9000 --incognito --dump-dom https://github.com | monolith - -I -b https://github.com -o github.html ``` --------------------------------------------------- ## Authentication ```console monolith https://username:password@example.com -o example-basic-auth.html ``` --------------------------------------------------- ## Proxies Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables. --------------------------------------------------- ### Apify Actor Usage Run Monolith Actor on Apify You can run Monolith in the cloud without installation using the [Monolith Actor](https://apify.com/snshn/monolith?fpr=snshn) on [Apify](https://apify.com?fpr=snshn) free of charge. ``` bash echo '{"urls": ["https://news.ycombinator.com/"]}' | apify call -so snshn/monolith [{ "url": "https://news.ycombinator.com/", "status": "0", "kvsUrl": "https://api.apify.com/v2/key-value-stores/of9xNgvpon4elPLbc/records/https___news.ycombinator.com_" }] ``` Read more about the [Monolith Actor](.actor/README.md), including how to use it via the Apify UI, API and CLI without installation. --------------------------------------------------- ## Contributing Please open an issue if something is wrong; that helps make this project better. --------------------------------------------------- ## License To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty. 
================================================
FILE: dist/run-in-container.sh
================================================
#!/bin/sh

# Runs monolith from its container image, forwarding every command-line
# argument to the program inside the container.
# Podman is used when it is found on PATH; otherwise Docker is used.

DOCKER=docker

# Redirect stdout to /dev/null first and only then point stderr at it;
# the reverse order ("2>&1 > /dev/null") leaves stderr on the terminal.
if which podman > /dev/null 2>&1; then
    DOCKER=podman
fi

ORG_NAME=y2z
PROG_NAME=monolith

$DOCKER run --rm $ORG_NAME/$PROG_NAME "$@"
================================================ FILE: monolith.nuspec ================================================ monolith 2.8.1 Monolith Sunshine, Mahdi Robatipoor, Emmanuel Delaborde, Emi Simpson, rhysd https://github.com/Y2Z/monolith https://raw.githubusercontent.com/Y2Z/monolith/master/assets/icon/icon.png https://raw.githubusercontent.com/Y2Z/monolith/master/LICENSE false CLI tool for saving complete web pages as a single HTML file A data hoarder’s dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive. Unlike the conventional “Save page as”, monolith not only saves the target document, it embeds CSS, image, and JavaScript assets all at once, producing a single HTML5 document that is a joy to store and share. If compared to saving websites using wget, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available. Public Domain en-US scraping archiving https://github.com/Y2Z/monolith/blob/master/README.md ================================================ FILE: snap/snapcraft.yaml ================================================ name: monolith base: core18 # Version data defined inside the monolith part below adopt-info: monolith summary: Monolith - Save HTML pages with ease description: | A data hoarder's dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive. 
Unlike conventional "Save page as…", monolith not only saves the target document, it embeds CSS, image, and JavaScript assets all at once, producing a single HTML5 document that is a joy to store and share. If compared to saving websites with wget -mpk, monolith embeds all assets as data URLs and therefore displays the saved page exactly the same, being completely separated from the Internet. confinement: strict architectures: - build-on: amd64 - build-on: arm64 - build-on: armhf - build-on: i386 - build-on: ppc64el - build-on: s390x parts: monolith: plugin: rust source: . build-packages: - libssl-dev - pkg-config override-pull: | snapcraftctl pull # Determine the current tag last_committed_tag="$(git describe --tags --abbrev=0)" last_committed_tag_ver="$(echo ${last_committed_tag} | sed 's/v//')" # Determine the most recent version in the beta channel in the Snap Store last_released_tag="$(snap info $SNAPCRAFT_PROJECT_NAME | awk '$1 == "beta:" { print $2 }')" # If the latest tag from the upstream project has not been released to # beta, build that tag instead of master. if [ "${last_committed_tag_ver}" != "${last_released_tag}" ]; then git fetch git checkout "${last_committed_tag}" fi # set version number of the snap based on what we did above snapcraftctl set-version $(git describe --tags --abbrev=0) apps: monolith: command: monolith plugs: - home - network - removable-media ================================================ FILE: src/cache.rs ================================================ use std::collections::HashMap; use std::fs::File; use std::io::{BufWriter, Write}; use std::path::Path; use redb::{Database, Error, TableDefinition}; pub struct CacheMetadataItem { data: Option>, // Asset's blob; used for caching small files or if on-disk database isn't utilized media_type: Option, // MIME-type, things like "text/plain", "image/png"... charset: Option, // "UTF-8", "UTF-16"... 
} // #[derive(Debug)] pub struct Cache { min_file_size: usize, // Only use database for assets larger than this size (in bytes), otherwise keep them in RAM metadata: HashMap, // Dictionary of metadata (and occasionally data [mostly for very small files]) db: Option, // Pointer to database instance; None if not yet initialized or if failed to initialize db_ok: Option, // None by default, Some(true) if was able to initialize database, Some (false) if an error occurred db_file_path: Option, // Filesystem path to file used for storing database } const FILE_WRITE_BUF_LEN: usize = 1024 * 100; // On-disk cache file write buffer size (in bytes) const TABLE: TableDefinition<&str, &[u8]> = TableDefinition::new("_"); impl Cache { pub fn new(min_file_size: usize, db_file_path: Option) -> Cache { let mut cache = Cache { min_file_size, metadata: HashMap::new(), db: None, db_ok: None, db_file_path: db_file_path.clone(), }; if db_file_path.is_some() { // Attempt to initialize on-disk database match Database::create(Path::new(&db_file_path.unwrap())) { Ok(db) => { cache.db = Some(db); cache.db_ok = Some(true); cache } Err(..) => { cache.db_ok = Some(false); cache } } } else { cache.db_ok = Some(false); cache } } pub fn set(&mut self, key: &str, data: &Vec, media_type: String, charset: String) { let mut cache_metadata_item: CacheMetadataItem = CacheMetadataItem { data: if self.db_ok.is_some() && self.db_ok.unwrap() { None } else { Some(data.to_owned().to_vec()) }, media_type: Some(media_type.to_owned()), charset: Some(charset), }; if (self.db_ok.is_none() || !self.db_ok.unwrap()) || data.len() <= self.min_file_size { cache_metadata_item.data = Some(data.to_owned().to_vec()); } else { match self.db.as_ref().unwrap().begin_write() { Ok(write_txn) => { { let mut table = write_txn.open_table(TABLE).unwrap(); table.insert(key, &*data.to_owned()).unwrap(); } write_txn.commit().unwrap(); } Err(..) 
=> { // Fall back to caching everything in memory cache_metadata_item.data = Some(data.to_owned().to_vec()); } } } self.metadata .insert((*key).to_string(), cache_metadata_item); } pub fn get(&self, key: &str) -> Result<(Vec, String, String), Error> { if self.metadata.contains_key(key) { let metadata_item = self.metadata.get(key).unwrap(); if metadata_item.data.is_some() { return Ok(( metadata_item.data.as_ref().unwrap().to_vec(), metadata_item.media_type.as_ref().expect("").to_string(), metadata_item.charset.as_ref().expect("").to_string(), )); } else if self.db_ok.is_some() && self.db_ok.unwrap() { let read_txn = self.db.as_ref().unwrap().begin_read()?; let table = read_txn.open_table(TABLE)?; let data = table.get(key)?; let bytes = data.unwrap(); return Ok(( bytes.value().to_vec(), metadata_item.media_type.as_ref().expect("").to_string(), metadata_item.charset.as_ref().expect("").to_string(), )); } } Err(Error::TransactionInProgress) // XXX } pub fn contains_key(&self, key: &str) -> bool { self.metadata.contains_key(key) } pub fn destroy_database_file(&mut self) { if self.db_ok.is_none() || !self.db_ok.unwrap() { return; } // Destroy database instance (prevents writes into file) self.db = None; self.db_ok = Some(false); // Wipe database file if let Some(db_file_path) = self.db_file_path.to_owned() { // Overwrite file with zeroes if let Ok(temp_file) = File::options() .read(true) .write(true) .open(db_file_path.clone()) { let mut buffer = [0; FILE_WRITE_BUF_LEN]; let mut remaining_size: usize = temp_file.metadata().unwrap().len() as usize; let mut writer = BufWriter::new(temp_file); while remaining_size > 0 { let bytes_to_write: usize = if remaining_size < FILE_WRITE_BUF_LEN { remaining_size } else { FILE_WRITE_BUF_LEN }; let buffer = &mut buffer[..bytes_to_write]; writer.write(buffer).unwrap(); remaining_size -= bytes_to_write; } } } } } ================================================ FILE: src/cookies.rs ================================================ use 
std::time::{SystemTime, UNIX_EPOCH}; use crate::url::Url; pub struct Cookie { pub domain: String, pub include_subdomains: bool, pub path: String, pub https_only: bool, pub expires: u64, pub name: String, pub value: String, } #[derive(Debug)] pub enum CookieFileContentsParseError { InvalidHeader, } impl Cookie { pub fn is_expired(&self) -> bool { if self.expires == 0 { return false; // Session, never expires } let start = SystemTime::now(); let since_the_epoch = start .duration_since(UNIX_EPOCH) .expect("Time went backwards"); self.expires < since_the_epoch.as_secs() } pub fn matches_url(&self, url: &str) -> bool { match Url::parse(url) { Ok(url) => { // Check protocol scheme match url.scheme() { "http" => { if self.https_only { return false; } } "https" => {} _ => { // Should never match URLs of protocols other than HTTP(S) return false; } } // Check host if let Some(url_host) = url.host_str() { if self.domain.starts_with(".") && self.include_subdomains { if !url_host.to_lowercase().ends_with(&self.domain) && !url_host .eq_ignore_ascii_case(&self.domain[1..self.domain.len() - 1]) { return false; } } else if !url_host.eq_ignore_ascii_case(&self.domain) { return false; } } else { return false; } // Check path if !url.path().eq_ignore_ascii_case(&self.path) && !url.path().starts_with(&self.path) { return false; } } Err(_) => { return false; } } true } } pub fn parse_cookie_file_contents( cookie_file_contents: &str, ) -> Result, CookieFileContentsParseError> { let mut cookies: Vec = Vec::new(); for (i, line) in cookie_file_contents.lines().enumerate() { if i == 0 { // Parsing first line if !line.eq("# HTTP Cookie File") && !line.eq("# Netscape HTTP Cookie File") { return Err(CookieFileContentsParseError::InvalidHeader); } } else { // Ignore comment lines if line.starts_with("#") { continue; } // Attempt to parse values let mut fields = line.split("\t"); if fields.clone().count() != 7 { continue; } cookies.push(Cookie { domain: 
fields.next().unwrap().to_string().to_lowercase(), include_subdomains: fields.next().unwrap() == "TRUE", path: fields.next().unwrap().to_string(), https_only: fields.next().unwrap() == "TRUE", expires: fields.next().unwrap().parse::().unwrap(), name: fields.next().unwrap().to_string(), value: fields.next().unwrap().to_string(), }); } } Ok(cookies) } ================================================ FILE: src/core.rs ================================================ use std::env; use std::error::Error; use std::fmt; use std::fs; use std::io::{self, Write}; use std::path::Path; use chrono::{SecondsFormat, Utc}; use encoding_rs::Encoding; use markup5ever_rcdom::RcDom; use url::Url; use crate::html::{ add_favicon, create_metadata_tag, get_base_url, get_charset, get_robots, get_title, has_favicon, html_to_dom, serialize_document, set_base_url, set_charset, set_robots, walk, }; use crate::session::Session; use crate::url::{create_data_url, resolve_url}; #[derive(Debug)] pub struct MonolithError { details: String, } impl MonolithError { fn new(msg: &str) -> MonolithError { MonolithError { details: msg.to_string(), } } } impl fmt::Display for MonolithError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.details) } } impl Error for MonolithError { fn description(&self) -> &str { &self.details } } #[derive(Clone, Debug, PartialEq, Eq, Default)] pub enum MonolithOutputFormat { #[default] HTML, MHTML, // WARC, // ZIM, // HAR, } #[derive(Default)] pub struct MonolithOptions { pub base_url: Option, pub blacklist_domains: bool, pub domains: Option>, pub encoding: Option, pub ignore_errors: bool, pub insecure: bool, pub isolate: bool, pub no_audio: bool, pub no_css: bool, pub no_fonts: bool, pub no_frames: bool, pub no_images: bool, pub no_js: bool, pub no_metadata: bool, pub no_video: bool, pub output_format: MonolithOutputFormat, pub silent: bool, pub timeout: u64, pub unwrap_noscript: bool, pub user_agent: Option, } const ANSI_COLOR_RED: &str = 
"\x1b[31m"; const ANSI_COLOR_RESET: &str = "\x1b[0m"; const FILE_SIGNATURES: [[&[u8]; 2]; 18] = [ // Image [b"GIF87a", b"image/gif"], [b"GIF89a", b"image/gif"], [b"\xFF\xD8\xFF", b"image/jpeg"], [b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"], [b", input_encoding: Option, input_target: Option, ) -> Result<(Vec, Option), MonolithError> { // Validate options { // Check if custom encoding value is acceptable if let Some(custom_output_encoding) = session.options.encoding.clone() { if Encoding::for_label_no_replacement(custom_output_encoding.as_bytes()).is_none() { return Err(MonolithError::new(&format!( "unknown encoding \"{}\"", &custom_output_encoding ))); } } } let mut base_url: Url = if input_target.is_some() { Url::parse(&input_target.clone().unwrap()).unwrap() } else { Url::parse("data:text/html,").unwrap() }; let mut document_encoding: String = input_encoding.clone().unwrap_or("utf-8".to_string()); let mut dom: RcDom; // Initial parse dom = html_to_dom(&input_data, document_encoding.clone()); // Attempt to determine document's encoding if let Some(html_charset) = get_charset(&dom.document) { if !html_charset.is_empty() { // Check if the charset specified inside HTML is valid if let Some(document_charset) = Encoding::for_label_no_replacement(html_charset.as_bytes()) { document_encoding = html_charset; dom = html_to_dom(&input_data, document_charset.name().to_string()); } } } // Use custom base URL if specified; read and use what's in the DOM otherwise let custom_base_url: String = session.options.base_url.clone().unwrap_or_default(); if custom_base_url.is_empty() { // No custom base URL is specified; try to see if document has BASE element if let Some(existing_base_url) = get_base_url(&dom.document) { base_url = resolve_url(&base_url, &existing_base_url); } } else { // Custom base URL provided match Url::parse(&custom_base_url) { Ok(parsed_url) => { if parsed_url.scheme() == "file" { // File base URLs can only work with documents saved from filesystem if 
base_url.scheme() == "file" { base_url = parsed_url; } } else { base_url = parsed_url; } } Err(_) => { // Failed to parse given base URL, perhaps it's a filesystem path? if base_url.scheme() == "file" { // Relative paths could work for documents saved from filesystem let path: &Path = Path::new(&custom_base_url); if path.exists() { match Url::from_file_path(fs::canonicalize(path).unwrap()) { Ok(file_url) => { base_url = file_url; } Err(_) => { return Err(MonolithError::new(&format!( "could not map given path to base URL \"{}\"", custom_base_url ))); } } } } } } } // Traverse through the document and embed remote assets walk(&mut session, &base_url, &dom.document); // Update or add new BASE element to reroute network requests and hash-links if let Some(new_base_url) = session.options.base_url.clone() { dom = set_base_url(&dom.document, new_base_url); } // Request and embed /favicon.ico (unless it's already linked in the document) if !session.options.no_images && (base_url.scheme() == "http" || base_url.scheme() == "https") && (input_target.is_some() && (input_target.as_ref().unwrap().starts_with("http:") || input_target.as_ref().unwrap().starts_with("https:"))) && !has_favicon(&dom.document) { let favicon_ico_url: Url = resolve_url(&base_url, "/favicon.ico"); match session.retrieve_asset(/*&target_url, */ &base_url, &favicon_ico_url) { Ok((data, final_url, media_type, charset)) => { let favicon_data_url: Url = create_data_url(&media_type, &charset, &data, &final_url); dom = add_favicon(&dom.document, favicon_data_url.to_string()); } Err(_) => { // Failed to retrieve /favicon.ico } } } // Append noindex META-tag let meta_robots_content_value = get_robots(&dom.document).unwrap_or_default(); if meta_robots_content_value.trim().is_empty() || meta_robots_content_value != "none" { dom = set_robots(dom, "none"); } // Save using specified charset, if given if let Some(custom_encoding) = session.options.encoding.clone() { document_encoding = custom_encoding; dom = 
set_charset(dom, document_encoding.clone()); } let document_title: Option = get_title(&dom.document); if session.options.output_format == MonolithOutputFormat::HTML { // Serialize DOM tree let mut result: Vec = serialize_document(dom, document_encoding, &session.options); // Prepend metadata comment tag if !session.options.no_metadata && !input_target.clone().unwrap_or_default().is_empty() { let mut metadata_comment: String = create_metadata_tag(&Url::parse(&input_target.unwrap_or_default()).unwrap()); // let mut metadata_comment: String = create_metadata_tag(target); metadata_comment += "\n"; result.splice(0..0, metadata_comment.as_bytes().to_vec()); } // Ensure newline at end of result if result.last() != Some(&b"\n"[0]) { result.extend_from_slice(b"\n"); } Ok((result, document_title)) } else if session.options.output_format == MonolithOutputFormat::MHTML { // Serialize DOM tree let mut result: Vec = serialize_document(dom, document_encoding, &session.options); // Prepend metadata comment tag if !session.options.no_metadata && !input_target.clone().unwrap_or_default().is_empty() { let mut metadata_comment: String = create_metadata_tag(&Url::parse(&input_target.unwrap_or_default()).unwrap()); // let mut metadata_comment: String = create_metadata_tag(target); metadata_comment += "\n"; result.splice(0..0, metadata_comment.as_bytes().to_vec()); } // Extremely hacky way to convert output to MIME let mime = "MIME-Version: 1.0\r\n\ Content-Type: multipart/related; boundary=\"----=_NextPart_000_0000\"\r\n\ \r\n\ ------=_NextPart_000_0000\r\n\ Content-Type: text/html; charset=\"utf-8\"\r\n\ Content-Location: http://example.com/\r\n\ \r\n"; result.splice(0..0, mime.as_bytes().to_vec()); let mime = "\r\n------=_NextPart_000_0000--\r\n"; result.extend_from_slice(mime.as_bytes()); Ok((result, document_title)) } else { Ok((vec![], document_title)) } } pub fn create_monolithic_document( mut session: Session, target: String, ) -> Result<(Vec, Option), MonolithError> { // Check 
if target was provided if target.is_empty() { return Err(MonolithError::new("no target specified")); } // Validate options { // Check if custom encoding value is acceptable if let Some(custom_encoding) = session.options.encoding.clone() { if Encoding::for_label_no_replacement(custom_encoding.as_bytes()).is_none() { return Err(MonolithError::new(&format!( "unknown encoding \"{}\"", &custom_encoding ))); } } } let mut target_url = match target.as_str() { target_str => match Url::parse(target_str) { Ok(target_url) => match target_url.scheme() { "data" | "file" | "http" | "https" => target_url, unsupported_scheme => { return Err(MonolithError::new(&format!( "unsupported target URL scheme \"{}\"", unsupported_scheme ))); } }, Err(_) => { // Failed to parse given base URL (perhaps it's a filesystem path?) let path: &Path = Path::new(&target_str); match path.exists() { true => match path.is_file() { true => { let canonical_path = fs::canonicalize(path).unwrap(); match Url::from_file_path(canonical_path) { Ok(url) => url, Err(_) => { return Err(MonolithError::new(&format!( "could not generate file URL out of given path \"{}\"", &target_str ))); } } } false => { return Err(MonolithError::new(&format!( "local target \"{}\" is not a file", &target_str ))); } }, false => { // It is not a FS path, now we do what browsers do: // prepend "http://" and hope it points to a website Url::parse(&format!("http://{}", &target_str)).unwrap() } } } }, }; let data: Vec; let document_encoding: Option; // Retrieve target document if target_url.scheme() == "file" || target_url.scheme() == "http" || target_url.scheme() == "https" || target_url.scheme() == "data" { match session.retrieve_asset(&target_url, &target_url) { Ok((retrieved_data, final_url, media_type, charset)) => { if !media_type.eq_ignore_ascii_case("text/html") && !media_type.eq_ignore_ascii_case("application/xhtml+xml") { // Provide output as text (without processing it, the way browsers do) return Ok((retrieved_data, None)); } 
// If got redirected, set target_url to that if final_url != target_url { target_url = final_url.clone(); } data = retrieved_data; document_encoding = Some(charset); } Err(_) => { return Err(MonolithError::new("could not retrieve target document")); } } } else { return Err(MonolithError::new("unsupported target")); } create_monolithic_document_from_data( session, data, document_encoding, Some(target_url.to_string()), ) } pub fn detect_media_type(data: &[u8], url: &Url) -> String { // At first attempt to read file's header for file_signature in FILE_SIGNATURES.iter() { if data.starts_with(file_signature[0]) { return String::from_utf8(file_signature[1].to_vec()).unwrap(); } } // If header didn't match any known magic signatures, // try to guess media type from file name let parts: Vec<&str> = url.path().split('/').collect(); detect_media_type_by_file_name(parts.last().unwrap()) } pub fn detect_media_type_by_file_name(filename: &str) -> String { let filename_lowercased: &str = &filename.to_lowercase(); let parts: Vec<&str> = filename_lowercased.split('.').collect(); let mime: &str = match parts.last() { Some(v) => match *v { "avi" => "video/avi", "bmp" => "image/bmp", "css" => "text/css", "flac" => "audio/flac", "gif" => "image/gif", "htm" | "html" => "text/html", "ico" => "image/x-icon", "jpeg" | "jpg" => "image/jpeg", "js" => "text/javascript", "json" => "application/json", "jsonld" => "application/ld+json", "mp3" => "audio/mpeg", "mp4" | "m4v" => "video/mp4", "ogg" => "audio/ogg", "ogv" => "video/ogg", "pdf" => "application/pdf", "png" => "image/png", "svg" => "image/svg+xml", "swf" => "application/x-shockwave-flash", "tif" | "tiff" => "image/tiff", "txt" => "text/plain", "wav" => "audio/wav", "webp" => "image/webp", "woff" => "font/woff", "woff2" => "font/woff2", "xhtml" => "application/xhtml+xml", "xml" => "text/xml", &_ => "", }, None => "", }; mime.to_string() } pub fn format_output_path( path: &str, document_title: &str, output_format: MonolithOutputFormat, ) 
-> String { let datetime: &str = &Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true); path.replace("%timestamp%", &datetime.replace(':', "_")) .replace( "%title%", document_title .to_string() .replace(['/', '\\'], "_") .replace('<', "[") .replace('>', "]") .replace(':', " - ") .replace('\"', "") .replace('|', "-") .replace('?', "") .trim_start_matches('.'), ) .replace( "%ext%", if output_format == MonolithOutputFormat::HTML { "htm" } else if output_format == MonolithOutputFormat::MHTML { "mht" } else { "" }, ) .replace( "%extension%", if output_format == MonolithOutputFormat::HTML { "html" } else if output_format == MonolithOutputFormat::MHTML { "mhtml" } else { "" }, ) .to_string() }

/// Tells whether `media_type` denotes textual content: anything under `text/`
/// or one of the entries of `PLAINTEXT_MEDIA_TYPES` (compared in lowercase).
pub fn is_plaintext_media_type(media_type: &str) -> bool {
    let media_type_lowercased = media_type.to_lowercase();

    media_type_lowercased.starts_with("text/")
        || PLAINTEXT_MEDIA_TYPES.contains(&media_type_lowercased.as_str())
}

/// Splits a Content-Type value into `(media_type, charset, is_base64)`.
///
/// The first `;`-separated item is the media type; later items may be a
/// `base64` flag or a `charset=` parameter (both matched case-insensitively),
/// and any other item is ignored. Missing pieces default to `text/plain`,
/// `US-ASCII` and `false`.
pub fn parse_content_type(content_type: &str) -> (String, String, bool) {
    const CHARSET_PARAM: &str = "charset=";

    let mut media_type: String = "text/plain".to_string();
    let mut charset: String = "US-ASCII".to_string();
    let mut is_base64: bool = false;

    // Parse meta data; enumerate() can't overflow the way a manual i8 counter does
    for (i, item) in content_type.split(';').map(str::trim).enumerate() {
        if i == 0 {
            if !item.is_empty() {
                media_type = item.to_string();
            }
        } else if item.eq_ignore_ascii_case("base64") {
            is_base64 = true;
        } else if item
            .get(..CHARSET_PARAM.len())
            .map_or(false, |name| name.eq_ignore_ascii_case(CHARSET_PARAM))
        {
            charset = item[CHARSET_PARAM.len()..].to_string();
        }
    }

    (media_type, charset, is_base64)
}

pub fn print_error_message(text: &str) { let stderr = io::stderr(); let mut handle = stderr.lock(); const ENV_VAR_NO_COLOR: &str = "NO_COLOR"; const ENV_VAR_TERM: &str = "TERM"; let mut no_color = env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr); if let Some(term) = env::var_os(ENV_VAR_TERM) { if term == "dumb" { no_color = true; } } if handle .write_all( format!( "{}{}{}\n", if no_color { "" } else { ANSI_COLOR_RED }, &text, if
no_color { "" } else { ANSI_COLOR_RESET }, ) .as_bytes(), ) .is_ok() {} } pub fn print_info_message(text: &str) { let stderr = io::stderr(); let mut handle = stderr.lock(); if handle.write_all(format!("{}\n", &text).as_bytes()).is_ok() {} } ================================================ FILE: src/css.rs ================================================
use cssparser::{
    serialize_identifier, serialize_string, ParseError, Parser, ParserInput, SourcePosition, Token,
};

use crate::session::Session;
use crate::url::{create_data_url, resolve_url, Url, EMPTY_IMAGE_DATA_URL};

const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
    // Universal
    "background",
    "background-image",
    "border-image",
    "border-image-source",
    "content",
    "cursor",
    "list-style",
    "list-style-image",
    "mask",
    "mask-image",
    // Specific to @counter-style
    "additive-symbols",
    "negative",
    "pad",
    "prefix",
    "suffix",
    "symbols",
];

/// Parses `css` and returns it re-serialized by `process_css`, which embeds
/// the assets the stylesheet references (resolved against `document_url`).
pub fn embed_css(session: &mut Session, document_url: &Url, css: &str) -> String {
    let mut input = ParserInput::new(css);
    let mut parser = Parser::new(&mut input);

    process_css(session, document_url, &mut parser, "", "", "").unwrap()
}

/// Serializes `ident` as a CSS identifier, with trailing whitespace removed.
pub fn format_ident(ident: &str) -> String {
    let mut res: String = "".to_string();
    let _ = serialize_identifier(ident, &mut res);
    res = res.trim_end().to_string();
    res
}

/// Serializes `string` as a quoted CSS string.
pub fn format_quoted_string(string: &str) -> String {
    let mut res: String = "".to_string();
    let _ = serialize_string(string, &mut res);
    res
}

/// Tells whether `prop_name` is (case-insensitively) one of the CSS properties
/// listed in `CSS_PROPS_WITH_IMAGE_URLS`.
pub fn is_image_url_prop(prop_name: &str) -> bool {
    CSS_PROPS_WITH_IMAGE_URLS
        .iter()
        .any(|p| prop_name.eq_ignore_ascii_case(p))
}

/// Walks the token stream of `parser` and re-serializes it into a string,
/// replacing `@import` targets and `url()` references with data URLs of the
/// retrieved assets.
///
/// `rule_name`, `prop_name` and `func_name` carry the at-rule, property and
/// function that enclose the tokens being processed; the function recurses
/// into nested blocks and functions. References that can't be retrieved are
/// kept as remote URLs when they are HTTP(S).
pub fn process_css<'a>(
    session: &mut Session,
    document_url: &Url,
    parser: &mut Parser,
    rule_name: &str,
    prop_name: &str,
    func_name: &str,
) -> Result<String, ParseError<'a, String>> {
    let mut result: String = "".to_string();

    let mut curr_rule: String = rule_name.to_string();
    let mut curr_prop: String = prop_name.to_string();
    let mut token: &Token;
    let mut token_offset: SourcePosition;

    loop {
        token_offset = parser.position();
        token = match parser.next_including_whitespace_and_comments() {
            Ok(token) => token,
            Err(_) => {
                break;
            }
        };

        match *token {
            Token::Comment(_) => {
                let token_slice = parser.slice_from(token_offset);
                result.push_str(token_slice);
            }
            Token::Semicolon => result.push(';'),
            Token::Colon => result.push(':'),
            Token::Comma => result.push(','),
            Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => {
                if session.options.no_fonts && curr_rule == "font-face" {
                    continue;
                }

                let closure: &str;
                if token == &Token::ParenthesisBlock {
                    result.push('(');
                    closure = ")";
                } else if token == &Token::SquareBracketBlock {
                    result.push('[');
                    closure = "]";
                } else {
                    result.push('{');
                    closure = "}";
                }

                let block_css: String = parser
                    .parse_nested_block(|parser| {
                        process_css(
                            session,
                            document_url,
                            parser,
                            rule_name,
                            curr_prop.as_str(),
                            func_name,
                        )
                    })
                    .unwrap();
                result.push_str(block_css.as_str());

                result.push_str(closure);
            }
            Token::CloseParenthesis => result.push(')'),
            Token::CloseSquareBracket => result.push(']'),
            Token::CloseCurlyBracket => result.push('}'),
            Token::IncludeMatch => result.push_str("~="),
            Token::DashMatch => result.push_str("|="),
            Token::PrefixMatch => result.push_str("^="),
            Token::SuffixMatch => result.push_str("$="),
            Token::SubstringMatch => result.push_str("*="),
            // HTML comment delimiters are passed through unchanged
            Token::CDO => result.push_str("<!--"),
            Token::CDC => result.push_str("-->"),
            Token::WhiteSpace(value) => {
                result.push_str(value);
            }
            // div...
            Token::Ident(ref value) => {
                curr_rule = "".to_string();
                curr_prop = value.to_string();
                result.push_str(&format_ident(value));
            }
            // @import, @font-face, @charset, @media...
            Token::AtKeyword(ref value) => {
                curr_rule = value.to_string();
                if session.options.no_fonts && curr_rule == "font-face" {
                    continue;
                }
                result.push('@');
                result.push_str(value);
            }
            Token::Hash(ref value) => {
                result.push('#');
                result.push_str(value);
            }
            Token::QuotedString(ref value) => {
                if curr_rule == "import" {
                    // Reset current at-rule value
                    curr_rule = "".to_string();

                    // Skip empty import values
                    if value.len() == 0 {
                        result.push_str("''");
                        continue;
                    }

                    let import_full_url: Url = resolve_url(document_url, value);
                    match session.retrieve_asset(document_url, &import_full_url) {
                        Ok((
                            import_contents,
                            import_final_url,
                            import_media_type,
                            import_charset,
                        )) => {
                            let mut import_data_url = create_data_url(
                                &import_media_type,
                                &import_charset,
                                embed_css(
                                    session,
                                    &import_final_url,
                                    &String::from_utf8_lossy(&import_contents),
                                )
                                .as_bytes(),
                                &import_final_url,
                            );
                            import_data_url.set_fragment(import_full_url.fragment());
                            result
                                .push_str(format_quoted_string(import_data_url.as_ref()).as_str());
                        }
                        Err(_) => {
                            // Keep remote reference if unable to retrieve the asset
                            if import_full_url.scheme() == "http"
                                || import_full_url.scheme() == "https"
                            {
                                result.push_str(
                                    format_quoted_string(import_full_url.as_ref()).as_str(),
                                );
                            }
                        }
                    }
                } else if func_name == "url" {
                    // Skip empty url()'s
                    if value.len() == 0 {
                        continue;
                    }

                    if session.options.no_images && is_image_url_prop(curr_prop.as_str()) {
                        result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
                    } else {
                        let resolved_url: Url = resolve_url(document_url, value);
                        match session.retrieve_asset(document_url, &resolved_url) {
                            Ok((data, final_url, media_type, charset)) => {
                                // TODO: if it's @font-face, exclude definitions of non-woff/woff-2 fonts (if woff/woff-2 are present)
                                let mut data_url =
                                    create_data_url(&media_type, &charset, &data, &final_url);
                                data_url.set_fragment(resolved_url.fragment());
                                result.push_str(format_quoted_string(data_url.as_ref()).as_str());
                            }
                            Err(_) => {
                                // Keep remote reference if unable to retrieve the asset
                                if resolved_url.scheme() == "http"
                                    || resolved_url.scheme() == "https"
                                {
                                    result.push_str(
                                        format_quoted_string(resolved_url.as_ref()).as_str(),
                                    );
                                }
                            }
                        }
                    }
                } else {
                    result.push_str(format_quoted_string(value).as_str());
                }
            }
            Token::Number {
                ref has_sign,
                ref value,
                ..
            } => {
                if *has_sign && *value >= 0. {
                    result.push('+');
                }
                result.push_str(&value.to_string())
            }
            Token::Percentage {
                ref has_sign,
                ref unit_value,
                ..
            } => {
                if *has_sign && *unit_value >= 0. {
                    result.push('+');
                }
                result.push_str(&(unit_value * 100.0).to_string());
                result.push('%');
            }
            Token::Dimension {
                ref has_sign,
                ref value,
                ref unit,
                ..
            } => {
                if *has_sign && *value >= 0. {
                    result.push('+');
                }
                result.push_str(&value.to_string());
                result.push_str(unit.as_ref());
            }
            // #selector, #id...
            Token::IDHash(ref value) => {
                curr_rule = "".to_string();
                result.push('#');
                result.push_str(&format_ident(value));
            }
            // url()
            Token::UnquotedUrl(ref value) => {
                let is_import: bool = curr_rule == "import";

                if is_import {
                    // Reset current at-rule value
                    curr_rule = "".to_string();
                }

                // Skip empty url()'s
                if value.len() < 1 {
                    result.push_str("url()");
                    continue;
                } else if value.starts_with("#") {
                    result.push_str("url(");
                    result.push_str(value);
                    result.push(')');
                    continue;
                }

                result.push_str("url(");
                if is_import {
                    let full_url: Url = resolve_url(document_url, value);
                    match session.retrieve_asset(document_url, &full_url) {
                        Ok((css, final_url, media_type, charset)) => {
                            let mut data_url = create_data_url(
                                &media_type,
                                &charset,
                                embed_css(session, &final_url, &String::from_utf8_lossy(&css))
                                    .as_bytes(),
                                &final_url,
                            );
                            data_url.set_fragment(full_url.fragment());
                            result.push_str(format_quoted_string(data_url.as_ref()).as_str());
                        }
                        Err(_) => {
                            // Keep remote reference if unable to retrieve the asset
                            if full_url.scheme() == "http" || full_url.scheme() == "https" {
                                result.push_str(format_quoted_string(full_url.as_ref()).as_str());
                            }
                        }
                    }
                } else if is_image_url_prop(curr_prop.as_str()) && session.options.no_images {
                    result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
                } else {
                    let full_url: Url = resolve_url(document_url, value);
                    match session.retrieve_asset(document_url, &full_url) {
                        Ok((data, final_url, media_type, charset)) => {
                            let mut data_url =
                                create_data_url(&media_type, &charset, &data, &final_url);
                            data_url.set_fragment(full_url.fragment());
                            result.push_str(format_quoted_string(data_url.as_ref()).as_str());
                        }
                        Err(_) => {
                            // Keep remote reference if unable to retrieve the asset
                            if full_url.scheme() == "http" || full_url.scheme() == "https" {
                                result.push_str(format_quoted_string(full_url.as_ref()).as_str());
                            }
                        }
                    }
                }
                result.push(')');
            }
            // =
            Token::Delim(ref value) => result.push(*value),
            Token::Function(ref name) => {
                let function_name: &str = &name.clone();
                result.push_str(function_name);
                result.push('(');

                let block_css: String = parser
                    .parse_nested_block(|parser| {
                        process_css(
                            session,
                            document_url,
                            parser,
                            curr_rule.as_str(),
                            curr_prop.as_str(),
                            function_name,
                        )
                    })
                    .unwrap();
                result.push_str(block_css.as_str());

                result.push(')');
            }
            Token::BadUrl(_) | Token::BadString(_) => {}
        }
    }

    // Ensure empty CSS is really empty
    if !result.is_empty() && result.trim().is_empty() {
        result = result.trim().to_string()
    }

    Ok(result)
}
================================================ FILE: src/gui.rs ================================================ use std::fs; use std::io::Write; use std::path; use std::thread; use directories::UserDirs; use druid::widget::{Button, Checkbox, Either, Flex, Label, Spinner, TextBox}; use druid::{ commands, AppDelegate, AppLauncher, Command, Data, DelegateCtx, Env, FileDialogOptions, FileSpec, Handled, Lens, LocalizedString, PlatformError, Target, Widget, WidgetExt, WindowDesc, }; use tempfile::{Builder, NamedTempFile}; use monolith::cache::Cache; use monolith::core::{ create_monolithic_document, format_output_path, MonolithError, MonolithOptions, MonolithOutputFormat, }; use monolith::session::Session; const
CACHE_ASSET_FILE_SIZE_THRESHOLD: usize = 1024 * 20; // Minimum file size for on-disk caching (in bytes) const FILESPEC_HTML: FileSpec = FileSpec::new("HTML files", &["html"]); const MONOLITH_GUI_WRITE_OUTPUT: druid::Selector<(Vec, Option)> = druid::Selector::new("monolith-gui.write-output"); const MONOLITH_GUI_ERROR: druid::Selector = druid::Selector::new("monolith-gui.error"); const TEXT_BOX_WIDTH: f64 = 512_f64; struct Delegate; #[derive(Clone, Data, Lens)] struct AppState { target: String, keep_fonts: bool, keep_frames: bool, keep_images: bool, keep_scripts: bool, keep_styles: bool, output_path: String, isolate: bool, unwrap_noscript: bool, busy: bool, } fn main() -> Result<(), PlatformError> { let mut program_name: String = env!("CARGO_PKG_NAME").to_string(); if let Some(l) = program_name.get_mut(0..1) { l.make_ascii_uppercase(); } let main_window = WindowDesc::new(ui_builder()) .title(program_name) .with_min_size((720_f64, 360_f64)); let state = AppState { target: "".to_string(), keep_fonts: false, keep_frames: true, keep_images: true, keep_scripts: true, keep_styles: true, output_path: if let Some(base_dirs) = UserDirs::new() { base_dirs.download_dir().unwrap().display().to_string() + &path::MAIN_SEPARATOR.to_string() + "%title%.%ext%" } else { "%title%.%ext%".to_string() }, isolate: true, unwrap_noscript: false, busy: false, }; AppLauncher::with_window(main_window) .delegate(Delegate) .launch(state) } fn ui_builder() -> impl Widget { let target_label: Label = Label::new("Target:"); let target_input = TextBox::new() .with_placeholder("URL or filesystem path") .fix_width(TEXT_BOX_WIDTH) .lens(AppState::target) .disabled_if(|state: &AppState, _env| state.busy); let target_button = Button::new(LocalizedString::new("Open file")) .on_click(|ctx, _, _| { ctx.submit_command( commands::SHOW_OPEN_PANEL.with( FileDialogOptions::new() .allowed_types(vec![FILESPEC_HTML]) .default_type(FILESPEC_HTML), ), ) }) .disabled_if(|state: &AppState, _env| state.busy) 
.padding(5.0); let output_path_label: Label = Label::new("Output path:"); let output_path_input = TextBox::new() .with_placeholder("Filesystem path") .fix_width(TEXT_BOX_WIDTH) .lens(AppState::output_path) .disabled_if(|state: &AppState, _env| state.busy); let output_path_button = Button::new(LocalizedString::new("Browse")) .on_click(|ctx, state: &mut AppState, _env| { ctx.submit_command( commands::SHOW_SAVE_PANEL.with( FileDialogOptions::new() // .force_starting_directory( // state // .output_path.clone() // .split(path::MAIN_SEPARATOR).collect::>()[..2] // .join(&path::MAIN_SEPARATOR.to_string()) // ) .default_name( state .output_path .clone() .split(path::MAIN_SEPARATOR) .last() .unwrap_or_default(), ), ), ) }) .disabled_if(|state: &AppState, _env| state.busy) .padding(5.0); let fonts_checkbox = Checkbox::new("Include fonts") .lens(AppState::keep_fonts) .disabled_if(|state: &AppState, _env| state.busy) .padding(5.0); let frames_checkbox = Checkbox::new("Include frames") .lens(AppState::keep_frames) .disabled_if(|state: &AppState, _env| state.busy) .padding(5.0); let images_checkbox = Checkbox::new("Include images") .lens(AppState::keep_images) .disabled_if(|state: &AppState, _env| state.busy) .padding(5.0); let styles_checkbox = Checkbox::new("Include styles") .lens(AppState::keep_styles) .disabled_if(|state: &AppState, _env| state.busy) .padding(5.0); let scripts_checkbox = Checkbox::new("Include scripts") .lens(AppState::keep_scripts) .disabled_if(|state: &AppState, _env| state.busy) .padding(5.0); let isolate_checkbox = Checkbox::new("Isolate document") .lens(AppState::isolate) .disabled_if(|state: &AppState, _env| state.busy) .padding(5.0); let unwrap_noscript_checkbox = Checkbox::new("Unwrap NOSCRIPT") .lens(AppState::unwrap_noscript) .disabled_if(|state: &AppState, _env| state.busy) .padding(5.0); let start_stop_button = Button::new(LocalizedString::new("Start")) .on_click(|ctx, state: &mut AppState, _env| { if state.busy { return; } let mut options: 
MonolithOptions = MonolithOptions::default(); options.ignore_errors = true; options.insecure = true; options.silent = true; options.no_frames = !state.keep_frames; options.no_fonts = !state.keep_fonts; options.no_images = !state.keep_images; options.no_css = !state.keep_styles; options.no_js = !state.keep_scripts; options.isolate = state.isolate; options.unwrap_noscript = state.unwrap_noscript; let handle = ctx.get_external_handle(); let thread_state = state.clone(); state.busy = true; // Set up cache (attempt to create temporary file) let temp_cache_file: Option = match Builder::new().prefix("monolith-").tempfile() { Ok(tempfile) => Some(tempfile), Err(_) => None, }; let cache = Some(Cache::new( CACHE_ASSET_FILE_SIZE_THRESHOLD, if temp_cache_file.is_some() { Some( temp_cache_file .as_ref() .unwrap() .path() .display() .to_string(), ) } else { None }, )); let session: Session = Session::new(cache, None, options); thread::spawn( move || match create_monolithic_document(session, thread_state.target) { Ok(result) => { handle .submit_command(MONOLITH_GUI_WRITE_OUTPUT, result, Target::Auto) .unwrap(); // TODO: make it work again //cache.unwrap().destroy_database_file(); } Err(error) => { handle .submit_command(MONOLITH_GUI_ERROR, error, Target::Auto) .unwrap(); // TODO: make it work again //cache.unwrap().destroy_database_file(); } }, ); }) .disabled_if(|state: &AppState, _env| { state.busy || state.target.is_empty() || state.output_path.is_empty() }) .padding(5.0); let spinner = Either::new( |sate: &AppState, _env| sate.busy, Spinner::new(), Label::new(""), ) .padding(5.0); Flex::column() .with_spacer(5_f64) .with_child( Flex::row() .with_child(target_label) .with_spacer(5_f64) .with_child(target_input) .with_child(target_button), ) .with_child(fonts_checkbox) .with_child(frames_checkbox) .with_child(images_checkbox) .with_child(scripts_checkbox) .with_child(styles_checkbox) .with_child( Flex::row() .with_child(output_path_label) .with_spacer(5_f64) 
.with_child(output_path_input) .with_child(output_path_button), ) .with_child( Flex::row() .with_child(isolate_checkbox) .with_child(unwrap_noscript_checkbox), ) .with_child(start_stop_button) .with_child(spinner) .with_spacer(5_f64) } impl AppDelegate for Delegate { fn command( &mut self, _ctx: &mut DelegateCtx, _target: Target, cmd: &Command, state: &mut AppState, _env: &Env, ) -> Handled { // Handle "Open file" button next to target input if let Some(file_info) = cmd.get(commands::OPEN_FILE) { state.target = file_info.path().display().to_string(); return Handled::Yes; } // Handle "Browse" button next to output path input else if let Some(file_info) = cmd.get(commands::SAVE_FILE_AS) { state.output_path = file_info.path().display().to_string(); return Handled::Yes; } // Write output else if let Some(result) = cmd.get(MONOLITH_GUI_WRITE_OUTPUT) { let (html, title) = result; if !state.output_path.is_empty() { match fs::File::create(format_output_path( &state.output_path, &title.clone().unwrap_or_default(), MonolithOutputFormat::HTML, )) { Ok(mut file) => { let _ = file.write(&html); } Err(_) => { eprintln!("Error: could not write output"); } } } else { eprintln!("Error: no output specified"); } state.busy = false; return Handled::Yes; } // Handle errors else if let Some(_error) = cmd.get(MONOLITH_GUI_ERROR) { state.busy = false; return Handled::Yes; } Handled::No } } ================================================ FILE: src/html.rs ================================================ use base64::{prelude::BASE64_STANDARD, Engine}; use chrono::{SecondsFormat, Utc}; use encoding_rs::Encoding; use html5ever::interface::{Attribute, QualName}; use html5ever::parse_document; use html5ever::serialize::{serialize, SerializeOpts}; use html5ever::tendril::{format_tendril, TendrilSink}; use html5ever::tree_builder::{create_element, TreeSink}; use html5ever::{namespace_url, ns, LocalName}; use markup5ever_rcdom::{Handle, NodeData, RcDom, SerializableHandle}; use regex::Regex; use 
sha2::{Digest, Sha256, Sha384, Sha512}; use std::default::Default; use crate::core::{parse_content_type, MonolithOptions}; use crate::css::embed_css; use crate::js::attr_is_event_handler; use crate::session::Session; use crate::url::{ clean_url, create_data_url, is_url_and_has_protocol, resolve_url, Url, EMPTY_IMAGE_DATA_URL, }; const FAVICON_VALUES: &[&str] = &["icon", "shortcut icon"]; const WHITESPACES: &[char] = &[' ', '\t', '\n', '\x0c', '\r']; // ASCII whitespaces #[derive(PartialEq, Eq)] pub enum LinkType { Alternate, AppleTouchIcon, DnsPrefetch, Favicon, Preload, Stylesheet, } pub struct SrcSetItem<'a> { pub path: &'a str, pub descriptor: &'a str, // Width or pixel density descriptor } pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom { let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(document.clone()), SerializeOpts::default(), ) .expect("unable to serialize DOM into buffer"); let dom = html_to_dom(&buf, "utf-8".to_string()); for head in find_nodes(&dom.document, vec!["html", "head"]).iter() { let favicon_node = create_element( &dom, QualName::new(None, ns!(), LocalName::from("link")), vec![ Attribute { name: QualName::new(None, ns!(), LocalName::from("rel")), value: format_tendril!("icon"), }, Attribute { name: QualName::new(None, ns!(), LocalName::from("href")), value: format_tendril!("{}", favicon_data_url), }, ], ); // Insert favicon LINK tag into HEAD head.children.borrow_mut().push(favicon_node.clone()); } dom } pub fn check_integrity(data: &[u8], integrity: &str) -> bool { if integrity.starts_with("sha256-") { let mut hasher = Sha256::new(); hasher.update(data); BASE64_STANDARD.encode(hasher.finalize()) == integrity[7..] } else if integrity.starts_with("sha384-") { let mut hasher = Sha384::new(); hasher.update(data); BASE64_STANDARD.encode(hasher.finalize()) == integrity[7..] 
} else if integrity.starts_with("sha512-") { let mut hasher = Sha512::new(); hasher.update(data); BASE64_STANDARD.encode(hasher.finalize()) == integrity[7..] } else { false } } pub fn compose_csp(options: &MonolithOptions) -> String { let mut string_list = vec![]; if options.isolate { string_list.push("default-src 'unsafe-eval' 'unsafe-inline' data:;"); } if options.no_css { string_list.push("style-src 'none';"); } if options.no_fonts { string_list.push("font-src 'none';"); } if options.no_frames { string_list.push("frame-src 'none';"); string_list.push("child-src 'none';"); } if options.no_js { string_list.push("script-src 'none';"); } if options.no_images { // Note: "data:" is required for transparent pixel images to work string_list.push("img-src data:;"); } string_list.join(" ") } pub fn create_metadata_tag(url: &Url) -> String { let datetime: &str = &Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true); let mut clean_url: Url = clean_url(url.clone()); // Prevent credentials from getting into metadata if clean_url.scheme() == "http" || clean_url.scheme() == "https" { // Only HTTP(S) URLs can contain credentials clean_url.set_username("").unwrap(); clean_url.set_password(None).unwrap(); } format!( "", if clean_url.scheme() == "http" || clean_url.scheme() == "https" { clean_url.as_str() } else { "local source" }, datetime, env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"), ) } pub fn embed_srcset(session: &mut Session, document_url: &Url, srcset: &str) -> String { let srcset_items: Vec = parse_srcset(srcset); // Embed assets let mut result: String = "".to_string(); let mut i: usize = srcset_items.len(); for srcset_item in srcset_items { if session.options.no_images { result.push_str(EMPTY_IMAGE_DATA_URL); } else { let image_full_url: Url = resolve_url(document_url, srcset_item.path); match session.retrieve_asset(document_url, &image_full_url) { Ok((image_data, image_final_url, image_media_type, image_charset)) => { let mut image_data_url = create_data_url( 
&image_media_type, &image_charset, &image_data, &image_final_url, ); // Append retrieved asset as a data URL image_data_url.set_fragment(image_full_url.fragment()); result.push_str(image_data_url.as_ref()); } Err(_) => { // Keep remote reference if unable to retrieve the asset if image_full_url.scheme() == "http" || image_full_url.scheme() == "https" { result.push_str(image_full_url.as_ref()); } else { // Avoid breaking the structure in case if not an HTTP(S) URL result.push_str(EMPTY_IMAGE_DATA_URL); } } } } if !srcset_item.descriptor.is_empty() { result.push(' '); result.push_str(srcset_item.descriptor); } if i > 1 { result.push_str(", "); } i -= 1; } result } pub fn find_nodes(node: &Handle, mut path: Vec<&str>) -> Vec { let mut result = vec![]; while !path.is_empty() { match node.data { NodeData::Document | NodeData::Element { .. } => { // Dig deeper for child in node.children.borrow().iter() { if get_node_name(child) .unwrap_or_default() .eq_ignore_ascii_case(path[0]) { if path.len() == 1 { result.push(child.clone()); } else { result.append(&mut find_nodes(child, path[1..].to_vec())); } } } } _ => {} } path.remove(0); } result } pub fn get_base_url(handle: &Handle) -> Option { for base_node in find_nodes(handle, vec!["html", "head", "base"]).iter() { // Only the first base tag matters (we ignore the rest, if there's any) return get_node_attr(base_node, "href"); } None } pub fn get_charset(node: &Handle) -> Option { for meta_node in find_nodes(node, vec!["html", "head", "meta"]).iter() { if let Some(meta_charset_node_attr_value) = get_node_attr(meta_node, "charset") { // Processing return Some(meta_charset_node_attr_value); } if get_node_attr(meta_node, "http-equiv") .unwrap_or_default() .eq_ignore_ascii_case("content-type") { if let Some(meta_content_type_node_attr_value) = get_node_attr(meta_node, "content") { // Processing let (_media_type, charset, _is_base64) = parse_content_type(&meta_content_type_node_attr_value); return Some(charset); } } } None } // 
TODO: get rid of this function (replace with find_nodes) pub fn get_child_node_by_name(parent: &Handle, node_name: &str) -> Option { let children = parent.children.borrow(); let matching_children = children.iter().find(|child| match child.data { NodeData::Element { ref name, .. } => &*name.local == node_name, _ => false, }); matching_children.cloned() } pub fn get_node_attr(node: &Handle, attr_name: &str) -> Option { match &node.data { NodeData::Element { attrs, .. } => { for attr in attrs.borrow().iter() { if &*attr.name.local == attr_name { return Some(attr.value.to_string()); } } None } _ => None, } } pub fn get_node_name(node: &Handle) -> Option<&'_ str> { match &node.data { NodeData::Element { name, .. } => Some(name.local.as_ref()), _ => None, } } pub fn get_parent_node(child: &Handle) -> Handle { let parent = child.parent.take().clone(); parent.and_then(|node| node.upgrade()).unwrap() } pub fn get_robots(handle: &Handle) -> Option { for meta_node in find_nodes(handle, vec!["html", "head", "meta"]).iter() { // Only the first base tag matters (we ignore the rest, if there's any) if get_node_attr(meta_node, "name") .unwrap_or_default() .eq_ignore_ascii_case("robots") { return get_node_attr(meta_node, "content"); } } None } pub fn get_title(node: &Handle) -> Option { for title_node in find_nodes(node, vec!["html", "head", "title"]).iter() { for child_node in title_node.children.borrow().iter() { if let NodeData::Text { ref contents } = child_node.data { return Some(contents.borrow().to_string()); } } } None } pub fn has_favicon(handle: &Handle) -> bool { let mut found_favicon: bool = false; for link_node in find_nodes(handle, vec!["html", "head", "link"]).iter() { if let Some(attr_value) = get_node_attr(link_node, "rel") { if is_favicon(attr_value.trim()) { found_favicon = true; break; } } } found_favicon } pub fn html_to_dom(data: &Vec, document_encoding: String) -> RcDom { let s: String; if let Some(encoding) = Encoding::for_label(document_encoding.as_bytes()) 
{ let (string, _, _) = encoding.decode(data); s = string.to_string(); } else { s = String::from_utf8_lossy(data).to_string(); } parse_document(RcDom::default(), Default::default()) .from_utf8() .read_from(&mut s.as_bytes()) .unwrap() } pub fn is_favicon(attr_value: &str) -> bool { FAVICON_VALUES.contains(&attr_value.to_lowercase().as_str()) } pub fn parse_link_type(link_attr_rel_value: &str) -> Vec { let mut types: Vec = vec![]; for link_attr_rel_type in link_attr_rel_value.split_whitespace() { if link_attr_rel_type.eq_ignore_ascii_case("alternate") { types.push(LinkType::Alternate); } else if link_attr_rel_type.eq_ignore_ascii_case("dns-prefetch") { types.push(LinkType::DnsPrefetch); } else if link_attr_rel_type.eq_ignore_ascii_case("preload") { types.push(LinkType::Preload); } else if link_attr_rel_type.eq_ignore_ascii_case("stylesheet") { types.push(LinkType::Stylesheet); } else if is_favicon(link_attr_rel_type) { types.push(LinkType::Favicon); } else if link_attr_rel_type.eq_ignore_ascii_case("apple-touch-icon") { types.push(LinkType::AppleTouchIcon); } } types } pub fn parse_srcset(srcset: &str) -> Vec { let mut srcset_items: Vec = vec![]; // Parse srcset let mut partials: Vec<&str> = srcset.split(WHITESPACES).collect(); let mut path: Option<&str> = None; let mut descriptor: Option<&str> = None; let mut i = 0; while i < partials.len() { let partial = partials[i]; i += 1; // Skip empty strings if partial.is_empty() { continue; } if partial.ends_with(',') { if path.is_none() { path = Some(partial.strip_suffix(',').unwrap()); descriptor = Some("") } else { descriptor = Some(partial.strip_suffix(',').unwrap()); } } else if path.is_none() { path = Some(partial); } else { let mut chunks: Vec<&str> = partial.split(',').collect(); if !chunks.is_empty() && chunks.first().unwrap().ends_with(['x', 'w']) { descriptor = Some(chunks.first().unwrap()); chunks.remove(0); } if !chunks.is_empty() { if descriptor.is_some() { partials.insert(0, 
&partial[descriptor.unwrap().len()..]); } else { partials.insert(0, partial); } } } if path.is_some() && descriptor.is_some() { srcset_items.push(SrcSetItem { path: path.unwrap(), descriptor: descriptor.unwrap(), }); path = None; descriptor = None; } } // Final attempt to process what was found if path.is_some() { srcset_items.push(SrcSetItem { path: path.unwrap(), descriptor: descriptor.unwrap_or_default(), }); } srcset_items } pub fn set_base_url(document: &Handle, base_href_value: String) -> RcDom { let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(document.clone()), SerializeOpts::default(), ) .expect("unable to serialize DOM into buffer"); let dom = html_to_dom(&buf, "utf-8".to_string()); if let Some(html_node) = get_child_node_by_name(&dom.document, "html") { if let Some(head_node) = get_child_node_by_name(&html_node, "head") { // Check if BASE node already exists in the DOM tree if let Some(base_node) = get_child_node_by_name(&head_node, "base") { set_node_attr(&base_node, "href", Some(base_href_value)); } else { let base_node = create_element( &dom, QualName::new(None, ns!(), LocalName::from("base")), vec![Attribute { name: QualName::new(None, ns!(), LocalName::from("href")), value: format_tendril!("{}", base_href_value), }], ); // Insert newly created BASE node into HEAD head_node.children.borrow_mut().push(base_node.clone()); } } } dom } pub fn set_charset(dom: RcDom, charset: String) -> RcDom { for meta_node in find_nodes(&dom.document, vec!["html", "head", "meta"]).iter() { if get_node_attr(meta_node, "charset").is_some() { set_node_attr(meta_node, "charset", Some(charset)); return dom; } if get_node_attr(meta_node, "http-equiv") .unwrap_or_default() .eq_ignore_ascii_case("content-type") && get_node_attr(meta_node, "content").is_some() { set_node_attr( meta_node, "content", Some(format!("text/html;charset={}", charset)), ); return dom; } } // Manually append charset META node to HEAD { let meta_charset_node: Handle = 
create_element( &dom, QualName::new(None, ns!(), LocalName::from("meta")), vec![Attribute { name: QualName::new(None, ns!(), LocalName::from("charset")), value: format_tendril!("{}", charset), }], ); // Insert newly created META charset node into HEAD for head_node in find_nodes(&dom.document, vec!["html", "head"]).iter() { head_node .children .borrow_mut() .push(meta_charset_node.clone()); break; } } dom } pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option) { if let NodeData::Element { attrs, .. } = &node.data { let attrs_mut = &mut attrs.borrow_mut(); let mut i = 0; let mut found_existing_attr: bool = false; while i < attrs_mut.len() { if &attrs_mut[i].name.local == attr_name { found_existing_attr = true; if let Some(attr_value) = attr_value.clone() { let _ = &attrs_mut[i].value.clear(); let _ = &attrs_mut[i].value.push_slice(attr_value.as_str()); } else { // Remove attr completely if attr_value is not defined attrs_mut.remove(i); continue; } } i += 1; } if !found_existing_attr { // Add new attribute (since originally the target node didn't have it) if let Some(attr_value) = attr_value.clone() { let name = LocalName::from(attr_name); attrs_mut.push(Attribute { name: QualName::new(None, ns!(), name), value: format_tendril!("{}", attr_value), }); } } }; } pub fn set_robots(dom: RcDom, content_value: &str) -> RcDom { for meta_node in find_nodes(&dom.document, vec!["html", "head", "meta"]).iter() { if get_node_attr(meta_node, "name") .unwrap_or_default() .eq_ignore_ascii_case("robots") { set_node_attr(meta_node, "content", Some(content_value.to_string())); return dom; } } // Manually append robots META node to HEAD { let meta_charset_node: Handle = create_element( &dom, QualName::new(None, ns!(), LocalName::from("meta")), vec![ Attribute { name: QualName::new(None, ns!(), LocalName::from("name")), value: format_tendril!("robots"), }, Attribute { name: QualName::new(None, ns!(), LocalName::from("content")), value: format_tendril!("{}", 
content_value), }, ], ); // Insert newly created META charset node into HEAD for head_node in find_nodes(&dom.document, vec!["html", "head"]).iter() { head_node .children .borrow_mut() .push(meta_charset_node.clone()); break; } } dom } pub fn serialize_document( dom: RcDom, document_encoding: String, options: &MonolithOptions, ) -> Vec { let mut buf: Vec = Vec::new(); if options.isolate || options.no_css || options.no_fonts || options.no_frames || options.no_js || options.no_images { // Take care of CSP if let Some(html) = get_child_node_by_name(&dom.document, "html") { if let Some(head) = get_child_node_by_name(&html, "head") { let meta = create_element( &dom, QualName::new(None, ns!(), LocalName::from("meta")), vec![ Attribute { name: QualName::new(None, ns!(), LocalName::from("http-equiv")), value: format_tendril!("Content-Security-Policy"), }, Attribute { name: QualName::new(None, ns!(), LocalName::from("content")), value: format_tendril!("{}", compose_csp(options)), }, ], ); // The CSP meta-tag has to be prepended, never appended, // since there already may be one defined in the original document, // and browsers don't allow re-defining them (for obvious reasons) head.children.borrow_mut().reverse(); head.children.borrow_mut().push(meta.clone()); head.children.borrow_mut().reverse(); } } } let serializable: SerializableHandle = dom.document.into(); serialize(&mut buf, &serializable, SerializeOpts::default()) .expect("Unable to serialize DOM into buffer"); // Unwrap NOSCRIPT elements if options.unwrap_noscript { let s: &str = &String::from_utf8_lossy(&buf); let noscript_re = Regex::new(r"<(?P/?noscript[^>]*)>").unwrap(); buf = noscript_re.replace_all(s, "").as_bytes().to_vec(); } if !document_encoding.is_empty() { if let Some(encoding) = Encoding::for_label(document_encoding.as_bytes()) { let s: &str = &String::from_utf8_lossy(&buf); let (data, _, _) = encoding.encode(s); buf = data.to_vec(); } } buf } pub fn retrieve_and_embed_asset( session: &mut Session, 
/// Retrieves the asset referenced by `attr_value` and embeds it into `node`.
///
/// `attr_value` is resolved against `document_url` and fetched through
/// `session`. On success the outcome depends on the element:
///
/// * LINK and SCRIPT elements have their `integrity` attribute verified (when
///   non-empty) and then removed; on a mismatch nothing is embedded and
///   `attr_name` is left untouched.
/// * stylesheet LINKs are decoded, passed through `embed_css` and stored as
///   a data URL;
/// * FRAME and IFRAME documents are parsed, processed with `walk`,
///   serialized and stored as a data URL;
/// * SCRIPTs typed `text/javascript` or `application/javascript` (the default
///   when `type` is missing) are meant to be inlined as a text child, with
///   `attr_name` removed; other script types become data URLs;
/// * every other element gets a data URL.
///
/// On failure, HTTP(S) references are kept as resolved absolute URLs and any
/// other reference is removed.
///
/// # Panics
///
/// Panics if `node` is not an element or if a frame document cannot be
/// serialized.
pub fn retrieve_and_embed_asset(
    session: &mut Session,
    document_url: &Url,
    node: &Handle,
    attr_name: &str,
    attr_value: &str,
) {
    let resolved_url: Url = resolve_url(document_url, attr_value);

    match session.retrieve_asset(&document_url.clone(), &resolved_url) {
        Ok((data, final_url, media_type, charset)) => {
            let node_name: &str = get_node_name(node).unwrap();

            // Check integrity if it's a LINK or SCRIPT element
            let mut ok_to_include: bool = true;
            if node_name == "link" || node_name == "script" {
                // Check integrity
                if let Some(node_integrity_attr_value) = get_node_attr(node, "integrity") {
                    if !node_integrity_attr_value.is_empty() {
                        ok_to_include = check_integrity(&data, &node_integrity_attr_value);
                    }

                    // Wipe the integrity attribute (regardless of the check's outcome)
                    set_node_attr(node, "integrity", None);
                }
            }

            if ok_to_include {
                if node_name == "link"
                    && parse_link_type(&get_node_attr(node, "rel").unwrap_or(String::from("")))
                        .contains(&LinkType::Stylesheet)
                {
                    // Decode using the reported charset, falling back to lossy UTF-8
                    let stylesheet: String;
                    if let Some(encoding) = Encoding::for_label(charset.as_bytes()) {
                        let (string, _, _) = encoding.decode(&data);
                        stylesheet = string.to_string();
                    } else {
                        stylesheet = String::from_utf8_lossy(&data).to_string();
                    }

                    // Stylesheet LINK elements require special treatment
                    let css: String = embed_css(session, &final_url, &stylesheet);

                    // Create and embed data URL
                    let css_data_url =
                        create_data_url(&media_type, &charset, css.as_bytes(), &final_url);
                    set_node_attr(node, attr_name, Some(css_data_url.to_string()));
                } else if node_name == "frame" || node_name == "iframe" {
                    // (I)FRAMEs are also quite different from conventional resources
                    let frame_dom = html_to_dom(&data, charset.clone());
                    // Nested assets are resolved against the frame's own final URL
                    walk(session, &final_url, &frame_dom.document);

                    // NOTE(review): `Vec` has no element type here — presumably `Vec<u8>`; confirm
                    let mut frame_data: Vec = Vec::new();
                    let serializable: SerializableHandle = frame_dom.document.into();
                    serialize(&mut frame_data, &serializable, SerializeOpts::default()).unwrap();

                    // Create and embed data URL
                    let mut frame_data_url =
                        create_data_url(&media_type, &charset, &frame_data, &final_url);
                    frame_data_url.set_fragment(resolved_url.fragment());
                    set_node_attr(node, attr_name, Some(frame_data_url.to_string()));
                } else {
                    // Every other type of element gets processed here

                    // Parse media type for SCRIPT elements
                    if node_name == "script" {
                        let script_media_type =
                            get_node_attr(node, "type").unwrap_or(String::from("text/javascript"));

                        if script_media_type == "text/javascript"
                            || script_media_type == "application/javascript"
                        {
                            // Embed javascript code instead of using data URLs
                            // NOTE(review): the document parsed here is an empty string,
                            // so the loop below would find no SCRIPT node — the literal
                            // looks truncated; confirm against upstream.
                            let script_dom: RcDom =
                                parse_document(RcDom::default(), Default::default())
                                    .one("");
                            for script_node in
                                find_nodes(&script_dom.document, vec!["html", "head", "script"])
                                    .iter()
                            {
                                // Indexing panics if the SCRIPT node has no children
                                let text_node = &script_node.children.borrow()[0];

                                if let NodeData::Text { ref contents } = text_node.data {
                                    let mut tendril = contents.borrow_mut();
                                    tendril.clear();
                                    // NOTE(review): the search pattern is an empty string —
                                    // presumably it was a closing script tag; confirm.
                                    tendril.push_slice(
                                        &String::from_utf8_lossy(&data)
                                            .replace("", "<\\/script>"),
                                    );
                                }

                                // Move the text node under the real SCRIPT element and
                                // drop the attribute that referenced the external file
                                node.children.borrow_mut().push(text_node.clone());
                                set_node_attr(node, attr_name, None);
                            }
                        } else {
                            // Create and embed data URL
                            let mut data_url =
                                create_data_url(&script_media_type, &charset, &data, &final_url);
                            data_url.set_fragment(resolved_url.fragment());
                            set_node_attr(node, attr_name, Some(data_url.to_string()));
                        }
                    } else {
                        // Create and embed data URL
                        let mut data_url =
                            create_data_url(&media_type, &charset, &data, &final_url);
                        data_url.set_fragment(resolved_url.fragment());
                        set_node_attr(node, attr_name, Some(data_url.to_string()));
                    }
                }
            }
        }
        Err(_) => {
            if resolved_url.scheme() == "http" || resolved_url.scheme() == "https" {
                // Keep remote references if unable to retrieve the asset
                set_node_attr(node, attr_name, Some(resolved_url.to_string()));
            } else {
                // Remove local references if they can't be successfully embedded as data URLs
                set_node_attr(node, attr_name, None);
            }
        }
    }
}
set_node_attr(node, attr_name, Some(frame_data_url.to_string())); } else { // Every other type of element gets processed here // Parse media type for SCRIPT elements if node_name == "script" { let script_media_type = get_node_attr(node, "type").unwrap_or(String::from("text/javascript")); if script_media_type == "text/javascript" || script_media_type == "application/javascript" { // Embed javascript code instead of using data URLs let script_dom: RcDom = parse_document(RcDom::default(), Default::default()) .one(""); for script_node in find_nodes(&script_dom.document, vec!["html", "head", "script"]) .iter() { let text_node = &script_node.children.borrow()[0]; if let NodeData::Text { ref contents } = text_node.data { let mut tendril = contents.borrow_mut(); tendril.clear(); tendril.push_slice( &String::from_utf8_lossy(&data) .replace("", "<\\/script>"), ); } node.children.borrow_mut().push(text_node.clone()); set_node_attr(node, attr_name, None); } } else { // Create and embed data URL let mut data_url = create_data_url(&script_media_type, &charset, &data, &final_url); data_url.set_fragment(resolved_url.fragment()); set_node_attr(node, attr_name, Some(data_url.to_string())); } } else { // Create and embed data URL let mut data_url = create_data_url(&media_type, &charset, &data, &final_url); data_url.set_fragment(resolved_url.fragment()); set_node_attr(node, attr_name, Some(data_url.to_string())); } } } } Err(_) => { if resolved_url.scheme() == "http" || resolved_url.scheme() == "https" { // Keep remote references if unable to retrieve the asset set_node_attr(node, attr_name, Some(resolved_url.to_string())); } else { // Remove local references if they can't be successfully embedded as data URLs set_node_attr(node, attr_name, None); } } } } pub fn walk(session: &mut Session, document_url: &Url, node: &Handle) { match node.data { NodeData::Document => { // Dig deeper for child_node in node.children.borrow().iter() { walk(session, document_url, child_node); } } 
NodeData::Element { ref name, ref attrs, .. } => { match name.local.as_ref() { "meta" => { if let Some(meta_attr_http_equiv_value) = get_node_attr(node, "http-equiv") { let meta_attr_http_equiv_value: &str = &meta_attr_http_equiv_value; if meta_attr_http_equiv_value.eq_ignore_ascii_case("refresh") || meta_attr_http_equiv_value.eq_ignore_ascii_case("location") { // Remove http-equiv attributes from META nodes if they're able to control the page set_node_attr(node, "http-equiv", None); } } } "link" => { let link_node_types: Vec = parse_link_type(&get_node_attr(node, "rel").unwrap_or(String::from(""))); if link_node_types.contains(&LinkType::Favicon) || link_node_types.contains(&LinkType::AppleTouchIcon) { // Find and resolve LINK's href attribute if let Some(link_attr_href_value) = get_node_attr(node, "href") { if !session.options.no_images && !link_attr_href_value.is_empty() { retrieve_and_embed_asset( session, document_url, node, "href", &link_attr_href_value, ); } else { set_node_attr(node, "href", None); } } } else if link_node_types.contains(&LinkType::Stylesheet) { // Resolve LINK's href attribute if let Some(link_attr_href_value) = get_node_attr(node, "href") { if session.options.no_css { set_node_attr(node, "href", None); // Wipe integrity attribute set_node_attr(node, "integrity", None); } else if !link_attr_href_value.is_empty() { retrieve_and_embed_asset( session, document_url, node, "href", &link_attr_href_value, ); } } } else if link_node_types.contains(&LinkType::Preload) || link_node_types.contains(&LinkType::DnsPrefetch) { // Since all resources are embedded as data URLs, preloading and prefetching are not necessary set_node_attr(node, "rel", None); } else { // Make sure that all other LINKs' href attributes are full URLs if let Some(link_attr_href_value) = get_node_attr(node, "href") { let href_full_url: Url = resolve_url(document_url, &link_attr_href_value); set_node_attr(node, "href", Some(href_full_url.to_string())); } } } "base" => { if 
document_url.scheme() == "http" || document_url.scheme() == "https" { // Ensure the BASE node doesn't have a relative URL if let Some(base_attr_href_value) = get_node_attr(node, "href") { let href_full_url: Url = resolve_url(document_url, &base_attr_href_value); set_node_attr(node, "href", Some(href_full_url.to_string())); } } } "body" => { // Read and remember background attribute value of this BODY node if let Some(body_attr_background_value) = get_node_attr(node, "background") { // Remove background BODY node attribute by default set_node_attr(node, "background", None); if !session.options.no_images && !body_attr_background_value.is_empty() { retrieve_and_embed_asset( session, document_url, node, "background", &body_attr_background_value, ); } } } "img" => { // Find src and data-src attribute(s) let img_attr_src_value: Option = get_node_attr(node, "src"); let img_attr_data_src_value: Option = get_node_attr(node, "data-src"); if session.options.no_images { // Put empty images into src and data-src attributes if img_attr_src_value.is_some() { set_node_attr(node, "src", Some(EMPTY_IMAGE_DATA_URL.to_string())); } if img_attr_data_src_value.is_some() { set_node_attr(node, "data-src", Some(EMPTY_IMAGE_DATA_URL.to_string())); } } else if img_attr_src_value.clone().unwrap_or_default().is_empty() && img_attr_data_src_value .clone() .unwrap_or_default() .is_empty() { // Add empty src attribute set_node_attr(node, "src", Some("".to_string())); } else { // Add data URL src attribute let img_full_url: String = if !img_attr_data_src_value .clone() .unwrap_or_default() .is_empty() { img_attr_data_src_value.unwrap_or_default() } else { img_attr_src_value.unwrap_or_default() }; retrieve_and_embed_asset(session, document_url, node, "src", &img_full_url); } // Resolve srcset attribute if let Some(img_srcset) = get_node_attr(node, "srcset") { if !img_srcset.is_empty() { let resolved_srcset: String = embed_srcset(session, document_url, &img_srcset); set_node_attr(node, "srcset", 
Some(resolved_srcset)); } } } "input" => { if let Some(input_attr_type_value) = get_node_attr(node, "type") { if input_attr_type_value.eq_ignore_ascii_case("image") { if let Some(input_attr_src_value) = get_node_attr(node, "src") { if session.options.no_images || input_attr_src_value.is_empty() { let value = if input_attr_src_value.is_empty() { "" } else { EMPTY_IMAGE_DATA_URL }; set_node_attr(node, "src", Some(value.to_string())); } else { retrieve_and_embed_asset( session, document_url, node, "src", &input_attr_src_value, ); } } } } } "svg" => { if session.options.no_images { // Remove all children node.children.borrow_mut().clear(); } } "image" => { let attr_names: [&str; 2] = ["href", "xlink:href"]; for attr_name in attr_names.into_iter() { if let Some(image_attr_href_value) = get_node_attr(node, attr_name) { if session.options.no_images { set_node_attr(node, attr_name, None); } else { retrieve_and_embed_asset( session, document_url, node, attr_name, &image_attr_href_value, ); } } } } "use" => { let attr_names: [&str; 2] = ["href", "xlink:href"]; for attr_name in attr_names.into_iter() { if let Some(use_attr_href_value) = get_node_attr(node, attr_name) { if session.options.no_images { set_node_attr(node, attr_name, None); } else { let image_asset_url: Url = resolve_url(document_url, &use_attr_href_value); match session.retrieve_asset(document_url, &image_asset_url) { Ok((data, final_url, media_type, charset)) => { if media_type == "image/svg+xml" { // Parse SVG let svg_dom: RcDom = parse_document( RcDom::default(), Default::default(), ) .from_utf8() .read_from(&mut data.as_slice()) .unwrap(); if image_asset_url.fragment().is_some() { // Take only that one #fragment symbol from SVG and replace this image|use with that node let single_symbol_node = create_element( &svg_dom, QualName::new( None, ns!(), LocalName::from("symbol"), ), vec![], ); for symbol_node in find_nodes( &svg_dom.document, vec!["html", "body", "svg", "defs", "symbol"], ) .iter() { if 
get_node_attr(symbol_node, "id") .unwrap_or_default() == image_asset_url.fragment().unwrap() { svg_dom.reparent_children( symbol_node, &single_symbol_node, ); set_node_attr( &single_symbol_node, "id", Some( image_asset_url .fragment() .unwrap() .to_string(), ), ); set_node_attr( node, attr_name, Some(format!( "#{}", image_asset_url.fragment().unwrap() )), ); break; } } node.children .borrow_mut() .push(single_symbol_node.clone()); } else { // Replace this image|use with whole DOM of that SVG file for svg_node in find_nodes( &svg_dom.document, vec!["html", "body", "svg"], ) .iter() { svg_dom.reparent_children(svg_node, node); break; } // TODO: decide if we resort to using data URL here or stick with embedding the DOM } } else { // It's likely a raster image; embed it as data URL let image_asset_data: Url = create_data_url( &media_type, &charset, &data, &final_url, ); set_node_attr( node, attr_name, Some(image_asset_data.to_string()), ); } } Err(_) => { set_node_attr( node, attr_name, Some(image_asset_url.to_string()), ); } } } } } } "source" => { let parent_node = get_parent_node(node); let parent_node_name: &str = get_node_name(&parent_node).unwrap_or_default(); if let Some(source_attr_src_value) = get_node_attr(node, "src") { if parent_node_name == "audio" { if session.options.no_audio { set_node_attr(node, "src", None); } else { retrieve_and_embed_asset( session, document_url, node, "src", &source_attr_src_value, ); } } else if parent_node_name == "video" { if session.options.no_video { set_node_attr(node, "src", None); } else { retrieve_and_embed_asset( session, document_url, node, "src", &source_attr_src_value, ); } } } if let Some(source_attr_srcset_value) = get_node_attr(node, "srcset") { if parent_node_name == "picture" && !source_attr_srcset_value.is_empty() { if session.options.no_images { set_node_attr( node, "srcset", Some(EMPTY_IMAGE_DATA_URL.to_string()), ); } else { let resolved_srcset: String = embed_srcset(session, document_url, 
&source_attr_srcset_value); set_node_attr(node, "srcset", Some(resolved_srcset)); } } } } "a" | "area" => { if let Some(anchor_attr_href_value) = get_node_attr(node, "href") { if anchor_attr_href_value .clone() .trim() .starts_with("javascript:") { if session.options.no_js { // Replace with empty JS call to preserve original behavior set_node_attr(node, "href", Some("javascript:;".to_string())); } } else { // Don't touch mailto: links or hrefs which begin with a hash sign if !anchor_attr_href_value.clone().starts_with('#') && !is_url_and_has_protocol(&anchor_attr_href_value.clone()) { let href_full_url: Url = resolve_url(document_url, &anchor_attr_href_value); set_node_attr(node, "href", Some(href_full_url.to_string())); } } } } "script" => { // Read values of integrity and src attributes let script_attr_src: &str = &get_node_attr(node, "src").unwrap_or_default(); if session.options.no_js { // Empty inner content node.children.borrow_mut().clear(); // Remove src attribute if !script_attr_src.is_empty() { set_node_attr(node, "src", None); // Wipe integrity attribute set_node_attr(node, "integrity", None); } } else if !script_attr_src.is_empty() { retrieve_and_embed_asset( session, document_url, node, "src", script_attr_src, ); } } "style" => { if session.options.no_css { // Empty inner content of STYLE tags node.children.borrow_mut().clear(); } else { for child_node in node.children.borrow_mut().iter_mut() { if let NodeData::Text { ref contents } = child_node.data { let mut tendril = contents.borrow_mut(); let replacement = embed_css(session, document_url, tendril.as_ref()); tendril.clear(); tendril.push_slice(&replacement); } } } } "form" => { if let Some(form_attr_action_value) = get_node_attr(node, "action") { // Modify action property to ensure it's a full URL let form_action_full_url: Url = resolve_url(document_url, &form_attr_action_value); set_node_attr(node, "action", Some(form_action_full_url.to_string())); } } "frame" | "iframe" => { if let 
Some(frame_attr_src_value) = get_node_attr(node, "src") { if session.options.no_frames { // Empty the src attribute set_node_attr(node, "src", Some("".to_string())); } else { // Ignore (i)frames with empty source (they cause infinite loops) if !frame_attr_src_value.trim().is_empty() { retrieve_and_embed_asset( session, document_url, node, "src", &frame_attr_src_value, ); } } } } "audio" => { // Embed audio source if let Some(audio_attr_src_value) = get_node_attr(node, "src") { if session.options.no_audio { set_node_attr(node, "src", None); } else { retrieve_and_embed_asset( session, document_url, node, "src", &audio_attr_src_value, ); } } } "video" => { // Embed video source if let Some(video_attr_src_value) = get_node_attr(node, "src") { if session.options.no_video { set_node_attr(node, "src", None); } else { retrieve_and_embed_asset( session, document_url, node, "src", &video_attr_src_value, ); } } // Embed poster images if let Some(video_attr_poster_value) = get_node_attr(node, "poster") { // Skip posters with empty source if !video_attr_poster_value.is_empty() { if session.options.no_images { set_node_attr( node, "poster", Some(EMPTY_IMAGE_DATA_URL.to_string()), ); } else { retrieve_and_embed_asset( session, document_url, node, "poster", &video_attr_poster_value, ); } } } } "noscript" => { for child_node in node.children.borrow_mut().iter_mut() { if let NodeData::Text { ref contents } = child_node.data { // Get contents of NOSCRIPT node let mut noscript_contents = contents.borrow_mut(); // Parse contents of NOSCRIPT node as DOM let noscript_contents_dom: RcDom = html_to_dom(&noscript_contents.as_bytes().to_vec(), "".to_string()); // Embed assets of NOSCRIPT node contents walk(session, document_url, &noscript_contents_dom.document); // Get rid of original contents noscript_contents.clear(); // Insert HTML containing embedded assets into NOSCRIPT node if let Some(html) = get_child_node_by_name(&noscript_contents_dom.document, "html") { if let Some(body) = 
get_child_node_by_name(&html, "body") { let mut buf: Vec = Vec::new(); let serializable: SerializableHandle = body.into(); serialize(&mut buf, &serializable, SerializeOpts::default()) .expect("Unable to serialize DOM into buffer"); let result = String::from_utf8_lossy(&buf); noscript_contents.push_slice(&result); } } } } } _ => {} } // Process style attributes if session.options.no_css { // Get rid of style attributes set_node_attr(node, "style", None); } else { // Embed URLs found within the style attribute of this node if let Some(node_attr_style_value) = get_node_attr(node, "style") { let embedded_style = embed_css(session, document_url, &node_attr_style_value); set_node_attr(node, "style", Some(embedded_style)); } } // Strip all JS from document if session.options.no_js { let attrs_mut = &mut attrs.borrow_mut(); // Get rid of JS event attributes let mut js_attr_indexes = Vec::new(); for (i, attr) in attrs_mut.iter().enumerate() { if attr_is_event_handler(&attr.name.local) { js_attr_indexes.push(i); } } js_attr_indexes.reverse(); for attr_index in js_attr_indexes { attrs_mut.remove(attr_index); } } // Dig deeper for child_node in node.children.borrow().iter() { walk(session, document_url, child_node); } } _ => { // Note: in case of options.no_js being set to true, there's no need to worry about // getting rid of comments that may contain scripts, e.g. 
\n\n" ); // Exit code should be 0 out.assert().code(0); } #[test] fn unwrap_noscript_contents_nested() { let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let path_html: &Path = Path::new("tests/_data_/noscript/nested.html"); let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg"); let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap(); // STDERR should contain target HTML and embedded SVG files assert_eq!( String::from_utf8_lossy(&out.stderr), format!( "\ {file_url_html}\n\ {file_url_svg}\n\ ", file_url_html = Url::from_file_path(fs::canonicalize(path_html).unwrap()).unwrap(), file_url_svg = Url::from_file_path(fs::canonicalize(path_svg).unwrap()).unwrap(), ) ); // STDOUT should contain HTML with no CSS assert_eq!( String::from_utf8_lossy(&out.stdout), "

JS is not active

\n\n" ); // Exit code should be 0 out.assert().code(0); } #[test] fn unwrap_noscript_contents_with_script() { let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let path_html: &Path = Path::new("tests/_data_/noscript/script.html"); let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg"); let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap(); // STDERR should contain target HTML and embedded SVG files assert_eq!( String::from_utf8_lossy(&out.stderr), format!( "\ {file_url_html}\n\ {file_url_svg}\n\ ", file_url_html = Url::from_file_path(fs::canonicalize(path_html).unwrap()).unwrap(), file_url_svg = Url::from_file_path(fs::canonicalize(path_svg).unwrap()).unwrap(), ) ); // STDOUT should contain HTML with no CSS assert_eq!( String::from_utf8_lossy(&out.stdout), r#" "# ); // Exit code should be 0 out.assert().code(0); } #[test] fn unwrap_noscript_contents_attr_data_url() { let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("-n") .arg("data:text/html,") .output() .unwrap(); // STDERR should be empty assert_eq!(String::from_utf8_lossy(&out.stderr), ""); // STDOUT should contain unwrapped contents of NOSCRIPT element assert_eq!( String::from_utf8_lossy(&out.stdout), r#"test "# ); // Exit code should be 0 out.assert().code(0); } } ================================================ FILE: tests/cli/unusual_encodings.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use assert_cmd::prelude::*; use encoding_rs::Encoding; use std::env; use std::path::MAIN_SEPARATOR; use std::process::{Command, Stdio}; #[test] fn 
properly_save_document_with_gb2312() { let cwd = env::current_dir().unwrap(); let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/"); let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg(format!( "tests{s}_data_{s}unusual_encodings{s}gb2312.html", s = MAIN_SEPARATOR )) .output() .unwrap(); let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" }; // STDERR should contain only the target file assert_eq!( String::from_utf8_lossy(&out.stderr), format!( "{file}{cwd}/tests/_data_/unusual_encodings/gb2312.html\n", file = file_url_protocol, cwd = cwd_normalized, ) ); // STDOUT should contain original document without any modifications let s: String; if let Some(encoding) = Encoding::for_label(b"gb2312") { let (string, _, _) = encoding.decode(&out.stdout); s = string.to_string(); } else { s = String::from_utf8_lossy(&out.stdout).to_string(); } assert_eq!( s, r##" 近七成人减少线下需求 银行数字化转型提速--经济·科技--人民网

近七成人减少线下需求 银行数字化转型提速

"## ); // Exit code should be 0 out.assert().code(0); } #[test] fn properly_save_document_with_gb2312_from_stdin() { let mut echo = Command::new("cat") .arg(format!( "tests{s}_data_{s}unusual_encodings{s}gb2312.html", s = MAIN_SEPARATOR )) .stdout(Stdio::piped()) .spawn() .unwrap(); let echo_out = echo.stdout.take().unwrap(); echo.wait().unwrap(); let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); cmd.stdin(echo_out); let out = cmd.arg("-M").arg("-").output().unwrap(); // STDERR should be empty assert_eq!(String::from_utf8_lossy(&out.stderr), ""); // STDOUT should contain HTML created out of STDIN let s: String; if let Some(encoding) = Encoding::for_label(b"gb2312") { let (string, _, _) = encoding.decode(&out.stdout); s = string.to_string(); } else { s = String::from_utf8_lossy(&out.stdout).to_string(); } assert_eq!( s, r##" 近七成人减少线下需求 银行数字化转型提速--经济·科技--人民网

近七成人减少线下需求 银行数字化转型提速

"## ); // Exit code should be 0 out.assert().code(0); } #[test] fn properly_save_document_with_gb2312_custom_charset() { let cwd = env::current_dir().unwrap(); let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/"); let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("-E") .arg("utf8") .arg(format!( "tests{s}_data_{s}unusual_encodings{s}gb2312.html", s = MAIN_SEPARATOR )) .output() .unwrap(); let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" }; // STDERR should contain only the target file assert_eq!( String::from_utf8_lossy(&out.stderr), format!( "{file}{cwd}/tests/_data_/unusual_encodings/gb2312.html\n", file = file_url_protocol, cwd = cwd_normalized, ) ); // STDOUT should contain original document without any modifications assert_eq!( String::from_utf8_lossy(&out.stdout).to_string(), r#" 近七成人减少线下需求 银行数字化转型提速--经济·科技--人民网

近七成人减少线下需求 银行数字化转型提速

"# ); // Exit code should be 0 out.assert().code(0); } #[test] fn properly_save_document_with_gb2312_custom_charset_bad() { let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("-E") .arg("utf0") .arg(format!( "tests{s}_data_{s}unusual_encodings{s}gb2312.html", s = MAIN_SEPARATOR )) .output() .unwrap(); // STDERR should contain error message assert_eq!( String::from_utf8_lossy(&out.stderr), "Error: unknown encoding \"utf0\"\n" ); // STDOUT should be empty assert_eq!(String::from_utf8_lossy(&out.stdout).to_string(), ""); // Exit code should be 1 out.assert().code(1); } } // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod failing { use assert_cmd::prelude::*; use std::env; use std::path::MAIN_SEPARATOR; use std::process::Command; #[test] fn change_iso88591_to_utf8_to_properly_display_html_entities() { let cwd = env::current_dir().unwrap(); let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/"); let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg(format!( "tests{s}_data_{s}unusual_encodings{s}iso-8859-1.html", s = MAIN_SEPARATOR )) .output() .unwrap(); let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" }; // STDERR should contain only the target file assert_eq!( String::from_utf8_lossy(&out.stderr), format!( "{file}{cwd}/tests/_data_/unusual_encodings/iso-8859-1.html\n", file = file_url_protocol, cwd = cwd_normalized, ) ); // STDOUT should contain original document but with UTF-8 charset assert_eq!( String::from_utf8_lossy(&out.stdout), r##" � Some Company "## ); // Exit code should be 0 out.assert().code(0); } } ================================================ FILE: 
tests/cookies/cookie/is_expired.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use monolith::cookies; #[test] fn never_expires() { let cookie = cookies::Cookie { domain: String::from("127.0.0.1"), include_subdomains: true, path: String::from("/"), https_only: false, expires: 0, name: String::from(""), value: String::from(""), }; assert!(!cookie.is_expired()); } #[test] fn expires_long_from_now() { let cookie = cookies::Cookie { domain: String::from("127.0.0.1"), include_subdomains: true, path: String::from("/"), https_only: false, expires: 9999999999, name: String::from(""), value: String::from(""), }; assert!(!cookie.is_expired()); } } // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod failing { use monolith::cookies; #[test] fn expired() { let cookie = cookies::Cookie { domain: String::from("127.0.0.1"), include_subdomains: true, path: String::from("/"), https_only: false, expires: 1, name: String::from(""), value: String::from(""), }; assert!(cookie.is_expired()); } } ================================================ FILE: tests/cookies/cookie/matches_url.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ 
╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use monolith::cookies; #[test] fn secure_url() { let cookie = cookies::Cookie { domain: String::from("127.0.0.1"), include_subdomains: true, path: String::from("/"), https_only: true, expires: 0, name: String::from(""), value: String::from(""), }; assert!(cookie.matches_url("https://127.0.0.1/something")); } #[test] fn non_secure_url() { let cookie = cookies::Cookie { domain: String::from("127.0.0.1"), include_subdomains: true, path: String::from("/"), https_only: false, expires: 0, name: String::from(""), value: String::from(""), }; assert!(cookie.matches_url("http://127.0.0.1/something")); } #[test] fn subdomain() { let cookie = cookies::Cookie { domain: String::from(".somethingsomething.com"), include_subdomains: true, path: String::from("/"), https_only: true, expires: 0, name: String::from(""), value: String::from(""), }; assert!(cookie.matches_url("https://cdn.somethingsomething.com/something")); } } // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod failing { use monolith::cookies; #[test] fn empty_url() { let cookie = cookies::Cookie { domain: String::from("127.0.0.1"), include_subdomains: true, path: String::from("/"), https_only: false, expires: 0, name: String::from(""), value: String::from(""), }; assert!(!cookie.matches_url("")); } #[test] fn wrong_hostname() { let cookie = cookies::Cookie { domain: String::from("127.0.0.1"), include_subdomains: true, path: String::from("/"), https_only: false, expires: 0, name: String::from(""), value: String::from(""), }; assert!(!cookie.matches_url("http://0.0.0.0/")); } #[test] fn wrong_path() { let cookie = cookies::Cookie { domain: String::from("127.0.0.1"), include_subdomains: 
false, path: String::from("/"), https_only: false, expires: 0, name: String::from(""), value: String::from(""), }; assert!(!cookie.matches_url("http://0.0.0.0/path")); } } ================================================ FILE: tests/cookies/cookie/mod.rs ================================================ mod is_expired; mod matches_url; ================================================ FILE: tests/cookies/mod.rs ================================================ mod cookie; mod parse_cookie_file_contents; ================================================ FILE: tests/cookies/parse_cookie_file_contents.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use monolith::cookies; #[test] fn parse_file() { let file_contents = r#"# Netscape HTTP Cookie File 127.0.0.1 FALSE / FALSE 0 USER_TOKEN in"#; let result = cookies::parse_cookie_file_contents(file_contents).unwrap(); assert_eq!(result.len(), 1); assert_eq!(result[0].domain, "127.0.0.1"); assert!(!result[0].include_subdomains); assert_eq!(result[0].path, "/"); assert!(!result[0].https_only); assert_eq!(result[0].expires, 0); assert_eq!(result[0].name, "USER_TOKEN"); assert_eq!(result[0].value, "in"); } #[test] fn parse_multiline_file() { let file_contents = r#"# HTTP Cookie File 127.0.0.1 FALSE / FALSE 0 USER_TOKEN in 127.0.0.1 TRUE / TRUE 9 USER_TOKEN out "#; let result = cookies::parse_cookie_file_contents(file_contents).unwrap(); assert_eq!(result.len(), 2); assert_eq!(result[0].domain, "127.0.0.1"); assert!(!result[0].include_subdomains); assert_eq!(result[0].path, "/"); assert!(!result[0].https_only); assert_eq!(result[0].expires, 0); assert_eq!(result[0].name, 
"USER_TOKEN"); assert_eq!(result[0].value, "in"); assert_eq!(result[1].domain, "127.0.0.1"); assert!(result[1].include_subdomains); assert_eq!(result[1].path, "/"); assert!(result[1].https_only); assert_eq!(result[1].expires, 9); assert_eq!(result[1].name, "USER_TOKEN"); assert_eq!(result[1].value, "out"); } } // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod failing { use monolith::cookies; #[test] fn empty() { let file_contents = ""; let result = cookies::parse_cookie_file_contents(file_contents).unwrap(); assert_eq!(result.len(), 0); } #[test] fn no_header() { let file_contents = "127.0.0.1 FALSE / FALSE 0 USER_TOKEN in"; match cookies::parse_cookie_file_contents(file_contents) { Ok(_result) => { assert!(false); } Err(_e) => { assert!(true); } } } #[test] fn spaces_instead_of_tabs() { let file_contents = "# HTTP Cookie File\n127.0.0.1 FALSE / FALSE 0 USER_TOKEN in"; let result = cookies::parse_cookie_file_contents(file_contents).unwrap(); assert_eq!(result.len(), 0); } } ================================================ FILE: tests/core/detect_media_type.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use reqwest::Url; use monolith::core::detect_media_type; #[test] fn image_gif87() { let dummy_url: Url = Url::parse("data:,").unwrap(); assert_eq!(detect_media_type(b"GIF87a", &dummy_url), "image/gif"); } #[test] fn image_gif89() { let dummy_url: Url = 
Url::parse("data:,").unwrap(); assert_eq!(detect_media_type(b"GIF89a", &dummy_url), "image/gif"); } #[test] fn image_jpeg() { let dummy_url: Url = Url::parse("data:,").unwrap(); assert_eq!(detect_media_type(b"\xFF\xD8\xFF", &dummy_url), "image/jpeg"); } #[test] fn image_png() { let dummy_url: Url = Url::parse("data:,").unwrap(); assert_eq!( detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", &dummy_url), "image/png" ); } #[test] fn image_svg() { let dummy_url: Url = Url::parse("data:,").unwrap(); assert_eq!(detect_media_type(b":"|?/%title%.html"#, r#"/\<>:"|?"#, MonolithOutputFormat::HTML, ); assert_eq!( final_destination, r#"/home/username/Downloads/<>:"|?/__[] - -.html"# ); } #[test] fn level_up() { let final_destination = format_output_path("../%title%.html", ".Title", MonolithOutputFormat::HTML); assert_eq!(final_destination, r#"../Title.html"#); } #[test] fn file_name_extension() { let final_destination = format_output_path("%title%.%extension%", "Title", MonolithOutputFormat::HTML); assert_eq!(final_destination, r#"Title.html"#); } #[test] fn file_name_extension_mhtml() { let final_destination = format_output_path("%title%.%extension%", "Title", MonolithOutputFormat::MHTML); assert_eq!(final_destination, r#"Title.mhtml"#); } #[test] fn file_name_extension_short() { let final_destination = format_output_path("%title%.%ext%", "Title", MonolithOutputFormat::HTML); assert_eq!(final_destination, r#"Title.htm"#); } #[test] fn file_name_extension_short_mhtml() { let final_destination = format_output_path("%title%.%ext%", "Title", MonolithOutputFormat::MHTML); assert_eq!(final_destination, r#"Title.mht"#); } } ================================================ FILE: tests/core/mod.rs ================================================ mod detect_media_type; mod format_output_path; mod options; mod parse_content_type; ================================================ FILE: tests/core/options.rs ================================================ // ██████╗ █████╗ 
███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use monolith::core::{MonolithOptions, MonolithOutputFormat}; #[test] fn defaults() { let options: MonolithOptions = MonolithOptions::default(); assert!(!options.no_audio); assert_eq!(options.base_url, None); assert!(!options.no_css); assert_eq!(options.encoding, None); assert!(!options.no_frames); assert!(!options.no_fonts); assert!(!options.no_images); assert!(!options.isolate); assert!(!options.no_js); assert!(!options.insecure); assert!(!options.no_metadata); assert_eq!(options.output_format, MonolithOutputFormat::HTML); assert!(!options.silent); assert_eq!(options.timeout, 0); assert_eq!(options.user_agent, None); assert!(!options.no_video); } } ================================================ FILE: tests/core/parse_content_type.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use monolith::core::parse_content_type; #[test] fn text_plain_utf8() { let (media_type, charset, is_base64) = parse_content_type("text/plain;charset=utf8"); assert_eq!(media_type, "text/plain"); assert_eq!(charset, "utf8"); assert!(!is_base64); } #[test] fn text_plain_utf8_spaces() { let (media_type, charset, is_base64) = parse_content_type(" text/plain ; charset=utf8 "); assert_eq!(media_type, "text/plain"); assert_eq!(charset, "utf8"); assert!(!is_base64); } #[test] fn empty() { let (media_type, charset, 
is_base64) = parse_content_type(""); assert_eq!(media_type, "text/plain"); assert_eq!(charset, "US-ASCII"); assert!(!is_base64); } #[test] fn base64() { let (media_type, charset, is_base64) = parse_content_type(";base64"); assert_eq!(media_type, "text/plain"); assert_eq!(charset, "US-ASCII"); assert!(is_base64); } #[test] fn text_html_base64() { let (media_type, charset, is_base64) = parse_content_type("text/html;base64"); assert_eq!(media_type, "text/html"); assert_eq!(charset, "US-ASCII"); assert!(is_base64); } #[test] fn only_media_type() { let (media_type, charset, is_base64) = parse_content_type("text/html"); assert_eq!(media_type, "text/html"); assert_eq!(charset, "US-ASCII"); assert!(!is_base64); } #[test] fn only_media_type_colon() { let (media_type, charset, is_base64) = parse_content_type("text/html;"); assert_eq!(media_type, "text/html"); assert_eq!(charset, "US-ASCII"); assert!(!is_base64); } #[test] fn media_type_gb2312_filename() { let (media_type, charset, is_base64) = parse_content_type("text/html;charset=GB2312;filename=index.html"); assert_eq!(media_type, "text/html"); assert_eq!(charset, "GB2312"); assert!(!is_base64); } #[test] fn media_type_filename_gb2312() { let (media_type, charset, is_base64) = parse_content_type("text/html;filename=index.html;charset=GB2312"); assert_eq!(media_type, "text/html"); assert_eq!(charset, "GB2312"); assert!(!is_base64); } } ================================================ FILE: tests/css/embed_css.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use reqwest::Url; use monolith::core::MonolithOptions; use monolith::css; use monolith::session::Session; use 
monolith::url::EMPTY_IMAGE_DATA_URL; #[test] fn empty_input() { let document_url: Url = Url::parse("data:,").unwrap(); let options = MonolithOptions::default(); let mut session: Session = Session::new(None, None, options); assert_eq!(css::embed_css(&mut session, &document_url, ""), ""); } #[test] fn trim_if_empty() { let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap(); let options = MonolithOptions::default(); let mut session: Session = Session::new(None, None, options); assert_eq!( css::embed_css(&mut session, &document_url, "\t \t "), "" ); } #[test] fn style_exclude_unquoted_images() { let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap(); let mut options = MonolithOptions::default(); options.no_images = true; options.silent = true; let mut session: Session = Session::new(None, None, options); const STYLE: &str = "/* border: none;*/\ background-image: url(https://somewhere.com/bg.png); \ list-style: url(/assets/images/bullet.svg);\ width:99.998%; \ margin-top: -20px; \ line-height: -1; \ height: calc(100vh - 10pt)"; assert_eq!( css::embed_css(&mut session, &document_url, STYLE), format!( "/* border: none;*/\ background-image: url(\"{empty_image}\"); \ list-style: url(\"{empty_image}\");\ width:99.998%; \ margin-top: -20px; \ line-height: -1; \ height: calc(100vh - 10pt)", empty_image = EMPTY_IMAGE_DATA_URL ) ); } #[test] fn style_exclude_single_quoted_images() { let document_url: Url = Url::parse("data:,").unwrap(); let mut options = MonolithOptions::default(); options.no_images = true; options.silent = true; let mut session: Session = Session::new(None, None, options); const STYLE: &str = "/* border: none;*/\ background-image: url('https://somewhere.com/bg.png'); \ list-style: url('/assets/images/bullet.svg');\ width:99.998%; \ margin-top: -20px; \ line-height: -1; \ height: calc(100vh - 10pt)"; assert_eq!( css::embed_css(&mut session, &document_url, STYLE), format!( "/* border: none;*/\ background-image: 
url(\"{empty_image}\"); \ list-style: url(\"{empty_image}\");\ width:99.998%; \ margin-top: -20px; \ line-height: -1; \ height: calc(100vh - 10pt)", empty_image = EMPTY_IMAGE_DATA_URL ) ); } #[test] fn style_block() { let document_url: Url = Url::parse("file:///").unwrap(); let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); const CSS: &str = "\ #id.class-name:not(:nth-child(3n+0)) {\n \ // border: none;\n \ background-image: url(\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\");\n\ }\n\ \n\ html > body {}"; assert_eq!(css::embed_css(&mut session, &document_url, CSS), CSS); } #[test] fn attribute_selectors() { let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap(); let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); const CSS: &str = "\ [data-value] { /* Attribute exists */ } [data-value=\"foo\"] { /* Attribute has this exact value */ } [data-value*=\"foo\"] { /* Attribute value contains this value somewhere in it */ } [data-value~=\"foo\"] { /* Attribute has this value in a space-separated list somewhere */ } [data-value^=\"foo\"] { /* Attribute value starts with this */ } [data-value|=\"foo\"] { /* Attribute value starts with this in a dash-separated list */ } [data-value$=\"foo\"] { /* Attribute value ends with this */ } "; assert_eq!(css::embed_css(&mut session, &document_url, CSS), CSS); } #[test] fn import_string() { let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap(); let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); const CSS: &str = "\ @charset 'UTF-8';\n\ \n\ @import 'data:text/css,html{background-color:%23000}';\n\ \n\ @import url('data:text/css,html{color:%23fff}')\n\ "; assert_eq!( css::embed_css(&mut 
session, &document_url, CSS), "\ @charset \"UTF-8\";\n\ \n\ @import \"data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9\";\n\ \n\ @import url(\"data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==\")\n\ " ); } #[test] fn hash_urls() { let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap(); let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); const CSS: &str = "\ body {\n \ behavior: url(#default#something);\n\ }\n\ \n\ .scissorHalf {\n \ offset-path: url(#somePath);\n\ }\n\ "; assert_eq!(css::embed_css(&mut session, &document_url, CSS), CSS); } #[test] fn transform_percentages_and_degrees() { let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap(); let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); const CSS: &str = "\ div {\n \ transform: translate(-50%, -50%) rotate(-45deg);\n\ transform: translate(50%, 50%) rotate(45deg);\n\ transform: translate(+50%, +50%) rotate(+45deg);\n\ }\n\ "; assert_eq!(css::embed_css(&mut session, &document_url, CSS), CSS); } #[test] fn unusual_indents() { let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap(); let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); const CSS: &str = "\ .is\\:good:hover {\n \ color: green\n\ }\n\ \n\ #\\~\\!\\@\\$\\%\\^\\&\\*\\(\\)\\+\\=\\,\\.\\/\\\\\\'\\\"\\;\\:\\?\\>\\<\\[\\]\\{\\}\\|\\`\\# {\n \ color: black\n\ }\n\ "; assert_eq!(css::embed_css(&mut session, &document_url, CSS), CSS); } #[test] fn exclude_fonts() { let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap(); let mut options = MonolithOptions::default(); options.no_fonts = true; options.silent = true; let mut session: Session = Session::new(None, None, options); const CSS: &str = "\ @font-face {\n \ font-family: 'My Font';\n \ src: 
url(my_font.woff);\n\ }\n\ \n\ #identifier {\n \ font-family: 'My Font' Arial\n\ }\n\ \n\ @font-face {\n \ font-family: 'My Font';\n \ src: url(my_font.woff);\n\ }\n\ \n\ div {\n \ font-family: 'My Font' Verdana\n\ }\n\ "; const CSS_OUT: &str = " \ \n\ \n\ #identifier {\n \ font-family: \"My Font\" Arial\n\ }\n\ \n \ \n\ \n\ div {\n \ font-family: \"My Font\" Verdana\n\ }\n\ "; assert_eq!(css::embed_css(&mut session, &document_url, CSS), CSS_OUT); } #[test] fn content() { let document_url: Url = Url::parse("data:,").unwrap(); let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); const CSS: &str = "\ #language a[href=\"#translations\"]:before {\n\ content: url(data:,) \"\\A\";\n\ white-space: pre }\n\ "; const CSS_OUT: &str = "\ #language a[href=\"#translations\"]:before {\n\ content: url(\"data:text/plain;base64,\") \"\\a \";\n\ white-space: pre }\n\ "; assert_eq!(css::embed_css(&mut session, &document_url, CSS), CSS_OUT); } #[test] fn ie_css_hack() { let document_url: Url = Url::parse("data:,").unwrap(); let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); const CSS: &str = "\ div#p>svg>foreignObject>section:not(\\9) {\n\ width: 300px;\n\ width: 500px\\9;\n\ }\n\ "; const CSS_OUT: &str = "\ div#p>svg>foreignObject>section:not(\\9) {\n\ width: 300px;\n\ width: 500px\t;\n\ }\n\ "; assert_eq!(css::embed_css(&mut session, &document_url, CSS), CSS_OUT); } } ================================================ FILE: tests/css/is_image_url_prop.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ 
// Property names that `css::is_image_url_prop` is expected to accept: every
// test passes a single name and asserts that the call returns `true`.
#[cfg(test)]
mod passing {
    use monolith::css;

    #[test]
    fn background() {
        assert!(css::is_image_url_prop("background"));
    }

    #[test]
    fn background_image() {
        assert!(css::is_image_url_prop("background-image"));
    }

    // Upper-case spelling of the previous property; the test expects it to be
    // accepted as well.
    #[test]
    fn background_image_uppercase() {
        assert!(css::is_image_url_prop("BACKGROUND-IMAGE"));
    }

    #[test]
    fn border_image() {
        assert!(css::is_image_url_prop("border-image"));
    }

    #[test]
    fn content() {
        assert!(css::is_image_url_prop("content"));
    }

    #[test]
    fn cursor() {
        assert!(css::is_image_url_prop("cursor"));
    }

    #[test]
    fn list_style() {
        assert!(css::is_image_url_prop("list-style"));
    }

    #[test]
    fn list_style_image() {
        assert!(css::is_image_url_prop("list-style-image"));
    }

    #[test]
    fn mask_image() {
        assert!(css::is_image_url_prop("mask-image"));
    }
}

// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Inputs that `css::is_image_url_prop` is expected to reject: the empty string
// and a few property names; each test asserts that the call returns `false`.
#[cfg(test)]
mod failing {
    use monolith::css;

    #[test]
    fn empty() {
        assert!(!css::is_image_url_prop(""));
    }

    #[test]
    fn width() {
        assert!(!css::is_image_url_prop("width"));
    }

    #[test]
    fn color() {
        assert!(!css::is_image_url_prop("color"));
    }

    #[test]
    fn z_index() {
        assert!(!css::is_image_url_prop("z-index"));
    }
}

================================================
FILE: tests/css/mod.rs
================================================
// Declares the CSS test files as submodules.
mod embed_css;
mod is_image_url_prop;

================================================
FILE: tests/html/add_favicon.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Parses a document, passes it through `html::add_favicon`, serializes the
// resulting DOM and compares the serialized text with an expected string.
#[cfg(test)]
mod passing {
    use html5ever::serialize::{serialize, SerializeOpts};
    use markup5ever_rcdom::SerializableHandle;

    use monolith::html;

    #[test]
    fn basic() {
        // NOTE(review): the input literal here and the expected literal at the
        // end of the test are identical in this copy; markup appears to have
        // been lost from both -- confirm against the original file.
        let html = "
text
";
        let mut dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());

        // The DOM returned by `add_favicon` replaces the parsed one.
        dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());

        // NOTE(review): `Vec` carries no type argument in this copy; `serialize`
        // writes into the buffer, so presumably a byte vector -- confirm.
        let mut buf: Vec = Vec::new();
        serialize(
            &mut buf,
            &SerializableHandle::from(dom.document.clone()),
            SerializeOpts::default(),
        )
        .unwrap();

        // Each serialized byte is cast to a `char` on its own before collecting.
        // NOTE(review): the turbofish on `collect` is empty in this copy; the
        // target type is presumably a string, given the literal it is compared
        // with -- confirm.
        assert_eq!(
            buf.iter().map(|&c| c as char).collect::(),
            "
text
"
        );
    }
}

================================================
FILE: tests/html/check_integrity.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Payload / integrity-string pairs that `html::check_integrity` is expected to
// accept. The integrity strings are prefixed with `sha256-`, `sha384-` or
// `sha512-`.
#[cfg(test)]
mod passing {
    use monolith::html;

    // Empty payload checked against a `sha256-` value.
    #[test]
    fn empty_input_sha256() {
        assert!(html::check_integrity(
            "".as_bytes(),
            "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="
        ));
    }

    #[test]
    fn sha256() {
        assert!(html::check_integrity(
            "abcdef0123456789".as_bytes(),
            "sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM="
        ));
    }

    #[test]
    fn sha384() {
        assert!(html::check_integrity(
            "abcdef0123456789".as_bytes(),
            "sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw"
        ));
    }

    #[test]
    fn sha512() {
        assert!(html::check_integrity(
            "abcdef0123456789".as_bytes(),
            "sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww=="
        ));
    }
}

// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Payload / integrity-string pairs that `html::check_integrity` is expected to
// reject: an empty integrity string, and `-badhash` values behind each of the
// three prefixes.
#[cfg(test)]
mod failing {
    use monolith::html;

    #[test]
    fn empty_hash() {
        assert!(!html::check_integrity("abcdef0123456789".as_bytes(), ""));
    }

    // Both the payload and the integrity string are empty.
    #[test]
    fn empty_input_empty_hash() {
        assert!(!html::check_integrity("".as_bytes(), ""));
    }

    #[test]
    fn sha256() {
        assert!(!html::check_integrity(
            "abcdef0123456789".as_bytes(),
            "sha256-badhash"
        ));
    }

    #[test]
    fn sha384() {
        assert!(!html::check_integrity(
            "abcdef0123456789".as_bytes(),
            "sha384-badhash"
        ));
    }

    #[test]
    fn sha512() {
        assert!(!html::check_integrity(
            "abcdef0123456789".as_bytes(),
            "sha512-badhash"
        ));
    }
}
================================================
FILE: tests/html/compose_csp.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Each test starts from `MonolithOptions::default()`, switches on one option
// (or, in `all`, every option exercised here) and compares the string returned
// by `html::compose_csp` with the expected policy text.
#[cfg(test)]
mod passing {
    use monolith::core::MonolithOptions;
    use monolith::html;

    #[test]
    fn isolated() {
        let mut options = MonolithOptions::default();
        options.isolate = true;
        let csp_content = html::compose_csp(&options);

        assert_eq!(
            csp_content,
            "default-src 'unsafe-eval' 'unsafe-inline' data:;"
        );
    }

    #[test]
    fn no_css() {
        let mut options = MonolithOptions::default();
        options.no_css = true;
        let csp_content = html::compose_csp(&options);

        assert_eq!(csp_content, "style-src 'none';");
    }

    #[test]
    fn no_fonts() {
        let mut options = MonolithOptions::default();
        options.no_fonts = true;
        let csp_content = html::compose_csp(&options);

        assert_eq!(csp_content, "font-src 'none';");
    }

    // A single option yields two directives here: `frame-src` and `child-src`.
    #[test]
    fn no_frames() {
        let mut options = MonolithOptions::default();
        options.no_frames = true;
        let csp_content = html::compose_csp(&options);

        assert_eq!(csp_content, "frame-src 'none'; child-src 'none';");
    }

    #[test]
    fn no_js() {
        let mut options = MonolithOptions::default();
        options.no_js = true;
        let csp_content = html::compose_csp(&options);

        assert_eq!(csp_content, "script-src 'none';");
    }

    // Unlike the other directives, the image one is expected to allow `data:`
    // rather than use 'none'.
    #[test]
    fn no_images() {
        let mut options = MonolithOptions::default();
        options.no_images = true;
        let csp_content = html::compose_csp(&options);

        assert_eq!(csp_content, "img-src data:;");
    }

    // Every option set at once: the expected value is the individual policies
    // from the tests above joined by single spaces, in the order shown below.
    #[test]
    fn all() {
        let mut options = MonolithOptions::default();
        options.isolate = true;
        options.no_css = true;
        options.no_fonts = true;
        options.no_frames = true;
        options.no_js = true;
        options.no_images = true;
        let csp_content = html::compose_csp(&options);

        assert_eq!(
            csp_content,
            "default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;"
        );
    }
}

================================================
FILE: tests/html/create_metadata_tag.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Calls `html::create_metadata_tag` with an http, a file and a data URL and
// compares the returned string with one built locally from a timestamp and the
// crate name/version taken from the Cargo environment.
//
// NOTE(review): every `format!` template in this module is an empty string in
// this copy although positional arguments are supplied; the template text
// appears to be missing -- confirm against the original file.
//
// NOTE(review): `timestamp` is captured with second precision before the
// function under test runs. Presumably the function stamps the current time
// itself, in which case a second rollover between the two calls would make the
// comparison fail -- TODO confirm.
#[cfg(test)]
mod passing {
    use chrono::prelude::*;
    use reqwest::Url;

    use monolith::html;

    // The only case here that passes the URL itself to `format!`.
    #[test]
    fn http_url() {
        let url: Url = Url::parse("http://192.168.1.1/").unwrap();
        let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
        let metadata_comment: String = html::create_metadata_tag(&url);

        assert_eq!(
            metadata_comment,
            format!(
                "",
                &url,
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        );
    }

    // The URL is not among the `format!` arguments in this case.
    #[test]
    fn file_url() {
        let url: Url = Url::parse("file:///home/monolith/index.html").unwrap();
        let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
        let metadata_comment: String = html::create_metadata_tag(&url);

        assert_eq!(
            metadata_comment,
            format!(
                "",
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        );
    }

    // Same argument list as `file_url`: the URL is not passed to `format!`.
    #[test]
    fn data_url() {
        let url: Url = Url::parse("data:text/html,Hello%2C%20World!").unwrap();
        let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
        let metadata_comment: String = html::create_metadata_tag(&url);

        assert_eq!(
            metadata_comment,
            format!(
                "",
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        );
    }
}

================================================
FILE: tests/html/embed_srcset.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// 
██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Srcset values that `html::embed_srcset` is expected to rewrite candidate by
// candidate. Images are disabled in every case, so each candidate URL should
// come back as `EMPTY_IMAGE_DATA_URL` with its descriptor (if any) preserved
// and the candidates joined by ", ".
#[cfg(test)]
mod passing {
    use reqwest::Url;

    use monolith::core::MonolithOptions;
    use monolith::html;
    use monolith::session::Session;
    use monolith::url::EMPTY_IMAGE_DATA_URL;

    // Shared fixture: a silent session with images turned off and a `data:,`
    // document URL, used to run `html::embed_srcset` on `srcset_value`.
    fn embed_with_images_disabled(srcset_value: &str) -> String {
        let mut options = MonolithOptions::default();
        options.no_images = true;
        options.silent = true;

        let mut session: Session = Session::new(None, None, options);
        let document_url = Url::parse("data:,").unwrap();

        html::embed_srcset(&mut session, &document_url, srcset_value)
    }

    // Three candidates, each with a pixel-density descriptor.
    #[test]
    fn small_medium_large() {
        let embedded = embed_with_images_disabled("small.png 1x, medium.png 1.5x, large.png 2x");
        let expected = format!(
            "{dataurl} 1x, {dataurl} 1.5x, {dataurl} 2x",
            dataurl = EMPTY_IMAGE_DATA_URL
        );

        assert_eq!(embedded, expected);
    }

    // The first candidate has no descriptor at all.
    #[test]
    fn small_medium_only_medium_has_scale() {
        let embedded = embed_with_images_disabled("small.png, medium.png 1.5x");
        let expected = format!("{dataurl}, {dataurl} 1.5x", dataurl = EMPTY_IMAGE_DATA_URL);

        assert_eq!(embedded, expected);
    }

    // A comma inside a file name must not be treated as a candidate separator.
    #[test]
    fn commas_within_file_names() {
        let embedded = embed_with_images_disabled("small,s.png 1x, large,l.png 2x");
        let expected = format!("{dataurl} 1x, {dataurl} 2x", dataurl = EMPTY_IMAGE_DATA_URL);

        assert_eq!(embedded, expected);
    }

    // U+202F inside a file name must not split the URL from its descriptor.
    #[test]
    fn narrow_whitespaces_within_file_names() {
        let embedded =
            embed_with_images_disabled("small\u{202f}s.png 1x, large\u{202f}l.png 2x");
        let expected = format!("{dataurl} 1x, {dataurl} 2x", dataurl = EMPTY_IMAGE_DATA_URL);

        assert_eq!(embedded, expected);
    }

    // Separators followed by a tab or a newline instead of a space.
    #[test]
    fn tabs_and_newlines_after_commas() {
        let embedded =
            embed_with_images_disabled("small-s.png 1x,\tmedium,m.png 2x,\nlarge-l.png 3x");
        let expected = format!(
            "{dataurl} 1x, {dataurl} 2x, {dataurl} 3x",
            dataurl = EMPTY_IMAGE_DATA_URL
        );

        assert_eq!(embedded, expected);
    }

    // Separators with nothing after the comma.
    #[test]
    fn no_whitespace_after_commas() {
        let embedded = embed_with_images_disabled("small-s.png 1x,medium-m.png 2x,large-l.png 3x");
        let expected = format!(
            "{dataurl} 1x, {dataurl} 2x, {dataurl} 3x",
            dataurl = EMPTY_IMAGE_DATA_URL
        );

        assert_eq!(embedded, expected);
    }

    // Width descriptors, with the final candidate carrying none.
    #[test]
    fn last_without_descriptor() {
        let embedded =
            embed_with_images_disabled("small-s.png 400w, medium-m.png 800w, large-l.png");
        let expected = format!(
            "{dataurl} 400w, {dataurl} 800w, {dataurl}",
            dataurl = EMPTY_IMAGE_DATA_URL
        );

        assert_eq!(embedded, expected);
    }
}

// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
// Malformed srcset input: the value ends with a separator comma. The test
// expects the two real candidates to be emitted and nothing for the empty
// trailing entry.
#[cfg(test)]
mod failing {
    use reqwest::Url;

    use monolith::core::MonolithOptions;
    use monolith::html;
    use monolith::session::Session;
    use monolith::url::EMPTY_IMAGE_DATA_URL;

    #[test]
    fn trailing_comma() {
        let srcset_value = "small.png 1x, large.png 2x,";
        // Images are disabled, so every candidate URL is expected to come back
        // as `EMPTY_IMAGE_DATA_URL`.
        let mut options = MonolithOptions::default();
        options.no_images = true;
        options.silent = true;
        let mut session: Session = Session::new(None, None, options);
        let embedded_css =
            html::embed_srcset(&mut session, &Url::parse("data:,").unwrap(), srcset_value);

        assert_eq!(
            embedded_css,
            format!("{dataurl} 1x, {dataurl} 2x", dataurl = EMPTY_IMAGE_DATA_URL),
        );
    }
}

================================================
FILE: tests/html/get_base_url.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Documents from which `html::get_base_url` is expected to return a URL.
//
// NOTE(review): every `html` fixture in this file is a single space in this
// copy while the assertions expect concrete values; the markup appears to have
// been lost -- confirm against the original file.
#[cfg(test)]
mod passing {
    use monolith::html;

    #[test]
    fn present() {
        let html = " ";
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());

        assert_eq!(
            html::get_base_url(&dom.document),
            Some("https://musicbrainz.org".to_string())
        );
    }

    // Going by the test name, the document carries more than one candidate
    // tag; a single URL is expected back.
    #[test]
    fn multiple_tags() {
        let html = " ";
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());

        assert_eq!(
            html::get_base_url(&dom.document),
            Some("https://www.discogs.com/".to_string())
        );
    }
}

// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Documents without a usable base URL: `None` is expected in the first two
// cases and an empty string in the third.
#[cfg(test)]
mod failing {
    use monolith::html;

    #[test]
    fn absent() {
        let html = " ";
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());

        assert_eq!(html::get_base_url(&dom.document), None);
    }

    #[test]
    fn no_href() {
        let html = " ";
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());

        assert_eq!(html::get_base_url(&dom.document), None);
    }

    // An empty value is expected back as `Some("")`, not as `None`.
    #[test]
    fn empty_href() {
        let html = " ";
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());

        assert_eq!(html::get_base_url(&dom.document), Some("".to_string()));
    }
}

================================================
FILE: tests/html/get_charset.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Documents from which `html::get_charset` is expected to return a charset
// label.
//
// NOTE(review): the `html` fixtures are a single space in this copy; the
// markup appears to have been lost -- confirm against the original file.
#[cfg(test)]
mod passing {
    use monolith::html;

    #[test]
    fn meta_content_type() {
        let html = " ";
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());

        assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string()));
    }

    #[test]
    fn meta_charset() {
        let html = " ";
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());

        assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string()));
    }

    // Going by the test names, the last two cases declare conflicting charsets
    // and differ in which declaration comes first.
    #[test]
    fn multiple_conflicting_meta_charset_first() {
        let html = " ";
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());

        assert_eq!(html::get_charset(&dom.document), Some("utf-8".to_string()));
    }

    #[test]
    fn multiple_conflicting_meta_content_type_first() {
        let html = " ";
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());

        assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string()));
    }
}

================================================
FILE: tests/html/get_node_attr.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// 
██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Walks a parsed document and checks `html::get_node_attr` on the `body` and
// `div` elements it meets.
#[cfg(test)]
mod passing {
    use markup5ever_rcdom::{Handle, NodeData};

    use monolith::html;

    #[test]
    fn div_two_style_attributes() {
        // NOTE(review): the fixture is a lone newline in this copy; the markup
        // appears to have been lost -- confirm against the original file.
        let html = "
";
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
        let mut count = 0;

        // Recursive visitor: bumps `i` for every node it is called on, descends
        // into Document and Element nodes, and does not descend into any other
        // node kind.
        fn test_walk(node: &Handle, i: &mut i8) {
            *i += 1;

            match &node.data {
                NodeData::Document => {
                    // Dig deeper
                    for child in node.children.borrow().iter() {
                        test_walk(child, &mut *i);
                    }
                }
                NodeData::Element { name, .. } => {
                    let node_name = name.local.as_ref().to_string();

                    if node_name == "body" {
                        // `body` is expected to have no `class` attribute.
                        assert_eq!(html::get_node_attr(node, "class"), None);
                    } else if node_name == "div" {
                        assert_eq!(
                            html::get_node_attr(node, "style"),
                            Some("color: blue;".to_string())
                        );
                    }

                    for child in node.children.borrow().iter() {
                        test_walk(child, &mut *i);
                    }
                }
                _ => (),
            };
        }

        test_walk(&dom.document, &mut count);

        // Total number of nodes visited, the document node included.
        assert_eq!(count, 6);
    }
}

================================================
FILE: tests/html/get_node_name.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Walks a parsed document and, for selected elements, checks the name that
// `html::get_node_name` reports for the node returned by
// `html::get_parent_node`.
#[cfg(test)]
mod passing {
    use markup5ever_rcdom::{Handle, NodeData};

    use monolith::html;

    #[test]
    fn parent_node_names() {
        // NOTE(review): the fixture consists of two newlines in this copy; the
        // markup appears to have been lost -- confirm against the original file.
        let html = "

";
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
        let mut count = 0;

        // Recursive visitor: bumps `i` for every node it is called on and
        // descends into Document and Element nodes only.
        fn test_walk(node: &Handle, i: &mut i8) {
            *i += 1;

            match &node.data {
                NodeData::Document => {
                    for child in node.children.borrow().iter() {
                        test_walk(child, &mut *i);
                    }
                }
                NodeData::Element { name, .. } => {
                    let node_name = name.local.as_ref().to_string();
                    let parent = html::get_parent_node(node);
                    let parent_node_name = html::get_node_name(&parent);

                    // Expected nesting: html > head/body, body > div, div > p.
                    if node_name == "head" || node_name == "body" {
                        assert_eq!(parent_node_name, Some("html"));
                    } else if node_name == "div" {
                        assert_eq!(parent_node_name, Some("body"));
                    } else if node_name == "p" {
                        assert_eq!(parent_node_name, Some("div"));
                    }

                    for child in node.children.borrow().iter() {
                        test_walk(child, &mut *i);
                    }
                }
                _ => (),
            };
        }

        test_walk(&dom.document, &mut count);

        // Total number of nodes visited, the document node included.
        assert_eq!(count, 7);
    }
}

================================================
FILE: tests/html/has_favicon.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Documents for which `html::has_favicon` is expected to return `true`.
//
// NOTE(review): all three fixtures in this file are the same text in this
// copy, although two are expected to contain a favicon and one is not; the
// markup appears to have been lost -- confirm against the original file.
#[cfg(test)]
mod passing {
    use monolith::html;

    #[test]
    fn icon() {
        let html = r#"
text
"#;
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
        let res: bool = html::has_favicon(&dom.document);

        assert!(res);
    }

    #[test]
    fn shortcut_icon() {
        let html = r#"
text
"#;
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
        let res: bool = html::has_favicon(&dom.document);

        assert!(res);
    }
}

// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Document for which `html::has_favicon` is expected to return `false`.
#[cfg(test)]
mod failing {
    use monolith::html;

    #[test]
    fn absent() {
        let html = "
text
";
        let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
        let res: bool = html::has_favicon(&dom.document);

        assert!(!res);
    }
}

================================================
FILE: tests/html/is_favicon.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Strings that `is_favicon` is expected to accept, including capitalized and
// upper-case spellings.
#[cfg(test)]
mod passing {
    use monolith::html::is_favicon;

    #[test]
    fn icon() {
        assert!(is_favicon("icon"));
    }

    #[test]
    fn shortcut_icon_capitalized() {
        assert!(is_favicon("Shortcut Icon"));
    }

    #[test]
    fn icon_uppercase() {
        assert!(is_favicon("ICON"));
    }
}

// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Strings that `is_favicon` is expected to reject: other icon-like values, an
// unrelated value and the empty string.
#[cfg(test)]
mod failing {
    use monolith::html::is_favicon;

    #[test]
    fn apple_touch_icon() {
        assert!(!is_favicon("apple-touch-icon"));
    }

    #[test]
    fn mask_icon() {
        assert!(!is_favicon("mask-icon"));
    }

    #[test]
    fn fluid_icon() {
        assert!(!is_favicon("fluid-icon"));
    }

    #[test]
    fn stylesheet() {
        assert!(!is_favicon("stylesheet"));
    }

    #[test]
    fn empty_string() {
        assert!(!is_favicon(""));
    }
}

================================================
FILE: tests/html/mod.rs
================================================
// Declares the HTML test files as submodules.
mod add_favicon;
mod check_integrity;
mod compose_csp;
mod create_metadata_tag;
mod embed_srcset;
mod get_base_url;
mod get_charset;
mod get_node_attr;
mod get_node_name;
mod has_favicon;
mod is_favicon;
mod parse_link_type;
mod parse_srcset;
mod serialize_document;
mod set_node_attr;
mod walk;

================================================
FILE: 
tests/html/parse_link_type.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// `rel`-style values for which `parse_link_type` is expected to report a
// specific `LinkType`.
#[cfg(test)]
mod passing {
    use monolith::html::{parse_link_type, LinkType};

    #[test]
    fn icon() {
        let link_types = parse_link_type("icon");

        assert!(link_types.contains(&LinkType::Favicon));
    }

    // Mixed-case, two-word value; still expected to be reported as a favicon.
    #[test]
    fn shortcut_icon_capitalized() {
        let link_types = parse_link_type("Shortcut Icon");

        assert!(link_types.contains(&LinkType::Favicon));
    }

    #[test]
    fn stylesheet() {
        let link_types = parse_link_type("stylesheet");

        assert!(link_types.contains(&LinkType::Stylesheet));
    }

    // `stylesheet` must be found even when another keyword precedes it.
    #[test]
    fn preload_stylesheet() {
        let link_types = parse_link_type("preload stylesheet");

        assert!(link_types.contains(&LinkType::Stylesheet));
    }

    #[test]
    fn apple_touch_icon() {
        let link_types = parse_link_type("apple-touch-icon");

        assert!(link_types.contains(&LinkType::AppleTouchIcon));
    }
}

// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

// Values for which `parse_link_type` is expected to report nothing at all.
#[cfg(test)]
mod failing {
    use monolith::html::parse_link_type;

    #[test]
    fn mask_icon() {
        let link_types = parse_link_type("mask-icon");

        assert!(link_types.is_empty());
    }

    #[test]
    fn fluid_icon() {
        let link_types = parse_link_type("fluid-icon");

        assert!(link_types.is_empty());
    }

    #[test]
    fn empty_string() {
        let link_types = parse_link_type("");

        assert!(link_types.is_empty());
    }
}

================================================
FILE: tests/html/parse_srcset.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ 
██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use monolith::html::{parse_srcset, SrcSetItem}; #[test] fn three_items_with_width_descriptors_and_newlines() { let srcset = r#"https://some-site.com/width/600/https://media2.some-site.com/2021/07/some-image-073362.jpg 600w, https://some-site.com/width/960/https://media2.some-site.com/2021/07/some-image-073362.jpg 960w, https://some-site.com/width/1200/https://media2.some-site.com/2021/07/some-image-073362.jpg 1200w"#; let srcset_items: Vec = parse_srcset(srcset); assert_eq!(srcset_items.len(), 3); assert_eq!(srcset_items[0].path, "https://some-site.com/width/600/https://media2.some-site.com/2021/07/some-image-073362.jpg"); assert_eq!(srcset_items[0].descriptor, "600w"); assert_eq!(srcset_items[1].path, "https://some-site.com/width/960/https://media2.some-site.com/2021/07/some-image-073362.jpg"); assert_eq!(srcset_items[1].descriptor, "960w"); assert_eq!(srcset_items[2].path, "https://some-site.com/width/1200/https://media2.some-site.com/2021/07/some-image-073362.jpg"); assert_eq!(srcset_items[2].descriptor, "1200w"); } } ================================================ FILE: tests/html/serialize_document.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use monolith::core::MonolithOptions; use monolith::html; #[test] fn div_as_root_element() { let html = "
"; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let options = MonolithOptions::default(); assert_eq!( String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)), "
" ); } #[test] fn full_page_with_no_html_head_or_body() { let html = "Isolated document\ \ \
"; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let mut options = MonolithOptions::default(); options.isolate = true; assert_eq!( String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)), "\ \ \ Isolated document\ \ \ \ \
\ \
\ \ " ); } #[test] fn doctype_and_the_rest_no_html_head_or_body() { let html = "\ Unstyled document\ \
"; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let mut options = MonolithOptions::default(); options.no_css = true; assert_eq!( String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)), "\ \ \ \ Unstyled document\ \ \
\ " ); } #[test] fn doctype_and_the_rest_no_html_head_or_body_forbid_frames() { let html = "\ Frameless document\ \
"; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let mut options = MonolithOptions::default(); options.no_frames = true; assert_eq!( String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)), "\ \ \ \ Frameless document\ \ \
\ " ); } #[test] fn doctype_and_the_rest_all_forbidden() { let html = "\ no-frame no-css no-js no-image isolated document\ \ \
\ \ \ \
"; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let mut options = MonolithOptions::default(); options.isolate = true; options.no_css = true; options.no_fonts = true; options.no_frames = true; options.no_js = true; options.no_images = true; assert_eq!( String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)), "\ \ \ \ no-frame no-css no-js no-image isolated document\ \ \ \ \
\ \ \ \
\ \ " ); } } ================================================ FILE: tests/html/set_node_attr.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use markup5ever_rcdom::{Handle, NodeData}; use monolith::html; #[test] fn html_lang_and_body_style() { let html = ""; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let mut count = 0; fn test_walk(node: &Handle, i: &mut i8) { *i += 1; match &node.data { NodeData::Document => { // Dig deeper for child in node.children.borrow().iter() { test_walk(child, &mut *i); } } NodeData::Element { name, .. } => { let node_name = name.local.as_ref().to_string(); if node_name == "html" { assert_eq!(html::get_node_attr(node, "lang"), Some("en".to_string())); html::set_node_attr(node, "lang", Some("de".to_string())); assert_eq!(html::get_node_attr(node, "lang"), Some("de".to_string())); html::set_node_attr(node, "lang", None); assert_eq!(html::get_node_attr(node, "lang"), None); html::set_node_attr(node, "lang", Some("".to_string())); assert_eq!(html::get_node_attr(node, "lang"), Some("".to_string())); } else if node_name == "body" { assert_eq!(html::get_node_attr(node, "style"), None); html::set_node_attr(node, "style", Some("display: none;".to_string())); assert_eq!( html::get_node_attr(node, "style"), Some("display: none;".to_string()) ); } for child in node.children.borrow().iter() { test_walk(child, &mut *i); } } _ => (), }; } test_walk(&dom.document, &mut count); assert_eq!(count, 5); } #[test] fn body_background() { let html = ""; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let mut count = 0; fn test_walk(node: &Handle, i: &mut i8) { 
*i += 1; match &node.data { NodeData::Document => { // Dig deeper for child in node.children.borrow().iter() { test_walk(child, &mut *i); } } NodeData::Element { name, .. } => { let node_name = name.local.as_ref().to_string(); if node_name == "body" { assert_eq!( html::get_node_attr(node, "background"), Some("1".to_string()) ); html::set_node_attr(node, "background", None); assert_eq!(html::get_node_attr(node, "background"), None); } for child in node.children.borrow().iter() { test_walk(child, &mut *i); } } _ => (), }; } test_walk(&dom.document, &mut count); assert_eq!(count, 5); } } ================================================ FILE: tests/html/walk.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use html5ever::serialize::{serialize, SerializeOpts}; use markup5ever_rcdom::SerializableHandle; use url::Url; use monolith::core::MonolithOptions; use monolith::html; use monolith::session::Session; use monolith::url::EMPTY_IMAGE_DATA_URL; #[test] fn basic() { let html: &str = "

"; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), "

" ); } #[test] fn ensure_no_recursive_iframe() { let html = "

"; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), "

" ); } #[test] fn ensure_no_recursive_frame() { let html = ""; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), "" ); } #[test] fn no_css() { let html = "\ \ \ \
\ "; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.no_css = true; options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), "\ \ \ \ \ \ \ \
\ \ \ " ); } #[test] fn no_images() { let html = "\
"; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.no_images = true; options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), format!( "\ \ \ \ \
\ \
\ \ ", empty_image = EMPTY_IMAGE_DATA_URL ) ); } #[test] fn no_body_background_images() { let html = ""; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.no_images = true; options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), "" ); } #[test] fn no_frames() { let html = ""; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.no_frames = true; options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), "\ \ \ \ \ \ \ \ " ); } #[test] fn no_iframes() { let html = ""; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.no_frames = true; options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), "\ \ \ \ \ \ \ " ); } #[test] fn no_js() { let html = "\
\ \ \
\ "; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.no_js = true; options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), "\ \ \ \
\ \ \
\ \ \ " ); } #[test] fn keeps_integrity_for_unfamiliar_links() { let html = "Has integrity\ "; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), "\ \ \ Has integrity\ \ \ \ \ " ); } #[test] fn discards_integrity_for_known_links_nojs_nocss() { let html = "\ No integrity\ \ \ "; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.no_css = true; options.no_js = true; options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), "\ \ \ No integrity\ \ \ \ \ \ " ); } #[test] fn discards_integrity_for_embedded_assets() { let html = "\ No integrity\ \ \ "; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.no_css = true; options.no_js = true; options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), "\ \ \ No integrity\ \ \ \ \ \ \ " ); } #[test] 
fn removes_unwanted_meta_tags() { let html = "\ \ \ \ \ \ \ \ \ "; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.no_css = true; options.no_frames = true; options.no_js = true; options.no_images = true; options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), "\ \ \ \ \ \ \ \ " ); } #[test] fn processes_noscript_tags() { let html = "\ \ \ \ \ "; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.no_images = true; options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), format!( "\ \ \ \ \ \ \ ", EMPTY_IMAGE_DATA_URL, ) ); } #[test] fn preserves_script_type_json() { let html = ""; let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string()); let url: Url = Url::parse("http://localhost").unwrap(); let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); html::walk(&mut session, &url, &dom.document); let mut buf: Vec = Vec::new(); serialize( &mut buf, &SerializableHandle::from(dom.document.clone()), SerializeOpts::default(), ) .unwrap(); assert_eq!( buf.iter().map(|&c| c as char).collect::(), "\ \ \ \ \ \ \ " ); } } ================================================ FILE: 
tests/js/attr_is_event_handler.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

#[cfg(test)]
mod passing {
    use monolith::js::attr_is_event_handler;

    // Attribute names that must be classified as event handlers,
    // in both lowercase and camelCase spelling.

    #[test]
    fn onblur_camelcase() {
        let is_handler = attr_is_event_handler("onBlur");
        assert!(is_handler);
    }

    #[test]
    fn onclick_lowercase() {
        let is_handler = attr_is_event_handler("onclick");
        assert!(is_handler);
    }

    #[test]
    fn onclick_camelcase() {
        let is_handler = attr_is_event_handler("onClick");
        assert!(is_handler);
    }
}

// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

#[cfg(test)]
mod failing {
    use monolith::js::attr_is_event_handler;

    // Attribute names that must not be classified as event handlers.

    #[test]
    fn href() {
        let is_handler = attr_is_event_handler("href");
        assert!(!is_handler);
    }

    #[test]
    fn empty_string() {
        let is_handler = attr_is_event_handler("");
        assert!(!is_handler);
    }

    #[test]
    fn class() {
        let is_handler = attr_is_event_handler("class");
        assert!(!is_handler);
    }
}

================================================
FILE: tests/js/mod.rs
================================================
mod attr_is_event_handler;

================================================
FILE: tests/mod.rs
================================================
mod cli;
mod cookies;
mod core;
mod css;
mod html;
mod js;
mod session;
mod url;

================================================
FILE: tests/session/mod.rs
================================================
mod retrieve_asset;

================================================
FILE: tests/session/retrieve_asset.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// 
██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use reqwest::Url; use std::env; use monolith::core::MonolithOptions; use monolith::session::Session; use monolith::url; #[test] fn read_data_url() { let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); // If both source and target are data URLs, // ensure the result contains target data URL let (data, final_url, media_type, charset) = session .retrieve_asset( &Url::parse("data:text/html;base64,c291cmNl").unwrap(), &Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(), ) .unwrap(); assert_eq!(&media_type, "text/html"); assert_eq!(&charset, "US-ASCII"); assert_eq!( url::create_data_url(&media_type, &charset, &data, &final_url), Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(), ); assert_eq!( final_url, Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(), ); } #[test] fn read_local_file_with_file_url_parent() { let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" }; // Inclusion of local assets from local sources should be allowed let cwd = env::current_dir().unwrap(); let (data, final_url, media_type, charset) = session .retrieve_asset( &Url::parse(&format!( "{file}{cwd}/tests/_data_/basic/local-file.html", file = file_url_protocol, cwd = cwd.to_str().unwrap() )) .unwrap(), &Url::parse(&format!( "{file}{cwd}/tests/_data_/basic/local-script.js", file = file_url_protocol, cwd = cwd.to_str().unwrap() )) .unwrap(), ) .unwrap(); assert_eq!(&media_type, "text/javascript"); assert_eq!(&charset, ""); let data_url = 
"data:text/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="; assert_eq!( url::create_data_url(&media_type, &charset, &data, &final_url), Url::parse(data_url).unwrap() ); assert_eq!( final_url, Url::parse(&format!( "{file}{cwd}/tests/_data_/basic/local-script.js", file = file_url_protocol, cwd = cwd.to_str().unwrap() )) .unwrap() ); } } // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod failing { use reqwest::Url; use monolith::core::MonolithOptions; use monolith::session::Session; #[test] fn read_local_file_with_data_url_parent() { let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); // Inclusion of local assets from data URL sources should not be allowed match session.retrieve_asset( &Url::parse("data:text/html;base64,SoUrCe").unwrap(), &Url::parse("file:///etc/passwd").unwrap(), ) { Ok((..)) => { assert!(false); } Err(_) => { assert!(true); } } } #[test] fn read_local_file_with_https_parent() { let mut options = MonolithOptions::default(); options.silent = true; let mut session: Session = Session::new(None, None, options); // Inclusion of local assets from remote sources should not be allowed match session.retrieve_asset( &Url::parse("https://kernel.org/").unwrap(), &Url::parse("file:///etc/passwd").unwrap(), ) { Ok((..)) => { assert!(false); } Err(_) => { assert!(true); } } } } ================================================ FILE: tests/url/clean_url.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ 
███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use reqwest::Url; use monolith::url; #[test] fn preserve_original() { let u: Url = Url::parse("https://somewhere.com/font.eot#iefix").unwrap(); let clean_u: Url = url::clean_url(u.clone()); assert_eq!(clean_u.as_str(), "https://somewhere.com/font.eot"); assert_eq!(u.as_str(), "https://somewhere.com/font.eot#iefix"); } #[test] fn removes_fragment() { assert_eq!( url::clean_url(Url::parse("https://somewhere.com/font.eot#iefix").unwrap()).as_str(), "https://somewhere.com/font.eot" ); } #[test] fn removes_empty_fragment() { assert_eq!( url::clean_url(Url::parse("https://somewhere.com/font.eot#").unwrap()).as_str(), "https://somewhere.com/font.eot" ); } #[test] fn removes_empty_fragment_and_keeps_empty_query() { assert_eq!( url::clean_url(Url::parse("https://somewhere.com/font.eot?#").unwrap()).as_str(), "https://somewhere.com/font.eot?" 
); } #[test] fn removes_empty_fragment_and_keeps_query() { assert_eq!( url::clean_url(Url::parse("https://somewhere.com/font.eot?a=b&#").unwrap()).as_str(), "https://somewhere.com/font.eot?a=b&" ); } #[test] fn keeps_credentials() { assert_eq!( url::clean_url(Url::parse("https://cookie:monster@gibson.internet/").unwrap()).as_str(), "https://cookie:monster@gibson.internet/" ); } } ================================================ FILE: tests/url/create_data_url.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use reqwest::Url; use monolith::url; #[test] fn encode_string_with_specific_media_type() { let media_type = "application/javascript"; let data = "var word = 'hello';\nalert(word);\n"; let data_url = url::create_data_url( media_type, "", data.as_bytes(), &Url::parse("data:,").unwrap(), ); assert_eq!( data_url.as_str(), "data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK" ); } #[test] fn encode_append_fragment() { let data = "\n"; let data_url = url::create_data_url( "image/svg+xml", "", data.as_bytes(), &Url::parse("data:,").unwrap(), ); assert_eq!( data_url.as_str(), "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K" ); } #[test] fn encode_string_with_specific_media_type_and_charset() { let media_type = "application/javascript"; let charset = "utf8"; let data = "var word = 'hello';\nalert(word);\n"; let data_url = url::create_data_url( media_type, charset, data.as_bytes(), &Url::parse("data:,").unwrap(), ); assert_eq!( data_url.as_str(), "data:application/javascript;charset=utf8;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK" ); } #[test] fn create_data_url_with_us_ascii_charset() { 
let media_type = ""; let charset = "us-ascii"; let data = ""; let data_url = url::create_data_url( media_type, charset, data.as_bytes(), &Url::parse("data:,").unwrap(), ); assert_eq!(data_url.as_str(), "data:;base64,"); } #[test] fn create_data_url_with_utf8_charset() { let media_type = ""; let charset = "utf8"; let data = ""; let data_url = url::create_data_url( media_type, charset, data.as_bytes(), &Url::parse("data:,").unwrap(), ); assert_eq!(data_url.as_str(), "data:;charset=utf8;base64,"); } #[test] fn create_data_url_with_media_type_text_plain_and_utf8_charset() { let media_type = "text/plain"; let charset = "utf8"; let data = ""; let data_url = url::create_data_url( media_type, charset, data.as_bytes(), &Url::parse("data:,").unwrap(), ); assert_eq!(data_url.as_str(), "data:text/plain;charset=utf8;base64,"); } } ================================================ FILE: tests/url/domain_is_within_domain.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use monolith::url::domain_is_within_domain; #[test] fn sub_domain_is_within_dotted_sub_domain() { assert!(domain_is_within_domain( "news.ycombinator.com", ".news.ycombinator.com" )); } #[test] fn domain_is_within_dotted_domain() { assert!(domain_is_within_domain( "ycombinator.com", ".ycombinator.com" )); } #[test] fn sub_domain_is_within_dotted_domain() { assert!(domain_is_within_domain( "news.ycombinator.com", ".ycombinator.com" )); } #[test] fn sub_domain_is_within_dotted_top_level_domain() { assert!(domain_is_within_domain("news.ycombinator.com", ".com")); } #[test] fn domain_is_within_itself() { assert!(domain_is_within_domain( "ycombinator.com", 
"ycombinator.com" )); } #[test] fn domain_with_trailing_dot_is_within_itself() { assert!(domain_is_within_domain( "ycombinator.com.", "ycombinator.com" )); } #[test] fn domain_with_trailing_dot_is_within_single_dot() { assert!(domain_is_within_domain("ycombinator.com.", ".")); } #[test] fn domain_matches_single_dot() { assert!(domain_is_within_domain("ycombinator.com", ".")); } #[test] fn dotted_domain_must_be_within_dotted_domain() { assert!(domain_is_within_domain( ".ycombinator.com", ".ycombinator.com" )); } #[test] fn empty_is_within_dot() { assert!(domain_is_within_domain("", ".")); } #[test] fn both_dots() { assert!(domain_is_within_domain(".", ".")); } } // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod failing { use monolith::url::domain_is_within_domain; #[test] fn sub_domain_must_not_be_within_domain() { assert!(!domain_is_within_domain( "news.ycombinator.com", "ycombinator.com" )); } #[test] fn domain_must_not_be_within_top_level_domain() { assert!(!domain_is_within_domain("ycombinator.com", "com")); } #[test] fn different_domains_must_not_be_within_one_another() { assert!(!domain_is_within_domain( "news.ycombinator.com", "kernel.org" )); } #[test] fn sub_domain_is_not_within_wrong_top_level_domain() { assert!(!domain_is_within_domain("news.ycombinator.com", "org")); } #[test] fn dotted_domain_is_not_within_domain() { assert!(!domain_is_within_domain( ".ycombinator.com", "ycombinator.com" )); } #[test] fn different_domain_is_not_within_dotted_domain() { assert!(!domain_is_within_domain( "www.doodleoptimize.com", ".ycombinator.com" )); } #[test] fn no_domain_can_be_within_empty_domain() { assert!(!domain_is_within_domain("ycombinator.com", "")); } #[test] fn both_can_not_be_empty() { 
assert!(!domain_is_within_domain("", "")); } } ================================================ FILE: tests/url/get_referer_url.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use reqwest::Url; use monolith::url; #[test] fn preserve_original() { let original_url: Url = Url::parse("https://somewhere.com/font.eot#iefix").unwrap(); let referer_url: Url = url::get_referer_url(original_url.clone()); assert_eq!(referer_url.as_str(), "https://somewhere.com/font.eot"); assert_eq!( original_url.as_str(), "https://somewhere.com/font.eot#iefix" ); } #[test] fn removes_fragment() { assert_eq!( url::get_referer_url(Url::parse("https://somewhere.com/font.eot#iefix").unwrap()) .as_str(), "https://somewhere.com/font.eot" ); } #[test] fn removes_empty_fragment() { assert_eq!( url::get_referer_url(Url::parse("https://somewhere.com/font.eot#").unwrap()).as_str(), "https://somewhere.com/font.eot" ); } #[test] fn removes_empty_fragment_and_keeps_empty_query() { assert_eq!( url::get_referer_url(Url::parse("https://somewhere.com/font.eot?#").unwrap()).as_str(), "https://somewhere.com/font.eot?" 
); } #[test] fn removes_empty_fragment_and_keeps_query() { assert_eq!( url::get_referer_url(Url::parse("https://somewhere.com/font.eot?a=b&#").unwrap()) .as_str(), "https://somewhere.com/font.eot?a=b&" ); } #[test] fn removes_credentials() { assert_eq!( url::get_referer_url(Url::parse("https://cookie:monster@gibson.lan/path").unwrap()) .as_str(), "https://gibson.lan/path" ); } #[test] fn removes_empty_credentials() { assert_eq!( url::get_referer_url(Url::parse("https://@gibson.lan/path").unwrap()).as_str(), "https://gibson.lan/path" ); } #[test] fn removes_empty_username_credentials() { assert_eq!( url::get_referer_url(Url::parse("https://:monster@gibson.lan/path").unwrap()).as_str(), "https://gibson.lan/path" ); } #[test] fn removes_empty_password_credentials() { assert_eq!( url::get_referer_url(Url::parse("https://cookie@gibson.lan/path").unwrap()).as_str(), "https://gibson.lan/path" ); } } ================================================ FILE: tests/url/is_url_and_has_protocol.rs ================================================ // ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ // ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ // ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ // ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod passing { use monolith::url; #[test] fn mailto() { assert!(url::is_url_and_has_protocol( "mailto:somebody@somewhere.com?subject=hello" )); } #[test] fn tel() { assert!(url::is_url_and_has_protocol("tel:5551234567")); } #[test] fn ftp_no_slashes() { assert!(url::is_url_and_has_protocol("ftp:some-ftp-server.com")); } #[test] fn ftp_with_credentials() { assert!(url::is_url_and_has_protocol( "ftp://user:password@some-ftp-server.com" )); } #[test] fn javascript() { assert!(url::is_url_and_has_protocol("javascript:void(0)")); } #[test] fn http() { 
assert!(url::is_url_and_has_protocol("http://news.ycombinator.com"));
    }

    #[test]
    fn https() {
        assert!(url::is_url_and_has_protocol("https://github.com"));
    }

    #[test]
    fn file() {
        assert!(url::is_url_and_has_protocol("file:///tmp/image.png"));
    }

    #[test]
    fn mailto_uppercase() {
        assert!(url::is_url_and_has_protocol(
            "MAILTO:somebody@somewhere.com?subject=hello"
        ));
    }

    #[test]
    fn empty_data_url() {
        assert!(url::is_url_and_has_protocol("data:text/html,"));
    }

    #[test]
    fn empty_data_url_surrounded_by_spaces() {
        assert!(url::is_url_and_has_protocol(" data:text/html, "));
    }
}

// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

#[cfg(test)]
mod failing {
    use monolith::url::is_url_and_has_protocol;

    // Inputs that carry no protocol and therefore must be rejected.

    #[test]
    fn url_with_no_protocol() {
        let input = "//some-hostname.com/some-file.html";
        assert!(!is_url_and_has_protocol(input));
    }

    #[test]
    fn relative_path() {
        let input = "some-hostname.com/some-file.html";
        assert!(!is_url_and_has_protocol(input));
    }

    #[test]
    fn relative_to_root_path() {
        let input = "/some-file.html";
        assert!(!is_url_and_has_protocol(input));
    }

    #[test]
    fn empty_string() {
        let input = "";
        assert!(!is_url_and_has_protocol(input));
    }
}

================================================
FILE: tests/url/mod.rs
================================================
mod clean_url;
mod create_data_url;
mod domain_is_within_domain;
mod get_referer_url;
mod is_url_and_has_protocol;
mod parse_data_url;
mod resolve_url;

================================================
FILE: tests/url/parse_data_url.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

#[cfg(test)]
mod passing {
    use reqwest::Url;

    use monolith::url;

    // Every case parses a data URL and checks the reported media type,
    // the reported charset and the decoded payload, in that order.

    #[test]
    fn parse_text_html_base64() {
        let data_url = Url::parse("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==").unwrap();

        let (media_type, charset, data) = url::parse_data_url(&data_url);
        let text = String::from_utf8_lossy(&data);

        assert_eq!(media_type, "text/html");
        assert_eq!(charset, "US-ASCII");
        assert_eq!(
            text,
            "Work expands so as to fill the time available for its completion"
        );
    }

    #[test]
    fn parse_text_html_utf8() {
        let data_url = Url::parse("data:text/html;charset=utf8,Work expands so as to fill the time available for its completion").unwrap();

        let (media_type, charset, data) = url::parse_data_url(&data_url);
        let text = String::from_utf8_lossy(&data);

        assert_eq!(media_type, "text/html");
        assert_eq!(charset, "utf8");
        assert_eq!(
            text,
            "Work expands so as to fill the time available for its completion"
        );
    }

    #[test]
    fn parse_text_html_plaintext() {
        let data_url = Url::parse(
            "data:text/html,Work expands so as to fill the time available for its completion",
        )
        .unwrap();

        let (media_type, charset, data) = url::parse_data_url(&data_url);
        let text = String::from_utf8_lossy(&data);

        assert_eq!(media_type, "text/html");
        assert_eq!(charset, "US-ASCII");
        assert_eq!(
            text,
            "Work expands so as to fill the time available for its completion"
        );
    }

    #[test]
    fn parse_text_css_url_encoded() {
        let data_url = Url::parse("data:text/css,div{background-color:%23000}").unwrap();

        let (media_type, charset, data) = url::parse_data_url(&data_url);
        let text = String::from_utf8_lossy(&data);

        assert_eq!(media_type, "text/css");
        assert_eq!(charset, "US-ASCII");
        // The percent-encoded `%23` comes back as `#`.
        assert_eq!(text, "div{background-color:#000}");
    }

    #[test]
    fn parse_no_media_type_base64() {
        let data_url = Url::parse("data:;base64,dGVzdA==").unwrap();

        let (media_type, charset, data) = url::parse_data_url(&data_url);
        let text = String::from_utf8_lossy(&data);

        assert_eq!(media_type, "text/plain");
        assert_eq!(charset, "US-ASCII");
        assert_eq!(text, "test");
    }

    #[test]
    fn parse_no_media_type_no_encoding() {
        let data_url = Url::parse("data:;,test%20test").unwrap();

        let (media_type, charset, data) = url::parse_data_url(&data_url);
        let text = String::from_utf8_lossy(&data);

        assert_eq!(media_type, "text/plain");
        assert_eq!(charset, "US-ASCII");
        assert_eq!(text, "test test");
    }
}

// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

#[cfg(test)]
mod failing {
    use reqwest::Url;

    use monolith::url;

    // A data URL with nothing after the comma still yields the default
    // media type and charset, together with an empty payload.
    #[test]
    fn empty_data_url() {
        let data_url = Url::parse("data:,").unwrap();

        let (media_type, charset, data) = url::parse_data_url(&data_url);
        let text = String::from_utf8_lossy(&data);

        assert_eq!(media_type, "text/plain");
        assert_eq!(charset, "US-ASCII");
        assert_eq!(text, "");
    }
}

================================================
FILE: tests/url/resolve_url.rs
================================================
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝

#[cfg(test)]
mod passing {
    use reqwest::Url;

    use monolith::url;

    #[test]
    fn basic_httsp_relative() {
        assert_eq!(
            url::resolve_url(
                &Url::parse("https://www.kernel.org").unwrap(),
                "category/signatures.html"
            )
            .as_str(),
            Url::parse("https://www.kernel.org/category/signatures.html")
                .unwrap()
                .as_str()
        );
    }

    #[test]
    fn basic_httsp_absolute() {
        assert_eq!(
            url::resolve_url(
                &Url::parse("https://www.kernel.org").unwrap(),
                "/category/signatures.html"
            )
            .as_str(),
            Url::parse("https://www.kernel.org/category/signatures.html")
                .unwrap()
                .as_str()
        );
    }

    #[test]
    fn from_https_to_level_up_relative() {
        assert_eq!(
            url::resolve_url(
                &Url::parse("https://www.kernel.org").unwrap(),
                "../category/signatures.html"
            )
            .as_str(),
Url::parse("https://www.kernel.org/category/signatures.html") .unwrap() .as_str() ); } #[test] fn from_https_url_to_url_with_no_protocol() { assert_eq!( url::resolve_url( &Url::parse("https://www.kernel.org").unwrap(), "//www.kernel.org/theme/images/logos/tux.png", ) .as_str(), "https://www.kernel.org/theme/images/logos/tux.png" ); } #[test] fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() { assert_eq!( url::resolve_url( &Url::parse("https://www.kernel.org").unwrap(), "//another-host.org/theme/images/logos/tux.png", ) .as_str(), "https://another-host.org/theme/images/logos/tux.png" ); } #[test] fn from_https_url_to_absolute_path() { assert_eq!( url::resolve_url( &Url::parse("https://www.kernel.org/category/signatures.html").unwrap(), "/theme/images/logos/tux.png", ) .as_str(), "https://www.kernel.org/theme/images/logos/tux.png" ); } #[test] fn from_https_to_just_filename() { assert_eq!( url::resolve_url( &Url::parse("https://www.w3schools.com/html/html_iframe.asp").unwrap(), "default.asp", ) .as_str(), "https://www.w3schools.com/html/default.asp" ); } #[test] fn from_data_url_to_https() { assert_eq!( url::resolve_url( &Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h") .unwrap(), "https://www.kernel.org/category/signatures.html", ) .as_str(), "https://www.kernel.org/category/signatures.html" ); } #[test] fn from_data_url_to_data_url() { assert_eq!( url::resolve_url( &Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h") .unwrap(), "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K", ) .as_str(), "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K" ); } #[test] fn from_file_url_to_relative_path() { assert_eq!( url::resolve_url( &Url::parse("file:///home/user/Websites/my-website/index.html").unwrap(), "assets/images/logo.png", ) .as_str(), "file:///home/user/Websites/my-website/assets/images/logo.png" ); } #[test] fn from_file_url_to_relative_path_with_backslashes() 
{ assert_eq!( url::resolve_url( &Url::parse("file:\\\\\\home\\user\\Websites\\my-website\\index.html").unwrap(), "assets\\images\\logo.png", ) .as_str(), "file:///home/user/Websites/my-website/assets/images/logo.png" ); } #[test] fn from_data_url_to_file_url() { assert_eq!( url::resolve_url( &Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h") .unwrap(), "file:///etc/passwd", ) .as_str(), "file:///etc/passwd" ); } #[test] fn preserve_fragment() { assert_eq!( url::resolve_url( &Url::parse("http://doesnt-matter.local/").unwrap(), "css/fonts/fontmarvelous.svg#fontmarvelous", ) .as_str(), "http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous" ); } #[test] fn resolve_from_file_url_to_file_url() { if cfg!(windows) { assert_eq!( url::resolve_url( &Url::parse("file:///c:/index.html").unwrap(), "file:///c:/image.png" ) .as_str(), "file:///c:/image.png" ); } else { assert_eq!( url::resolve_url( &Url::parse("file:///tmp/index.html").unwrap(), "file:///tmp/image.png" ) .as_str(), "file:///tmp/image.png" ); } } } // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ // ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ // ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ // ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ #[cfg(test)] mod failing { use reqwest::Url; use monolith::url; #[test] fn from_data_url_to_url_with_no_protocol() { assert_eq!( url::resolve_url( &Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h") .unwrap(), "//www.w3schools.com/html/html_iframe.asp", ) .as_str(), "data:," ); } }