Repository: pemistahl/grex Branch: main Commit: 99cc34770737 Files: 44 Total size: 513.5 KB Directory structure: gitextract_b41iglrb/ ├── .editorconfig ├── .github/ │ ├── dependabot.yml │ └── workflows/ │ ├── python-build.yml │ ├── release.yml │ └── rust-build.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── README_PYPI.md ├── RELEASE_NOTES.md ├── benches/ │ ├── benchmark.rs │ └── testcases.txt ├── demo.tape ├── grex.pyi ├── pyproject.toml ├── requirements.txt ├── src/ │ ├── builder.rs │ ├── char_range.rs │ ├── cluster.rs │ ├── component.rs │ ├── config.rs │ ├── dfa.rs │ ├── expression.rs │ ├── format.rs │ ├── grapheme.rs │ ├── lib.rs │ ├── macros.rs │ ├── main.rs │ ├── python.rs │ ├── quantifier.rs │ ├── regexp.rs │ ├── substring.rs │ ├── unicode_tables/ │ │ ├── decimal.rs │ │ ├── mod.rs │ │ ├── space.rs │ │ └── word.rs │ └── wasm.rs └── tests/ ├── cli_integration_tests.rs ├── lib_integration_tests.rs ├── property_tests.rs ├── python/ │ └── test_grex.py ├── wasm_browser_tests.rs └── wasm_node_tests.rs ================================================ FILE CONTENTS ================================================ ================================================ FILE: .editorconfig ================================================ # Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. # See the License for the specific language governing permissions and # limitations under the License. # Editor configuration, see http://editorconfig.org root = true [*.rs] charset = utf-8 indent_style = space indent_size = 4 insert_final_newline = true trim_trailing_whitespace = false max_line_length = 100 [*.md] max_line_length = off trim_trailing_whitespace = false ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: "cargo" directory: "/" schedule: interval: "daily" - package-ecosystem: "github-actions" directory: "/" schedule: interval: "daily" ================================================ FILE: .github/workflows/python-build.yml ================================================ # # Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. # See the License for the specific language governing permissions and # limitations under the License. name: Python Build on: push: branches: - main paths: - 'Cargo.lock' - 'Cargo.toml' - 'pyproject.toml' - 'requirements.txt' - 'src/**' - 'tests/**' - '**.yml' pull_request: branches: - main paths: - 'Cargo.lock' - 'Cargo.toml' - 'pyproject.toml' - 'requirements.txt' - 'src/**' - 'tests/**' - '**.yml' jobs: python-build: name: Python ${{ matrix.python-version }} on ${{ matrix.name }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ ubuntu-latest, macos-latest, windows-latest ] python-version: [ '3.12', '3.13', '3.14' ] include: - os: ubuntu-latest name: Linux 64-Bit - os: macos-latest name: MacOS 64-Bit - os: windows-latest name: Windows 64-Bit steps: - name: Check out repository uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} cache: 'pip' - name: Install maturin and pytest run: pip install -r requirements.txt - name: Build Python extension run: maturin build - name: Install Python extension run: pip install --find-links=target/wheels grex - name: Run Python unit tests run: pytest tests/python/test_grex.py ================================================ FILE: .github/workflows/release.yml ================================================ # # Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. # See the License for the specific language governing permissions and # limitations under the License. name: Release on: push: tags: - v1.* jobs: rust-release-build: name: ${{ matrix.name }} runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] include: - os: ubuntu-latest name: Rust Release Build on Linux x86_64-target: x86_64-unknown-linux-musl aarch64-target: aarch64-unknown-linux-musl - os: macos-latest name: Rust Release Build on MacOS x86_64-target: x86_64-apple-darwin aarch64-target: aarch64-apple-darwin - os: windows-latest name: Rust Release Build on Windows x86_64-target: x86_64-pc-windows-msvc aarch64-target: aarch64-pc-windows-msvc steps: - name: Check out repository uses: actions/checkout@v6 - name: Build x86_64 target in release mode uses: houseabsolute/actions-rust-cross@v1 with: target: ${{ matrix.x86_64-target }} args: '--release --locked' - name: Build aarch64 target in release mode uses: houseabsolute/actions-rust-cross@v1 with: target: ${{ matrix.aarch64-target }} args: '--release --locked' - name: Get latest release version number id: get_version uses: battila7/get-version-action@v2 - name: Create x86_64 zip file on Windows if: ${{ matrix.os == 'windows-latest' }} run: | choco install zip cd target/${{ matrix.x86_64-target }}/release zip grex-${{ steps.get_version.outputs.version }}-${{ matrix.x86_64-target }}.zip grex.exe cd ../../.. - name: Create aarch64 zip file on Windows if: ${{ matrix.os == 'windows-latest' }} run: | cd target/${{ matrix.aarch64-target }}/release zip grex-${{ steps.get_version.outputs.version }}-${{ matrix.aarch64-target }}.zip grex.exe cd ../../.. - name: Create x86_64 tar.gz file on Linux and macOS if: ${{ matrix.os != 'windows-latest' }} run: | chmod +x target/${{ matrix.x86_64-target }}/release/grex tar -zcf target/${{ matrix.x86_64-target }}/release/grex-${{ steps.get_version.outputs.version }}-${{ matrix.x86_64-target }}.tar.gz -C target/${{ matrix.x86_64-target }}/release grex - name: Create aarch64 tar.gz file on Linux and macOS if: ${{ matrix.os != 'windows-latest' }} run: | chmod +x target/${{ matrix.aarch64-target }}/release/grex tar -zcf target/${{ matrix.aarch64-target }}/release/grex-${{ steps.get_version.outputs.version }}-${{ matrix.aarch64-target }}.tar.gz -C target/${{ matrix.aarch64-target }}/release grex - name: Upload release and assets to GitHub uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} tag: ${{ github.ref }} release_name: grex ${{ steps.get_version.outputs.version-without-v }} file_glob: true file: target/*/release/grex-${{ steps.get_version.outputs.version }}-*.{zip,tar.gz} python-linux-release-build: name: Python Release Build on Linux and target ${{ matrix.target }} needs: rust-release-build runs-on: ubuntu-latest strategy: matrix: target: [ x86_64, aarch64 ] linux: [ auto, musllinux_1_2 ] steps: - name: Check out repository uses: actions/checkout@v6 - name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.target }} args: --release --out dist -i 3.12 3.13 3.14 pypy3.11 sccache: 'true' manylinux: ${{ matrix.linux }} - name: Upload wheels uses: actions/upload-artifact@v5 with: name: linux-${{ matrix.linux }}-${{ matrix.target }}-wheels path: dist python-windows-release-build: name: Python Release Build on Windows and target ${{ matrix.target }} needs: rust-release-build runs-on: windows-latest strategy: matrix: target: [ x86_64, aarch64 ] steps: - name: Check out repository uses: actions/checkout@v6 - name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.target }} args: --release --out dist -i 3.12 3.13 3.14 sccache: 'true' - name: Upload wheels uses: actions/upload-artifact@v5 with: name: windows-${{ matrix.target }}-wheels path: dist python-macos-release-build: name: Python Release Build on MacOS and target ${{ matrix.target }} needs: rust-release-build runs-on: macos-latest strategy: matrix: target: [ x86_64, aarch64 ] steps: - name: Check out repository uses: actions/checkout@v6 - name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.target }} args: --release --out dist -i 3.12 3.13 3.14 pypy3.11 sccache: 'true' - name: Upload wheels uses: actions/upload-artifact@v5 with: name: macos-${{ matrix.target }}-wheels path: dist python-release-upload: name: Publish wheels to PyPI needs: [ python-linux-release-build, python-windows-release-build, python-macos-release-build ] runs-on: ubuntu-latest steps: - name: Download wheels from previous jobs uses: actions/download-artifact@v6 with: path: wheels merge-multiple: true - name: Upload to PyPI uses: PyO3/maturin-action@v1 env: MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} with: command: upload args: --skip-existing wheels/*.whl rust-release-upload: name: Upload to crates.io needs: [ python-linux-release-build, python-windows-release-build, python-macos-release-build ] runs-on: ubuntu-latest steps: - name: Check out repository uses: actions/checkout@v6 - name: Upload release to crates.io uses: katyo/publish-crates@v2 with: registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }} ================================================ FILE: .github/workflows/rust-build.yml ================================================ # # Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. # See the License for the specific language governing permissions and # limitations under the License. name: Rust Build on: push: branches: - main paths: - 'Cargo.lock' - 'Cargo.toml' - 'src/**' - 'tests/**' - '**.yml' pull_request: branches: - main paths: - 'Cargo.lock' - 'Cargo.toml' - 'src/**' - 'tests/**' - '**.yml' jobs: rust-build: name: Rust on ${{ matrix.name }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] include: - os: ubuntu-latest name: Linux 64-Bit target: x86_64-unknown-linux-musl - os: macos-latest name: MacOS 64-Bit target: x86_64-apple-darwin env: MACOSX_DEPLOYMENT_TARGET: 10.7 - os: windows-latest name: Windows 64-Bit target: x86_64-pc-windows-msvc steps: - name: Check out repository uses: actions/checkout@v6 - name: Add rustup target run: rustup target add ${{ matrix.target }} - name: Store or retrieve cargo caches uses: actions/cache@v4 with: path: | ~/.cargo/bin/ ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ target/ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - name: Build target in debug mode run: cargo build --target ${{ matrix.target }} --locked - name: Test target in debug mode run: cargo test --target ${{ matrix.target }} - name: Check Clippy lints run: cargo clippy --target ${{ matrix.target }} -- -Dwarnings wasm-build: name: WASM Build needs: rust-build runs-on: macos-latest steps: - name: Check out repository uses: actions/checkout@v6 - name: Install wasm-pack run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - name: Install Firefox and Geckodriver # not available anymore in macos-latest run: | brew install --cask firefox brew install geckodriver #- name: Enable Safari web driver # run: sudo safaridriver --enable - name: Run WASM integration tests on NodeJS run: wasm-pack test --node -- --no-default-features - name: Run WASM integration tests in Chrome run: wasm-pack test --headless --chrome -- --no-default-features - name: Run WASM integration tests in Firefox run: wasm-pack test --headless --firefox -- --no-default-features # Safari WASM tests not working, reason unclear # Increasing driver timeout does not seem to work # https://github.com/wasm-bindgen/wasm-bindgen/pull/4320 #- name: Run WASM integration tests in Safari # env: # WASM_BINDGEN_TEST_DRIVER_TIMEOUT: 10 # run: wasm-pack test --headless --safari -- --no-default-features coverage-report: name: Coverage Report needs: rust-build if: ${{ github.event_name == 'push' }} runs-on: ubuntu-latest container: image: xd009642/tarpaulin:develop-nightly options: --security-opt seccomp=unconfined steps: - name: Check out repository uses: actions/checkout@v6 - name: Generate coverage report run: cargo +nightly tarpaulin --ignore-config --ignore-panics --ignore-tests --exclude-files src/python.rs src/main.rs src/wasm.rs --verbose --timeout 900 --out xml - name: Workaround for codecov/feedback#263 run: git config --global --add safe.directory "$GITHUB_WORKSPACE" - name: Upload coverage report uses: codecov/codecov-action@v4 with: token: ${{ secrets.CODECOV_TOKEN }} fail_ci_if_error: true ================================================ FILE: .gitignore ================================================ # Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. # See the License for the specific language governing permissions and # limitations under the License. /pkg/ /target/ **/*.rs.bk .idea .project .c9/ *.launch .settings/ .metadata/ .venv *.sublime-workspace bin/ tmp/ out/ *.iml *.ipr *.iws *.bak *.tmp *.class *.html .buildpath .classpath .vscode/* !.vscode/settings.json !.vscode/tasks.json !.vscode/launch.json !.vscode/extensions.json .DS_Store Thumbs.db $RECYCLE.BIN/ ._* .AppleDouble .LSOverride *.lnk Desktop.ini ehthumbs.db *.proptest-regressions ================================================ FILE: Cargo.toml ================================================ # # Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. # See the License for the specific language governing permissions and # limitations under the License. [package] name = "grex" version = "1.4.6" authors = ["Peter M. Stahl "] description = """ grex generates regular expressions from user-provided test cases. """ homepage = "https://github.com/pemistahl/grex" repository = "https://github.com/pemistahl/grex" documentation = "https://docs.rs/grex" license = "Apache-2.0" readme = "README.md" edition = "2021" categories = ["command-line-utilities", "parsing"] keywords = ["pattern", "regex", "regexp"] [lib] crate-type = ["cdylib", "rlib"] [dependencies] itertools = "0.14.0" ndarray = "0.17.1" petgraph = {version = "0.8.3", default-features = false, features = ["stable_graph"]} regex = "1.12.2" unicode-general-category = "1.1.0" unicode-segmentation = "1.12.0" [target.'cfg(not(target_family = "wasm"))'.dependencies] clap = {version = "4.5.53", features = ["derive", "wrap_help"], optional = true} pyo3 = {version = "0.27.1", optional = true} [target.'cfg(target_family = "wasm")'.dependencies] wasm-bindgen = "0.2.105" [dev-dependencies] indoc = "2.0.7" rstest = "0.26.1" [target.'cfg(not(target_family = "wasm"))'.dev-dependencies] assert_cmd = "2.1.1" criterion = "0.7.0" predicates = "3.1.3" proptest = "1.9.0" tempfile = "3.23.0" [target.'cfg(target_family = "wasm")'.dev-dependencies] wasm-bindgen-test = "0.3.55" [features] default = ["cli"] cli = ["clap"] python = ["pyo3"] [[bench]] name = "benchmark" harness = false [profile.bench] debug = true ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================
![grex](https://raw.githubusercontent.com/pemistahl/grex/main/logo.png)
[![rust build status](https://github.com/pemistahl/grex/actions/workflows/rust-build.yml/badge.svg)](https://github.com/pemistahl/grex/actions/workflows/rust-build.yml) [![python build status](https://github.com/pemistahl/grex/actions/workflows/python-build.yml/badge.svg)](https://github.com/pemistahl/grex/actions/workflows/python-build.yml) [![docs.rs](https://docs.rs/grex/badge.svg)](https://docs.rs/grex) [![codecov](https://codecov.io/gh/pemistahl/grex/branch/main/graph/badge.svg)](https://codecov.io/gh/pemistahl/grex) [![dependency status](https://deps.rs/crate/grex/1.4.6/status.svg)](https://deps.rs/crate/grex/1.4.6) [![demo](https://img.shields.io/badge/-Demo%20Website-orange?logo=HTML5&labelColor=white)](https://pemistahl.github.io/grex-js/) [![downloads](https://img.shields.io/crates/d/grex.svg)](https://crates.io/crates/grex) [![crates.io](https://img.shields.io/crates/v/grex.svg)](https://crates.io/crates/grex) [![lib.rs](https://img.shields.io/badge/lib.rs-v1.4.6-blue)](https://lib.rs/crates/grex) ![supported Python versions](https://img.shields.io/badge/Python-%3E%3D%203.12-blue?logo=Python&logoColor=yellow) [![pypi](https://img.shields.io/badge/PYPI-v1.0.2-blue?logo=PyPI&logoColor=yellow)](https://pypi.org/project/grex) [![license](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0) [![Linux 64-bit Download](https://img.shields.io/badge/Linux%2064bit%20Download-v1.4.6-blue?logo=Linux)](https://github.com/pemistahl/grex/releases/download/v1.4.6/grex-v1.4.6-x86_64-unknown-linux-musl.tar.gz) [![Linux ARM64 Download](https://img.shields.io/badge/Linux%20ARM64%20Download-v1.4.6-blue?logo=Linux)](https://github.com/pemistahl/grex/releases/download/v1.4.6/grex-v1.4.6-aarch64-unknown-linux-musl.tar.gz) [![MacOS 64-bit Download](https://img.shields.io/badge/macOS%2064bit%20Download-v1.4.6-blue?logo=Apple)](https://github.com/pemistahl/grex/releases/download/v1.4.6/grex-v1.4.6-x86_64-apple-darwin.tar.gz) [![MacOS ARM64 Download](https://img.shields.io/badge/macOS%20ARM64%20Download-v1.4.6-blue?logo=Apple)](https://github.com/pemistahl/grex/releases/download/v1.4.6/grex-v1.4.6-aarch64-apple-darwin.tar.gz) [![Windows 64-bit Download](https://img.shields.io/badge/Windows%2064bit%20Download-v1.4.6-blue?logo=Windows)](https://github.com/pemistahl/grex/releases/download/v1.4.6/grex-v1.4.6-x86_64-pc-windows-msvc.zip) [![Windows ARM64 Download](https://img.shields.io/badge/Windows%20ARM64%20Download-v1.4.6-blue?logo=Windows)](https://github.com/pemistahl/grex/releases/download/v1.4.6/grex-v1.4.6-aarch64-pc-windows-msvc.zip)

![grex demo](https://raw.githubusercontent.com/pemistahl/grex/main/demo.gif)
## 1. What does this tool do? *grex* is a library as well as a command-line utility that is meant to simplify the often complicated and tedious task of creating regular expressions. It does so by automatically generating a single regular expression from user-provided test cases. The resulting expression is guaranteed to match the test cases which it was generated from. This project has started as a Rust port of the JavaScript tool [*regexgen*](https://github.com/devongovett/regexgen) written by [Devon Govett](https://github.com/devongovett). Although a lot of further useful features could be added to it, its development was apparently ceased several years ago. The plan is now to add these new features to *grex* as Rust really shines when it comes to command-line tools. *grex* offers all features that *regexgen* provides, and more. The philosophy of this project is to generate the most specific regular expression possible by default which exactly matches the given input only and nothing else. With the use of command-line flags (in the CLI tool) or preprocessing methods (in the library), more generalized expressions can be created. The produced expressions are [Perl-compatible regular expressions](https://www.pcre.org) which are also compatible with the regular expression parser in Rust's [*regex* crate](https://crates.io/crates/regex). Other regular expression parsers or respective libraries from other programming languages have not been tested so far, but they ought to be mostly compatible as well. ## 2. Do I still need to learn to write regexes then? **Definitely, yes!** Using the standard settings, *grex* produces a regular expression that is guaranteed to match only the test cases given as input and nothing else. This has been verified by [property tests](https://github.com/pemistahl/grex/blob/main/tests/property_tests.rs). However, if the conversion to shorthand character classes such as `\w` is enabled, the resulting regex matches a much wider scope of test cases. Knowledge about the consequences of this conversion is essential for finding a correct regular expression for your business domain. *grex* uses an algorithm that tries to find the shortest possible regex for the given test cases. Very often though, the resulting expression is still longer or more complex than it needs to be. In such cases, a more compact or elegant regex can be created only by hand. Also, every regular expression engine has different built-in optimizations. *grex* does not know anything about those and therefore cannot optimize its regexes for a specific engine. **So, please learn how to write regular expressions!** The currently best use case for *grex* is to find an initial correct regex which should be inspected by hand if further optimizations are possible. ## 3. Current Features - literals - character classes - detection of common prefixes and suffixes - detection of repeated substrings and conversion to `{min,max}` quantifier notation - alternation using `|` operator - optionality using `?` quantifier - escaping of non-ascii characters, with optional conversion of astral code points to surrogate pairs - case-sensitive or case-insensitive matching - capturing or non-capturing groups - optional anchors `^` and `$` - fully compliant to [Unicode Standard 16.0](https://unicode.org/versions/Unicode15.0.0) - fully compatible with [*regex* crate 1.11.0+](https://crates.io/crates/regex) - correctly handles graphemes consisting of multiple Unicode symbols - reads input strings from the command-line or from a file - produces more readable expressions indented on multiple using optional verbose mode - optional syntax highlighting for nicer output in supported terminals ## 4. How to install? ### 4.1 The command-line tool You can download the self-contained executable for your platform above and put it in a place of your choice. Alternatively, pre-compiled 64-Bit binaries are available within the package managers [Scoop](https://scoop.sh) (for Windows), [Homebrew](https://brew.sh) (for macOS and Linux), [MacPorts](https://www.macports.org) (for macOS), and [Huber](https://github.com/innobead/huber) (for macOS, Linux and Windows). [Raúl Piracés](https://github.com/piraces) has contributed a [Chocolatey Windows package](https://community.chocolatey.org/packages/grex). *grex* is also hosted on [crates.io](https://crates.io/crates/grex), the official Rust package registry. If you are a Rust developer and already have the Rust toolchain installed, you can install by compiling from source using [*cargo*](https://doc.rust-lang.org/cargo/), the Rust package manager. So the summary of your installation options is: ``` ( brew | cargo | choco | huber | port | scoop ) install grex ``` ### 4.2 The library In order to use *grex* as a library, simply add it as a dependency to your `Cargo.toml` file: ```toml [dependencies] grex = { version = "1.4.6", default-features = false } ``` The dependency *clap* is only needed for the command-line tool. By disabling the default features, the download and compilation of clap is prevented for the library. ## 5. How to use? Detailed explanations of the available settings are provided in the [library section](#52-the-library). All settings can be freely combined with each other. ### 5.1 The command-line tool Test cases are passed either directly (`grex a b c`) or from a file (`grex -f test_cases.txt`). *grex* is able to receive its input from Unix pipelines as well, e.g. `cat test_cases.txt | grex -`. The following table shows all available flags and options: ``` $ grex -h grex 1.4.6 © 2019-today Peter M. Stahl Licensed under the Apache License, Version 2.0 Downloadable from https://crates.io/crates/grex Source code at https://github.com/pemistahl/grex grex generates regular expressions from user-provided test cases. Usage: grex [OPTIONS] {INPUT...|--file } Input: [INPUT]... One or more test cases separated by blank space -f, --file Reads test cases on separate lines from a file Digit Options: -d, --digits Converts any Unicode decimal digit to \d -D, --non-digits Converts any character which is not a Unicode decimal digit to \D Whitespace Options: -s, --spaces Converts any Unicode whitespace character to \s -S, --non-spaces Converts any character which is not a Unicode whitespace character to \S Word Options: -w, --words Converts any Unicode word character to \w -W, --non-words Converts any character which is not a Unicode word character to \W Escaping Options: -e, --escape Replaces all non-ASCII characters with unicode escape sequences --with-surrogates Converts astral code points to surrogate pairs if --escape is set Repetition Options: -r, --repetitions Detects repeated non-overlapping substrings and converts them to {min,max} quantifier notation --min-repetitions Specifies the minimum quantity of substring repetitions to be converted if --repetitions is set [default: 1] --min-substring-length Specifies the minimum length a repeated substring must have in order to be converted if --repetitions is set [default: 1] Anchor Options: --no-start-anchor Removes the caret anchor `^` from the resulting regular expression --no-end-anchor Removes the dollar sign anchor `$` from the resulting regular expression --no-anchors Removes the caret and dollar sign anchors from the resulting regular expression Display Options: -x, --verbose Produces a nicer-looking regular expression in verbose mode -c, --colorize Provides syntax highlighting for the resulting regular expression Miscellaneous Options: -i, --ignore-case Performs case-insensitive matching, letters match both upper and lower case -g, --capture-groups Replaces non-capturing groups with capturing ones -h, --help Prints help information -v, --version Prints version information ``` ### 5.2 The library #### 5.2.1 Default settings Test cases are passed either from a collection via [`RegExpBuilder::from()`](https://docs.rs/grex/1.4.6/grex/struct.RegExpBuilder.html#method.from) or from a file via [`RegExpBuilder::from_file()`](https://docs.rs/grex/1.4.6/grex/struct.RegExpBuilder.html#method.from_file). If read from a file, each test case must be on a separate line. Lines may be ended with either a newline `\n` or a carriage return with a line feed `\r\n`. ```rust use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["a", "aa", "aaa"]).build(); assert_eq!(regexp, "^a(?:aa?)?$"); ``` #### 5.2.2 Convert to character classes ```rust use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["a", "aa", "123"]) .with_conversion_of_digits() .with_conversion_of_words() .build(); assert_eq!(regexp, "^(\\d\\d\\d|\\w(?:\\w)?)$"); ``` #### 5.2.3 Convert repeated substrings ```rust use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["aa", "bcbc", "defdefdef"]) .with_conversion_of_repetitions() .build(); assert_eq!(regexp, "^(?:a{2}|(?:bc){2}|(?:def){3})$"); ``` By default, *grex* converts each substring this way which is at least a single character long and which is subsequently repeated at least once. You can customize these two parameters if you like. In the following example, the test case `aa` is not converted to `a{2}` because the repeated substring `a` has a length of 1, but the minimum substring length has been set to 2. ```rust use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["aa", "bcbc", "defdefdef"]) .with_conversion_of_repetitions() .with_minimum_substring_length(2) .build(); assert_eq!(regexp, "^(?:aa|(?:bc){2}|(?:def){3})$"); ``` Setting a minimum number of 2 repetitions in the next example, only the test case `defdefdef` will be converted because it is the only one that is repeated twice. ```rust use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["aa", "bcbc", "defdefdef"]) .with_conversion_of_repetitions() .with_minimum_repetitions(2) .build(); assert_eq!(regexp, "^(?:bcbc|aa|(?:def){3})$"); ``` #### 5.2.4 Escape non-ascii characters ```rust use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["You smell like 💩."]) .with_escaping_of_non_ascii_chars(false) .build(); assert_eq!(regexp, "^You smell like \\u{1f4a9}\\.$"); ``` Old versions of JavaScript do not support unicode escape sequences for the astral code planes (range `U+010000` to `U+10FFFF`). In order to support these symbols in JavaScript regular expressions, the conversion to surrogate pairs is necessary. More information on that matter can be found [here](https://mathiasbynens.be/notes/javascript-unicode). ```rust use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["You smell like 💩."]) .with_escaped_non_ascii_chars(true) .build(); assert_eq!(regexp, "^You smell like \\u{d83d}\\u{dca9}\\.$"); ``` #### 5.2.5 Case-insensitive matching The regular expressions that *grex* generates are case-sensitive by default. Case-insensitive matching can be enabled like so: ```rust use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["big", "BIGGER"]) .with_case_insensitive_matching() .build(); assert_eq!(regexp, "(?i)^big(?:ger)?$"); ``` #### 5.2.6 Capturing Groups Non-capturing groups are used by default. Extending the previous example, you can switch to capturing groups instead. ```rust use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["big", "BIGGER"]) .with_case_insensitive_matching() .with_capturing_groups() .build(); assert_eq!(regexp, "(?i)^big(ger)?$"); ``` #### 5.2.7 Verbose mode If you find the generated regular expression hard to read, you can enable verbose mode. The expression is then put on multiple lines and indented to make it more pleasant to the eyes. ```rust use grex::RegExpBuilder; use indoc::indoc; let regexp = RegExpBuilder::from(&["a", "b", "bcd"]) .with_verbose_mode() .build(); assert_eq!(regexp, indoc!( r#" (?x) ^ (?: b (?: cd )? | a ) $"# )); ``` #### 5.2.8 Disable anchors By default, the anchors `^` and `$` are put around every generated regular expression in order to ensure that it matches only the test cases given as input. Often enough, however, it is desired to use the generated pattern as part of a larger one. For this purpose, the anchors can be disabled, either separately or both of them. ```rust use grex::RegExpBuilder; let regexp = RegExpBuilder::from(&["a", "aa", "aaa"]) .without_anchors() .build(); assert_eq!(regexp, "a(?:aa?)?"); ``` ### 5.3 Examples The following examples show the various supported regex syntax features: ```shell $ grex a b c ^[a-c]$ $ grex a c d e f ^[ac-f]$ $ grex a b x de ^(?:de|[abx])$ $ grex abc bc ^a?bc$ $ grex a b bc ^(?:bc?|a)$ $ grex [a-z] ^\[a\-z\]$ $ grex -r b ba baa baaa ^b(?:a{1,3})?$ $ grex -r b ba baa baaaa ^b(?:a{1,2}|a{4})?$ $ grex y̆ a z ^(?:y̆|[az])$ Note: Grapheme y̆ consists of two Unicode symbols: U+0079 (Latin Small Letter Y) U+0306 (Combining Breve) $ grex "I ♥ cake" "I ♥ cookies" ^I ♥ c(?:ookies|ake)$ Note: Input containing blank space must be surrounded by quotation marks. ``` The string `"I ♥♥♥ 36 and ٣ and 💩💩."` serves as input for the following examples using the command-line notation: ```shell $ grex ^I ♥♥♥ 36 and ٣ and 💩💩\.$ $ grex -e ^I \u{2665}\u{2665}\u{2665} 36 and \u{663} and \u{1f4a9}\u{1f4a9}\.$ $ grex -e --with-surrogates ^I \u{2665}\u{2665}\u{2665} 36 and \u{663} and \u{d83d}\u{dca9}\u{d83d}\u{dca9}\.$ $ grex -d ^I ♥♥♥ \d\d and \d and 💩💩\.$ $ grex -s ^I\s♥♥♥\s36\sand\s٣\sand\s💩💩\.$ $ grex -w ^\w ♥♥♥ \w\w \w\w\w \w \w\w\w 💩💩\.$ $ grex -D ^\D\D\D\D\D\D36\D\D\D\D\D٣\D\D\D\D\D\D\D\D$ $ grex -S ^\S \S\S\S \S\S \S\S\S \S \S\S\S \S\S\S$ $ grex -dsw ^\w\s♥♥♥\s\d\d\s\w\w\w\s\d\s\w\w\w\s💩💩\.$ $ grex -dswW ^\w\s\W\W\W\s\d\d\s\w\w\w\s\d\s\w\w\w\s\W\W\W$ $ grex -r ^I ♥{3} 36 and ٣ and 💩{2}\.$ $ grex -er ^I \u{2665}{3} 36 and \u{663} and \u{1f4a9}{2}\.$ $ grex -er --with-surrogates ^I \u{2665}{3} 36 and \u{663} and (?:\u{d83d}\u{dca9}){2}\.$ $ grex -dgr ^I ♥{3} \d(\d and ){2}💩{2}\.$ $ grex -rs ^I\s♥{3}\s36\sand\s٣\sand\s💩{2}\.$ $ grex -rw ^\w ♥{3} \w(?:\w \w{3} ){2}💩{2}\.$ $ grex -Dr ^\D{6}36\D{5}٣\D{8}$ $ grex -rS ^\S \S(?:\S{2} ){2}\S{3} \S \S{3} \S{3}$ $ grex -rW ^I\W{5}36\Wand\W٣\Wand\W{4}$ $ grex -drsw ^\w\s♥{3}\s\d(?:\d\s\w{3}\s){2}💩{2}\.$ $ grex -drswW ^\w\s\W{3}\s\d(?:\d\s\w{3}\s){2}\W{3}$ ``` ## 6. How to build? In order to build the source code yourself, you need the [stable Rust toolchain](https://www.rust-lang.org/tools/install) installed on your machine so that [*cargo*](https://doc.rust-lang.org/cargo/), the Rust package manager is available. **Please note**: Rust >= 1.70.0 is required to build the CLI. For the library part, Rust < 1.70.0 is sufficient. ```shell git clone https://github.com/pemistahl/grex.git cd grex cargo build ``` The source code is accompanied by an extensive test suite consisting of unit tests, integration tests and property tests. For running them, simply say: ```shell cargo test ``` Benchmarks measuring the performance of several settings can be run with: ```shell cargo bench ``` ## 7. Python extension module With the help of [PyO3](https://github.com/PyO3/pyo3) and [Maturin](https://github.com/PyO3/maturin), the library has been compiled to a Python extension module so that it can be used within any Python software as well. It is available in the [Python Package Index](https://pypi.org/project/grex) and can be installed with: ```shell pip install grex ``` To build the Python extension module yourself, create a virtual environment and install [Maturin](https://github.com/PyO3/maturin). ```shell python -m venv /path/to/virtual/environment source /path/to/virtual/environment/bin/activate pip install maturin maturin build ``` The Python library contains a single class named `RegExpBuilder` that can be imported like so: ```python from grex import RegExpBuilder ``` ## 8. WebAssembly support This library can be compiled to [WebAssembly (WASM)](https://webassembly.org) which allows to use *grex* in any JavaScript-based project, be it in the browser or in the back end running on [Node.js](https://nodejs.org). The easiest way to compile is to use [`wasm-pack`](https://rustwasm.github.io/wasm-pack). After the installation, you can, for instance, build the library with the web target so that it can be directly used in the browser: wasm-pack build --target web This creates a directory named `pkg` on the top-level of this repository, containing the compiled wasm files and JavaScript and TypeScript bindings. In an HTML file, you can then call *grex* like the following, for instance: ```html ``` There are also some integration tests available both for Node.js and for the browsers Chrome, Firefox and Safari. To run them, simply say: wasm-pack test --node --headless --chrome --firefox --safari If the tests fail to start in Safari, you need to enable Safari's web driver first by running: sudo safaridriver --enable The output of `wasm-pack` will be hosted in a [separate repository](https://github.com/pemistahl/grex-js) which allows to add further JavaScript-related configuration, tests and documentation. *grex* will then be added to the [npm registry](https://www.npmjs.com) as well, allowing for an easy download and installation within every JavaScript or TypeScript project. There is a [demo website](https://pemistahl.github.io/grex-js/) available where you can give grex a try. ![demo website](https://raw.githubusercontent.com/pemistahl/grex/main/website.jpg) ## 9. How does it work? 1. A [deterministic finite automaton](https://en.wikipedia.org/wiki/Deterministic_finite_automaton) (DFA) is created from the input strings. 2. The number of states and transitions between states in the DFA is reduced by applying [Hopcroft's DFA minimization algorithm](https://en.wikipedia.org/wiki/DFA_minimization#Hopcroft.27s_algorithm). 3. The minimized DFA is expressed as a system of linear equations which are solved with [Brzozowski's algebraic method](http://cs.stackexchange.com/questions/2016/how-to-convert-finite-automata-to-regular-expressions#2392), resulting in the final regular expression. ## 10. What's next for version 1.5.0? Take a look at the [planned issues](https://github.com/pemistahl/grex/milestone/5). ## 11. Contributions In case you want to contribute something to *grex*, I encourage you to do so. Do you have ideas for cool features? Or have you found any bugs so far? Feel free to open an issue or send a pull request. It's very much appreciated. :-) ================================================ FILE: README_PYPI.md ================================================
![grex](https://raw.githubusercontent.com/pemistahl/grex/main/logo.png)
[![build status](https://github.com/pemistahl/grex/actions/workflows/python-build.yml/badge.svg)](https://github.com/pemistahl/grex/actions/workflows/python-build.yml) [![codecov](https://codecov.io/gh/pemistahl/grex/branch/main/graph/badge.svg)](https://codecov.io/gh/pemistahl/grex) [![demo](https://img.shields.io/badge/-Demo%20Website-orange?logo=HTML5&labelColor=white)](https://pemistahl.github.io/grex-js/) ![supported Python versions](https://img.shields.io/badge/Python-%3E%3D%203.12-blue?logo=Python&logoColor=yellow) [![pypi](https://img.shields.io/badge/PYPI-v1.0.2-blue?logo=PyPI&logoColor=yellow)](https://pypi.org/project/grex) [![license](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0)

## 1. What does this library do? *grex* is a library that is meant to simplify the often complicated and tedious task of creating regular expressions. It does so by automatically generating a single regular expression from user-provided test cases. The resulting expression is guaranteed to match the test cases which it was generated from. This project has started as a [Rust port](https://github.com/pemistahl/grex) of the JavaScript tool [*regexgen*](https://github.com/devongovett/regexgen) written by [Devon Govett](https://github.com/devongovett). Although a lot of further useful features could be added to it, its development was apparently ceased several years ago. The Rust library offers new features and extended Unicode support. With the help of [PyO3](https://github.com/PyO3/pyo3) and [Maturin](https://github.com/PyO3/maturin), the library has been compiled to a Python extension module so that it can be used within any Python software as well. The philosophy of this project is to generate the most specific regular expression possible by default which exactly matches the given input only and nothing else. With the use of preprocessing methods, more generalized expressions can be created. The produced expressions are [Perl-compatible regular expressions](https://www.pcre.org) which are also compatible with the [regular expression module](https://docs.python.org/3/library/re.html) in Python's standard library. There is a [demo website](https://pemistahl.github.io/grex-js/) available where you can give grex a try. ![demo website](https://raw.githubusercontent.com/pemistahl/grex/main/website.jpg) ## 2. Do I still need to learn to write regexes then? **Definitely, yes!** Using the standard settings, *grex* produces a regular expression that is guaranteed to match only the test cases given as input and nothing else. However, if the conversion to shorthand character classes such as `\w` is enabled, the resulting regex matches a much wider scope of test cases. Knowledge about the consequences of this conversion is essential for finding a correct regular expression for your business domain. *grex* uses an algorithm that tries to find the shortest possible regex for the given test cases. Very often though, the resulting expression is still longer or more complex than it needs to be. In such cases, a more compact or elegant regex can be created only by hand. Also, every regular expression engine has different built-in optimizations. *grex* does not know anything about those and therefore cannot optimize its regexes for a specific engine. **So, please learn how to write regular expressions!** The currently best use case for *grex* is to find an initial correct regex which should be inspected by hand if further optimizations are possible. ## 3. Current Features - literals - character classes - detection of common prefixes and suffixes - detection of repeated substrings and conversion to `{min,max}` quantifier notation - alternation using `|` operator - optionality using `?` quantifier - escaping of non-ascii characters, with optional conversion of astral code points to surrogate pairs - case-sensitive or case-insensitive matching - capturing or non-capturing groups - optional anchors `^` and `$` - fully compliant to [Unicode Standard 16.0](https://unicode.org/versions/Unicode16.0.0) - correctly handles graphemes consisting of multiple Unicode symbols - produces more readable expressions indented on multiple using optional verbose mode - optional syntax highlighting for nicer output in supported terminals ## 4. How to install? *grex* is available in the [Python Package Index](https://pypi.org/project/grex) and can be installed with: ``` pip install grex ``` The current version 1.0.2 corresponds to the latest version 1.4.6 of the Rust library and command-line tool. ## 5. How to use? This library contains a single class named `RegExpBuilder` that can be imported like so: ```python from grex import RegExpBuilder ``` ### 5.1 Default settings ```python pattern = RegExpBuilder.from_test_cases(["a", "aa", "aaa"]).build() assert pattern == "^a(?:aa?)?$" ``` ### 5.2 Convert to character classes ```python pattern = (RegExpBuilder.from_test_cases(["a", "aa", "123"]) .with_conversion_of_digits() .with_conversion_of_words() .build()) assert pattern == "^(?:\\d\\d\\d|\\w(?:\\w)?)$" ``` ### 5.3 Convert repeated substrings ```python pattern = (RegExpBuilder.from_test_cases(["aa", "bcbc", "defdefdef"]) .with_conversion_of_repetitions() .build()) assert pattern == "^(?:a{2}|(?:bc){2}|(?:def){3})$" ``` By default, *grex* converts each substring this way which is at least a single character long and which is subsequently repeated at least once. You can customize these two parameters if you like. In the following example, the test case `aa` is not converted to `a{2}` because the repeated substring `a` has a length of 1, but the minimum substring length has been set to 2. ```python pattern = (RegExpBuilder.from_test_cases(["aa", "bcbc", "defdefdef"]) .with_conversion_of_repetitions() .with_minimum_substring_length(2) .build()) assert pattern == "^(?:aa|(?:bc){2}|(?:def){3})$" ``` Setting a minimum number of 2 repetitions in the next example, only the test case `defdefdef` will be converted because it is the only one that is repeated twice. ```python pattern = (RegExpBuilder.from_test_cases(["aa", "bcbc", "defdefdef"]) .with_conversion_of_repetitions() .with_minimum_repetitions(2) .build()) assert pattern == "^(?:bcbc|aa|(?:def){3})$" ``` ### 5.4 Escape non-ascii characters ```python pattern = (RegExpBuilder.from_test_cases(["You smell like 💩."]) .with_escaping_of_non_ascii_chars(use_surrogate_pairs=False) .build()) assert pattern == "^You smell like \\U0001f4a9\\.$" ``` Old versions of JavaScript do not support unicode escape sequences for the astral code planes (range `U+010000` to `U+10FFFF`). In order to support these symbols in JavaScript regular expressions, the conversion to surrogate pairs is necessary. More information on that matter can be found [here](https://mathiasbynens.be/notes/javascript-unicode). ```python pattern = (RegExpBuilder.from_test_cases(["You smell like 💩."]) .with_escaping_of_non_ascii_chars(use_surrogate_pairs=True) .build()) assert pattern == "^You smell like \\ud83d\\udca9\\.$" ``` ### 5.5 Case-insensitive matching The regular expressions that *grex* generates are case-sensitive by default. Case-insensitive matching can be enabled like so: ```python pattern = (RegExpBuilder.from_test_cases(["big", "BIGGER"]) .with_case_insensitive_matching() .build()) assert pattern == "(?i)^big(?:ger)?$" ``` ### 5.6 Capturing Groups Non-capturing groups are used by default. Extending the previous example, you can switch to capturing groups instead. ```python pattern = (RegExpBuilder.from_test_cases(["big", "BIGGER"]) .with_case_insensitive_matching() .with_capturing_groups() .build()) assert pattern == "(?i)^big(ger)?$" ``` ### 5.7 Verbose mode If you find the generated regular expression hard to read, you can enable verbose mode. The expression is then put on multiple lines and indented to make it more pleasant to the eyes. ```python import inspect pattern = (RegExpBuilder.from_test_cases(["a", "b", "bcd"]) .with_verbose_mode() .build()) assert pattern == inspect.cleandoc(""" (?x) ^ (?: b (?: cd )? | a ) $ """ ) ``` ### 5.8 Disable anchors By default, the anchors `^` and `$` are put around every generated regular expression in order to ensure that it matches only the test cases given as input. Often enough, however, it is desired to use the generated pattern as part of a larger one. For this purpose, the anchors can be disabled, either separately or both of them. ```python pattern = (RegExpBuilder.from_test_cases(["a", "aa", "aaa"]) .without_anchors() .build()) assert pattern == "a(?:aa?)?" ``` ## 6. How to build? In order to build the source code yourself, you need the [stable Rust toolchain](https://www.rust-lang.org/tools/install) installed on your machine so that [*cargo*](https://doc.rust-lang.org/cargo/), the Rust package manager is available. ```shell git clone https://github.com/pemistahl/grex.git cd grex cargo build ``` To build the Python extension module, create a virtual environment and install [Maturin](https://github.com/PyO3/maturin). ```shell python -m venv /path/to/virtual/environment source /path/to/virtual/environment/bin/activate pip install maturin maturin build ``` The Rust source code is accompanied by an extensive test suite consisting of unit tests, integration tests and property tests. For running them, simply say: ```shell cargo test ``` Additional Python tests can be run after installing pytest which is an optional dependency: ```shell maturin develop --extras=test pytest tests/python/test_grex.py ``` ================================================ FILE: RELEASE_NOTES.md ================================================ ## grex 1.4.6 (released on 14 Nov 2025) ### Improvements - All characters from the current Unicode standard 16.0 are now fully supported. ### Changes - The unmaintained unic-* dependencies have been replaced by @jqnatividad. (#337) - All other dependencies have been updated to their latest versions. - Support for Python 3.14 has been added. - Support for Python < 3.12 has been dropped. ## grex 1.4.5 (released on 06 Mar 2024) ### Improvements - Type stubs for the Python bindings are now available, allowing better static code analysis, better code completion in supported IDEs and easier understanding of the library's API. - The code for creating regular expressions in verbose mode has been simplified and is more performant now. - ARM64 binaries are now provided for every major platform (Linux, macOs, Windows). ### Bug Fixes - For a small set of special characters, *grex* produced incorrect regular expressions when the case-insensitivity feature was enabled. This has been fixed. ### Changes - All dependencies have been updated to their latest versions. ## grex 1.4.4 (released on 24 Aug 2023) ### Bug Fixes - The Python release workflow was incorrect as it produced too many wheels for upload. This has been fixed. ## grex 1.4.3 (released on 24 Aug 2023) ### Features - Python bindings are now available for the library. Use grex within any Python software. (#172) ### Changes - All dependencies have been updated to their latest versions. ## grex 1.4.2 (released on 26 Jul 2023) ### Improvements - All characters from the current Unicode standard 15.0 are now fully supported. (#128) - A proper exit code is now returned if the provided user input cannot be handled by the CLI. Big thanks to @spenserblack for the respective pull request. (#165) ### Changes - It is not possible anymore to call `RegExpBuilder.with_syntax_highlighting()` in the library as it only makes sense for the CLI. - The dependency `atty` has been removed in favor of `std::io::IsTerminal` in Rust >= 1.70.0. As a result, Rust >= 1.70.0 is now needed to compile the CLI. - All remaining dependencies have been updated to their latest versions. ### Bug Fixes - Several bugs have been fixed that caused incorrect expressions to be generated in rare cases. ## grex 1.4.1 (released on 21 Oct 2022) ### Changes - `clap` has been updated to version 4.0. The help output by `grex -h` now looks a little different. ### Bug Fixes - A bug in the grapheme segmentation was fixed that caused test cases which contain backslashes to produce incorrect regular expressions. ## grex 1.4.0 (released on 26 Jul 2022) ### Features - The library can now be compiled to WebAssembly and be used in any JavaScript project. (#82) - The supported character set for regular expression generation has been updated to the current Unicode Standard 14.0. - `structopt` has been replaced with `clap` providing much nicer help output for the command-line tool. ### Improvements - The regular expression generation performance has been significantly improved, especially for generating very long expressions from a large set of test cases. This has been accomplished by reducing the number of memory allocations, removing deprecated code and applying several minor optimizations. ### Bug Fixes - Several bugs have been fixed that caused incorrect expressions to be generated in rare cases. ## grex 1.3.0 (released on 15 Sep 2021) ### Features - anchors can now be disabled so that the generated expression can be used as part of a larger one (#30) - the command-line tool can now be used within Unix pipelines (#45) ### Changes - Additional methods have been added to `RegExpBuilder` in order to replace the enum `Feature` and make the library API more consistent. (#47) ### Bug Fixes - Under rare circumstances, the conversion of repetitions did not work. This has been fixed. (#36) ## grex 1.2.0 (released on 28 Mar 2021) ### Features - verbose mode is now supported with the `--verbose` flag to produce regular expressions which are easier to read (#17) ## grex 1.1.0 (released on 17 Apr 2020) ### Features - case-insensitive matching regexes are now supported with the `--ignore-case` command-line flag or with `Feature::CaseInsensitivity` in the library (#23) - non-capturing groups are now the default; capturing groups can be enabled with the `--capture-groups` command-line flag or with `Feature::CapturingGroup` in the library (#15) - a lower bound for the conversion of repeated substrings can now be set by specifying `--min-repetitions` and `--min-substring-length` or using the library methods `RegExpBuilder.with_minimum_repetitions()` and `RegExpBuilder.with_minimum_substring_length()` (#10) - test cases can now be passed from a file within the library as well using `RegExpBuilder::from_file()` (#13) ### Changes - the rules for the conversion of test cases to shorthand character classes have been updated to be compliant to the newest Unicode Standard 13.0 (#21) - the dependency on the unmaintained linked-list crate has been removed (#24) ### Bug Fixes - test cases starting with a hyphen are now correctly parsed on the command-line (#12) - the common substring detection algorithm now uses optionality expressions where possible instead of redundant union operations (#22) ### Test Coverage - new unit tests, integration tests and property tests have been added ## grex 1.0.0 (released on 02 Feb 2020) ### Features - conversion to character classes `\d`, `\D`, `\s`, `\S`, `\w`, `\W` is now supported - repetition detection now works with arbitrarily nested expressions. Input strings such as `aaabaaab` which were previously converted to `^(aaab){2}$` are now converted to `^(a{3}b){2}$`. - optional syntax highlighting for the produced regular expressions can now be enabled using the `--colorize` command-line flag or with the library method `RegExpBuilder.with_syntax_highlighting()` ### Test Coverage - new unit tests, integration tests and property tests have been added ## grex 0.3.2 (released on 12 Jan 2020) ### Test Coverage - new property tests have been added that revealed new bugs ### Bug Fixes - entire rewrite of the repetition detection algorithm - the former algorithm produced wrong regular expressions or even panicked for certain test cases ## grex 0.3.1 (released on 06 Jan 2020) ### Test Coverage - property tests have been added using the [proptest](https://crates.io/crates/proptest) crate - big thanks go to [Christophe Biocca](https://github.com/christophebiocca) for pointing me to the concept of property tests in the first place and for writing an initial implementation of these tests ### Bug Fixes - some regular expression specific characters were not escaped correctly in the generated expression - expressions consisting of a single alternation such as `^(abc|xyz)$` were missing the outer parentheses. This caused an erroneous match of strings such as `abc123` or `456xyz` because of precedence rules. - the created DFA was wrong for repetition conversion in some corner cases. The input `a, aa, aaa, aaaa, aaab` previously returned the expression `^a{1,4}b?$` which erroneously matches `aaaab`. Now the correct expression `^(a{3}b|a{1,4})$` is returned. ### Documentation - some minor documentation updates ## grex 0.3.0 (released on 24 Dec 2019) ### Features - *grex* is now also available as a library - escaping of non-ascii characters is now supported with the `-e` flag - astral code points can be converted to surrogate with the `--with-surrogates` flag - repeated non-overlapping substrings can be converted to `{min,max}` quantifier notation using the `-r` flag ### Bug Fixes - many many many bug fixes :-O ## grex 0.2.0 (released on 20 Oct 2019) ### Features - character classes are now supported - input strings can now be read from a text file ### Changes - unicode characters are not escaped anymore by default - the performance of the DFA minimization algorithm has been improved for large DFAs - regular expressions are now always surrounded by anchors `^` and `$` ### Bug Fixes - fixed a bug that caused a panic when giving an empty string as input ## grex 0.1.0 (released on 06 Oct 2019) This is the very first release of *grex*. It aims at simplifying the construction of regular expressions based on matching example input. ### Features - literals - detection of common prefixes and suffixes - alternation using `|` operator - optionality using `?` quantifier - concatenation of all of the former ================================================ FILE: benches/benchmark.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ use criterion::{criterion_group, criterion_main, Criterion}; use grex::RegExpBuilder; use itertools::Itertools; use std::fs::File; use std::io::Read; fn load_test_cases() -> Vec { let mut f = File::open("./benches/testcases.txt").expect("Test cases could not be loaded"); let mut s = String::new(); f.read_to_string(&mut s).unwrap(); s.split("\n") .map(|test_case| test_case.to_string()) .collect_vec() } fn benchmark_grex_with_default_settings(c: &mut Criterion) { let test_cases = load_test_cases(); c.bench_function("grex with default settings", |bencher| { bencher.iter(|| RegExpBuilder::from(&test_cases).build()) }); } fn benchmark_grex_with_conversion_of_repetitions(c: &mut Criterion) { let test_cases = load_test_cases(); c.bench_function("grex with conversion of repetitions", |bencher| { bencher.iter(|| { RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .build() }) }); } fn benchmark_grex_with_conversion_of_digits(c: &mut Criterion) { let test_cases = load_test_cases(); c.bench_function("grex with conversion of digits", |bencher| { bencher.iter(|| { RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .build() }) }); } fn benchmark_grex_with_conversion_of_non_digits(c: &mut Criterion) { let test_cases = load_test_cases(); c.bench_function("grex with conversion of non-digits", |bencher| { bencher.iter(|| { RegExpBuilder::from(&test_cases) .with_conversion_of_non_digits() .build() }) }); } fn benchmark_grex_with_conversion_of_words(c: &mut Criterion) { let test_cases = load_test_cases(); c.bench_function("grex with conversion of words", |bencher| { bencher.iter(|| { RegExpBuilder::from(&test_cases) .with_conversion_of_words() .build() }) }); } fn benchmark_grex_with_conversion_of_non_words(c: &mut Criterion) { let test_cases = load_test_cases(); c.bench_function("grex with conversion of non-words", |bencher| { bencher.iter(|| { RegExpBuilder::from(&test_cases) .with_conversion_of_non_words() .build() }) }); } fn benchmark_grex_with_conversion_of_whitespace(c: &mut Criterion) { let test_cases = load_test_cases(); c.bench_function("grex with conversion of whitespace", |bencher| { bencher.iter(|| { RegExpBuilder::from(&test_cases) .with_conversion_of_whitespace() .build() }) }); } fn benchmark_grex_with_conversion_of_non_whitespace(c: &mut Criterion) { let test_cases = load_test_cases(); c.bench_function("grex with conversion of non-whitespace", |bencher| { bencher.iter(|| { RegExpBuilder::from(&test_cases) .with_conversion_of_non_whitespace() .build() }) }); } fn benchmark_grex_with_case_insensitive_matching(c: &mut Criterion) { let test_cases = load_test_cases(); c.bench_function("grex with case-insensitive matching", |bencher| { bencher.iter(|| { RegExpBuilder::from(&test_cases) .with_case_insensitive_matching() .build() }) }); } fn benchmark_grex_with_verbose_mode(c: &mut Criterion) { let test_cases = load_test_cases(); c.bench_function("grex with verbose mode", |bencher| { bencher.iter(|| RegExpBuilder::from(&test_cases).with_verbose_mode().build()) }); } criterion_group!( benches, benchmark_grex_with_default_settings, benchmark_grex_with_conversion_of_repetitions, benchmark_grex_with_conversion_of_digits, benchmark_grex_with_conversion_of_non_digits, benchmark_grex_with_conversion_of_words, benchmark_grex_with_conversion_of_non_words, benchmark_grex_with_conversion_of_whitespace, benchmark_grex_with_conversion_of_non_whitespace, benchmark_grex_with_case_insensitive_matching, benchmark_grex_with_verbose_mode ); criterion_main!(benches); ================================================ FILE: benches/testcases.txt ================================================ Rocket Sled Elysian Heirloom Kaleb's Favor Blazing Renegade Flash Fire Silence Talir's Favored Timekeeper Oasis Sanctuary Rolant's Favor Mantle of Justice Eilyn's Favor Thunderbird Primal Incarnation Vampire Bat Vara's Favor Devouring Shadow Seat of Order Seat of Fury Seat of Impulse Seat of Vengeance Seat of Glory Seat of Progress Seat of Chaos Seat of Mystery Seat of Cunning Seat of Wisdom Firebomb Grenadin Iron Sword Magmahound Wisp Rhinarc Sentinel Owl Gemblade Frog Snowball Pig Serpent Hatchling Carnosaur Stormdancer Illusionary Dragon Spiteling Vengeful Gargoyle Muertis, Pale Rider Occi, Pale Rider Sangu, Pale Rider Volan, Pale Rider Direwood Beast ================================================ FILE: demo.tape ================================================ # demo.gif created with https://github.com/charmbracelet/vhs on macOS 13 (Ventura) Require grex Output demo.gif Set Shell zsh Set Theme "Whimsy" Set Width 1200 Set Height 850 Set TypingSpeed 150ms Type "grex -c 'regexes are awesome' 'regexes are awful'" Sleep 3s Enter Sleep 10s Up Left 42 Type " --verbose" Sleep 3s Enter Sleep 15s Type "clear" Enter Type "grex -c haha HAHAHA" Sleep 3s Enter Sleep 10s Up Left 12 Type " --repetitions" Sleep 3s Enter Sleep 10s Up Left 12 Type " --verbose" Sleep 3s Enter Sleep 15s Up Left 12 Type " --ignore-case" Sleep 3s Enter Sleep 15s ================================================ FILE: grex.pyi ================================================ # # Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. # See the License for the specific language governing permissions and # limitations under the License. from typing import List class RegExpBuilder: """This class builds regular expressions from user-provided test cases.""" @classmethod def from_test_cases(cls, test_cases: List[str]) -> "RegExpBuilder": """Specify the test cases to build the regular expression from. The test cases need not be sorted because `RegExpBuilder` sorts them internally. Args: test_cases (list[str]): The list of test cases Raises: ValueError: if `test_cases` is empty """ def with_conversion_of_digits(self) -> "RegExpBuilder": """Convert any Unicode decimal digit to character class `\d`. This method takes precedence over `with_conversion_of_words` if both are set. Decimal digits are converted to `\d`, the remaining word characters to `\w`. This method takes precedence over `with_conversion_of_non_whitespace` if both are set. Decimal digits are converted to `\d`, the remaining non-whitespace characters to `\S`. """ def with_conversion_of_non_digits(self) -> "RegExpBuilder": """Convert any character which is not a Unicode decimal digit to character class `\D`. This method takes precedence over `with_conversion_of_non_words` if both are set. Non-digits which are also non-word characters are converted to `\D`. This method takes precedence over `with_conversion_of_non_whitespace` if both are set. Non-digits which are also non-space characters are converted to `\D`. """ def with_conversion_of_whitespace(self) -> "RegExpBuilder": """Convert any Unicode whitespace character to character class `\s`. This method takes precedence over `with_conversion_of_non_digits` if both are set. Whitespace characters are converted to `\s`, the remaining non-digit characters to `\D`. This method takes precedence over `with_conversion_of_non_words` if both are set. Whitespace characters are converted to `\s`, the remaining non-word characters to `\W`. """ def with_conversion_of_non_whitespace(self) -> "RegExpBuilder": """Convert any character which is not a Unicode whitespace character to character class `\S`.""" def with_conversion_of_words(self) -> "RegExpBuilder": """Convert any Unicode word character to character class `\w`. This method takes precedence over `with_conversion_of_non_digits` if both are set. Word characters are converted to `\w`, the remaining non-digit characters to `\D`. This method takes precedence over `with_conversion_of_non_whitespace` if both are set. Word characters are converted to `\w`, the remaining non-space characters to `\S`. """ def with_conversion_of_non_words(self) -> "RegExpBuilder": """Convert any character which is not a Unicode word character to character class `\W`. This method takes precedence over `with_conversion_of_non_whitespace` if both are set. Non-words which are also non-space characters are converted to `\W`. """ def with_conversion_of_repetitions(self) -> "RegExpBuilder": """Detect repeated non-overlapping substrings and to convert them to `{min,max}` quantifier notation.""" def with_case_insensitive_matching(self) -> "RegExpBuilder": """Enable case-insensitive matching of test cases so that letters match both upper and lower case.""" def with_capturing_groups(self) -> "RegExpBuilder": """Replace non-capturing groups with capturing ones.""" def with_minimum_repetitions(self, quantity: int) -> "RegExpBuilder": """Specify the minimum quantity of substring repetitions to be converted if `with_conversion_of_repetitions` is set. If the quantity is not explicitly set with this method, a default value of 1 will be used. Args: quantity (int): The minimum quantity of substring repetitions Raises: ValueError: if `quantity` is zero """ def with_minimum_substring_length(self, length: int) -> "RegExpBuilder": """Specify the minimum length a repeated substring must have in order to be converted if `with_conversion_of_repetitions` is set. If the length is not explicitly set with this method, a default value of 1 will be used. Args: length (int): The minimum substring length Raises: ValueError: if `length` is zero """ def with_escaping_of_non_ascii_chars(self, use_surrogate_pairs: bool) -> "RegExpBuilder": """Convert non-ASCII characters to unicode escape sequences. The parameter `use_surrogate_pairs` specifies whether to convert astral code planes (range `U+010000` to `U+10FFFF`) to surrogate pairs. Args: use_surrogate_pairs (bool): Whether to convert astral code planes to surrogate pairs """ def with_verbose_mode(self) -> "RegExpBuilder": """ Produce a nicer looking regular expression in verbose mode.""" def without_start_anchor(self) -> "RegExpBuilder": """Remove the caret anchor '^' from the resulting regular expression, thereby allowing to match the test cases also when they do not occur at the start of a string. """ def without_end_anchor(self) -> "RegExpBuilder": """Remove the dollar sign anchor '$' from the resulting regular expression, thereby allowing to match the test cases also when they do not occur at the end of a string. """ def without_anchors(self) -> "RegExpBuilder": """Remove the caret and dollar sign anchors from the resulting regular expression, thereby allowing to match the test cases also when they occur within a larger string that contains other content as well. """ def build(self) -> str: """Build the actual regular expression using the previously given settings.""" ================================================ FILE: pyproject.toml ================================================ [project] name = "grex" version = "1.0.2" authors = [{name = "Peter M. Stahl", email = "pemistahl@gmail.com"}] description = "grex generates regular expressions from user-provided test cases." readme = "README_PYPI.md" requires-python = ">=3.12" license = {file = "LICENSE"} keywords = ["pattern", "regex", "regexp"] classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: Science/Research", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Programming Language :: Rust", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing" ] [project.urls] homepage = "https://github.com/pemistahl/grex" repository = "https://github.com/pemistahl/grex" [project.optional-dependencies] test = ["pytest == 9.0.1"] [tool.maturin] no-default-features = true features = ["pyo3/extension-module", "pyo3/generate-import-lib", "python"] [build-system] requires = ["maturin>=1.1,<2.0"] build-backend = "maturin" ================================================ FILE: requirements.txt ================================================ maturin == 1.10.1 pytest == 9.0.1 ================================================ FILE: src/builder.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ use crate::config::RegExpConfig; use crate::regexp::RegExp; use itertools::Itertools; use std::io::ErrorKind; use std::path::PathBuf; pub(crate) const MISSING_TEST_CASES_MESSAGE: &str = "No test cases have been provided for regular expression generation"; pub(crate) const MINIMUM_REPETITIONS_MESSAGE: &str = "Quantity of minimum repetitions must be greater than zero"; pub(crate) const MINIMUM_SUBSTRING_LENGTH_MESSAGE: &str = "Minimum substring length must be greater than zero"; /// This struct builds regular expressions from user-provided test cases. #[derive(Clone)] #[cfg_attr(feature = "python", pyo3::prelude::pyclass)] pub struct RegExpBuilder { pub(crate) test_cases: Vec, pub(crate) config: RegExpConfig, } impl RegExpBuilder { /// Specifies the test cases to build the regular expression from. /// /// The test cases need not be sorted because `RegExpBuilder` sorts them internally. /// /// ⚠ Panics if `test_cases` is empty. pub fn from>(test_cases: &[T]) -> Self { if test_cases.is_empty() { panic!("{}", MISSING_TEST_CASES_MESSAGE); } Self { test_cases: test_cases.iter().cloned().map(|it| it.into()).collect_vec(), config: RegExpConfig::new(), } } /// Specifies a text file containing test cases to build the regular expression from. /// /// The test cases need not be sorted because `RegExpBuilder` sorts them internally. /// /// Each test case needs to be on a separate line. /// Lines may be ended with either a newline (`\n`) or /// a carriage return with a line feed (`\r\n`). /// The final line ending is optional. /// /// ⚠ Panics if: /// - the file cannot be found /// - the file's encoding is not valid UTF-8 data /// - the file cannot be opened because of conflicting permissions pub fn from_file>(file_path: T) -> Self { match std::fs::read_to_string(file_path.into()) { Ok(file_content) => Self { test_cases: file_content.lines().map(|it| it.to_string()).collect_vec(), config: RegExpConfig::new(), }, Err(error) => match error.kind() { ErrorKind::NotFound => panic!("The specified file could not be found"), ErrorKind::InvalidData => { panic!("The specified file's encoding is not valid UTF-8") } ErrorKind::PermissionDenied => { panic!("Permission denied: The specified file could not be opened") } _ => panic!("{}", error), }, } } /// Converts any Unicode decimal digit to character class `\d`. /// /// This method takes precedence over /// [`with_conversion_of_words`](Self::with_conversion_of_words) if both are set. /// Decimal digits are converted to `\d`, the remaining word characters to `\w`. /// /// This method takes precedence over /// [`with_conversion_of_non_whitespace`](Self::with_conversion_of_non_whitespace) if both are set. /// Decimal digits are converted to `\d`, the remaining non-whitespace characters to `\S`. pub fn with_conversion_of_digits(&mut self) -> &mut Self { self.config.is_digit_converted = true; self } /// Converts any character which is not a Unicode decimal digit to character class `\D`. /// /// This method takes precedence over /// [`with_conversion_of_non_words`](Self::with_conversion_of_non_words) if both are set. /// Non-digits which are also non-word characters are converted to `\D`. /// /// This method takes precedence over /// [`with_conversion_of_non_whitespace`](Self::with_conversion_of_non_whitespace) if both are set. /// Non-digits which are also non-space characters are converted to `\D`. pub fn with_conversion_of_non_digits(&mut self) -> &mut Self { self.config.is_non_digit_converted = true; self } /// Converts any Unicode whitespace character to character class `\s`. /// /// This method takes precedence over /// [`with_conversion_of_non_digits`](Self::with_conversion_of_non_digits) if both are set. /// Whitespace characters are converted to `\s`, the remaining non-digit characters to `\D`. /// /// This method takes precedence over /// [`with_conversion_of_non_words`](Self::with_conversion_of_non_words) if both are set. /// Whitespace characters are converted to `\s`, the remaining non-word characters to `\W`. pub fn with_conversion_of_whitespace(&mut self) -> &mut Self { self.config.is_space_converted = true; self } /// Converts any character which is not a Unicode whitespace character to character class `\S`. pub fn with_conversion_of_non_whitespace(&mut self) -> &mut Self { self.config.is_non_space_converted = true; self } /// Converts any Unicode word character to character class `\w`. /// /// This method takes precedence over /// [`with_conversion_of_non_digits`](Self::with_conversion_of_non_digits) if both are set. /// Word characters are converted to `\w`, the remaining non-digit characters to `\D`. /// /// This method takes precedence over /// [`with_conversion_of_non_whitespace`](Self::with_conversion_of_non_whitespace) if both are set. /// Word characters are converted to `\w`, the remaining non-space characters to `\S`. pub fn with_conversion_of_words(&mut self) -> &mut Self { self.config.is_word_converted = true; self } /// Converts any character which is not a Unicode word character to character class `\W`. /// /// This method takes precedence over /// [`with_conversion_of_non_whitespace`](Self::with_conversion_of_non_whitespace) if both are set. /// Non-words which are also non-space characters are converted to `\W`. pub fn with_conversion_of_non_words(&mut self) -> &mut Self { self.config.is_non_word_converted = true; self } /// Detects repeated non-overlapping substrings and /// to convert them to `{min,max}` quantifier notation. pub fn with_conversion_of_repetitions(&mut self) -> &mut Self { self.config.is_repetition_converted = true; self } /// Enables case-insensitive matching of test cases /// so that letters match both upper and lower case. pub fn with_case_insensitive_matching(&mut self) -> &mut Self { self.config.is_case_insensitive_matching = true; self } /// Replaces non-capturing groups with capturing ones. pub fn with_capturing_groups(&mut self) -> &mut Self { self.config.is_capturing_group_enabled = true; self } /// Specifies the minimum quantity of substring repetitions to be converted if /// [`with_conversion_of_repetitions`](Self::with_conversion_of_repetitions) is set. /// /// If the quantity is not explicitly set with this method, a default value of 1 will be used. /// /// ⚠ Panics if `quantity` is zero. pub fn with_minimum_repetitions(&mut self, quantity: u32) -> &mut Self { if quantity == 0 { panic!("{}", MINIMUM_REPETITIONS_MESSAGE); } self.config.minimum_repetitions = quantity; self } /// Specifies the minimum length a repeated substring must have in order to be converted if /// [`with_conversion_of_repetitions`](Self::with_conversion_of_repetitions) is set. /// /// If the length is not explicitly set with this method, a default value of 1 will be used. /// /// ⚠ Panics if `length` is zero. pub fn with_minimum_substring_length(&mut self, length: u32) -> &mut Self { if length == 0 { panic!("{}", MINIMUM_SUBSTRING_LENGTH_MESSAGE); } self.config.minimum_substring_length = length; self } /// Converts non-ASCII characters to unicode escape sequences. /// The parameter `use_surrogate_pairs` specifies whether to convert astral code planes /// (range `U+010000` to `U+10FFFF`) to surrogate pairs. pub fn with_escaping_of_non_ascii_chars(&mut self, use_surrogate_pairs: bool) -> &mut Self { self.config.is_non_ascii_char_escaped = true; self.config.is_astral_code_point_converted_to_surrogate = use_surrogate_pairs; self } /// Produces a nicer looking regular expression in verbose mode. pub fn with_verbose_mode(&mut self) -> &mut Self { self.config.is_verbose_mode_enabled = true; self } /// Removes the caret anchor '^' from the resulting regular /// expression, thereby allowing to match the test cases also when they do not occur /// at the start of a string. pub fn without_start_anchor(&mut self) -> &mut Self { self.config.is_start_anchor_disabled = true; self } /// Removes the dollar sign anchor '$' from the resulting regular /// expression, thereby allowing to match the test cases also when they do not occur /// at the end of a string. pub fn without_end_anchor(&mut self) -> &mut Self { self.config.is_end_anchor_disabled = true; self } /// Removes the caret and dollar sign anchors from the resulting /// regular expression, thereby allowing to match the test cases also when they occur /// within a larger string that contains other content as well. pub fn without_anchors(&mut self) -> &mut Self { self.config.is_start_anchor_disabled = true; self.config.is_end_anchor_disabled = true; self } /// Provides syntax highlighting for the resulting regular expression. /// /// ⚠ This method may only be used if the resulting regular expression is meant to /// be printed to the console. The regex string representation returned from enabling /// this setting cannot be fed into the [*regex*](https://crates.io/crates/regex) crate. #[cfg(feature = "cli")] #[doc(hidden)] pub fn with_syntax_highlighting(&mut self) -> &mut Self { self.config.is_output_colorized = true; self } /// Builds the actual regular expression using the previously given settings. pub fn build(&mut self) -> String { RegExp::from(&mut self.test_cases, &self.config).to_string() } } ================================================ FILE: src/char_range.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /// A lightweight replacement for unic_char_range::CharRange /// Represents a closed range of Unicode characters #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) struct CharRange { start: char, end: char, } impl CharRange { /// Creates a closed character range from start to end (inclusive) pub(crate) fn closed(start: char, end: char) -> Self { Self { start, end } } /// Checks if the given character is within this range pub(crate) fn contains(&self, c: char) -> bool { c >= self.start && c <= self.end } /// Returns an iterator over all valid Unicode scalar values /// This includes U+0000 to U+D7FF and U+E000 to U+10FFFF /// (excludes surrogate code points U+D800 to U+DFFF) pub(crate) fn all() -> CharRangeIter { CharRangeIter { current: '\0', done: false, } } } /// Iterator over all valid Unicode scalar values pub(crate) struct CharRangeIter { current: char, done: bool, } impl Iterator for CharRangeIter { type Item = char; fn next(&mut self) -> Option { if self.done { return None; } let result = self.current; // Get the next valid Unicode scalar value let mut next_code_point = self.current as u32 + 1; // Skip over surrogate code points (U+D800 to U+DFFF) and find next valid char loop { if next_code_point > 0x10FFFF { // We've reached the end of valid Unicode code points self.done = true; break; } match char::from_u32(next_code_point) { Some(next_char) => { self.current = next_char; break; } None => { // Invalid code point (likely surrogate), skip to next next_code_point += 1; } } } Some(result) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_char_range_contains() { let range = CharRange::closed('a', 'z'); assert!(range.contains('a')); assert!(range.contains('m')); assert!(range.contains('z')); assert!(!range.contains('A')); assert!(!range.contains('0')); } #[test] fn test_char_range_all() { let all_chars: Vec = CharRange::all().take(10).collect(); assert_eq!(all_chars[0], '\0'); assert_eq!(all_chars.len(), 10); } #[test] fn test_char_range_all_count() { // Valid Unicode scalar values: 0x110000 total code points - 0x800 surrogates = 0x10F800 let count = CharRange::all().count(); assert_eq!(count, 0x10F800); } } ================================================ FILE: src/cluster.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ use crate::char_range::CharRange; use crate::config::RegExpConfig; use crate::grapheme::Grapheme; use crate::unicode_tables::{DECIMAL_NUMBER, WHITE_SPACE, WORD}; use itertools::Itertools; use std::cmp::Ordering; use std::collections::HashMap; use std::ops::Range; use std::sync::LazyLock; use unicode_general_category::GeneralCategory as GC; use unicode_segmentation::UnicodeSegmentation; #[derive(Clone, Debug, Eq, PartialEq)] pub(crate) struct GraphemeCluster<'a> { graphemes: Vec, config: &'a RegExpConfig, } impl<'a> GraphemeCluster<'a> { pub(crate) fn from(s: &str, config: &'a RegExpConfig) -> Self { Self { graphemes: UnicodeSegmentation::graphemes(s, true) .flat_map(|it| { let contains_backslash = it.chars().count() == 2 && it.contains('\\'); let contains_combining_mark_or_unassigned_chars = it.chars().any(|c| { let category = unicode_general_category::get_general_category(c); matches!( category, // Mark categories GC::NonspacingMark | GC::SpacingMark | GC::EnclosingMark | // Other categories GC::Control | GC::Format | GC::Surrogate | GC::PrivateUse | GC::Unassigned ) }); if contains_backslash || contains_combining_mark_or_unassigned_chars { it.chars() .map(|c| { Grapheme::from( &c.to_string(), config.is_capturing_group_enabled, config.is_output_colorized, config.is_verbose_mode_enabled, ) }) .collect_vec() } else { vec![Grapheme::from( it, config.is_capturing_group_enabled, config.is_output_colorized, config.is_verbose_mode_enabled, )] } }) .collect_vec(), config, } } pub(crate) fn from_graphemes(graphemes: Vec, config: &'a RegExpConfig) -> Self { Self { graphemes, config } } pub(crate) fn new(grapheme: Grapheme, config: &'a RegExpConfig) -> Self { Self { graphemes: vec![grapheme], config, } } pub(crate) fn convert_to_char_classes(&mut self) { let is_digit_converted = self.config.is_digit_converted; let is_non_digit_converted = self.config.is_non_digit_converted; let is_space_converted = self.config.is_space_converted; let is_non_space_converted = self.config.is_non_space_converted; let is_word_converted = self.config.is_word_converted; let is_non_word_converted = self.config.is_non_word_converted; for grapheme in self.graphemes.iter_mut() { grapheme.chars = grapheme .chars .iter() .map(|it| { it.chars() .map(|c| { if is_digit_converted && is_digit(c) { "\\d".to_string() } else if is_word_converted && is_word(c) { "\\w".to_string() } else if is_space_converted && is_space(c) { "\\s".to_string() } else if is_non_digit_converted && !is_digit(c) { "\\D".to_string() } else if is_non_word_converted && !is_word(c) { "\\W".to_string() } else if is_non_space_converted && !is_space(c) { "\\S".to_string() } else { c.to_string() } }) .join("") }) .collect_vec(); } } pub(crate) fn convert_repetitions(&mut self) { let mut repetitions = vec![]; convert_repetitions(self.graphemes(), repetitions.as_mut(), self.config); if !repetitions.is_empty() { self.graphemes = repetitions; } } pub(crate) fn merge( first: &GraphemeCluster, second: &GraphemeCluster, config: &'a RegExpConfig, ) -> Self { let mut graphemes = vec![]; graphemes.extend_from_slice(&first.graphemes); graphemes.extend_from_slice(&second.graphemes); Self { graphemes, config } } pub(crate) fn graphemes(&self) -> &Vec { &self.graphemes } pub(crate) fn graphemes_mut(&mut self) -> &mut Vec { &mut self.graphemes } pub(crate) fn size(&self) -> usize { self.graphemes.len() } pub(crate) fn char_count(&self, is_non_ascii_char_escaped: bool) -> usize { self.graphemes .iter() .map(|it| it.char_count(is_non_ascii_char_escaped)) .sum() } pub(crate) fn is_empty(&self) -> bool { self.graphemes.is_empty() } } fn is_digit(c: char) -> bool { static VALID_NUMERIC_CHARS: LazyLock> = LazyLock::new(|| convert_chars_to_range(DECIMAL_NUMBER)); VALID_NUMERIC_CHARS.iter().any(|range| range.contains(c)) } fn is_word(c: char) -> bool { static VALID_ALPHANUMERIC_CHARS: LazyLock> = LazyLock::new(|| convert_chars_to_range(WORD)); VALID_ALPHANUMERIC_CHARS .iter() .any(|range| range.contains(c)) } fn is_space(c: char) -> bool { static VALID_SPACE_CHARS: LazyLock> = LazyLock::new(|| convert_chars_to_range(WHITE_SPACE)); VALID_SPACE_CHARS.iter().any(|range| range.contains(c)) } fn convert_repetitions( graphemes: &[Grapheme], repetitions: &mut Vec, config: &RegExpConfig, ) { let repeated_substrings = collect_repeated_substrings(graphemes); let ranges_of_repetitions = create_ranges_of_repetitions(repeated_substrings, config); let coalesced_repetitions = coalesce_repetitions(ranges_of_repetitions); replace_graphemes_with_repetitions(coalesced_repetitions, graphemes, repetitions, config) } fn collect_repeated_substrings(graphemes: &[Grapheme]) -> HashMap, Vec> { let mut map = HashMap::new(); for i in 0..graphemes.len() { let suffix = &graphemes[i..]; for j in 1..=graphemes.len() / 2 { if suffix.len() >= j { let prefix = suffix[..j].iter().map(|it| it.value()).collect_vec(); let indices = map.entry(prefix).or_insert_with(Vec::new); indices.push(i); } } } map } fn create_ranges_of_repetitions( repeated_substrings: HashMap, Vec>, config: &RegExpConfig, ) -> Vec<(Range, Vec)> { let mut repetitions = Vec::<(Range, Vec)>::new(); for (prefix_length, group) in &repeated_substrings .iter() .filter(|&(prefix, indices)| { indices .iter() .tuple_windows() .all(|(first, second)| (second - first) >= prefix.len()) }) .sorted_by_key(|&(prefix, _)| prefix.len()) .rev() .chunk_by(|&(prefix, _)| prefix.len()) { for (prefix, indices) in group.sorted_by_key(|&(_, indices)| indices[0]) { indices .iter() .map(|it| *it..it + prefix_length) .coalesce(|x, y| { if x.end == y.start { Ok(x.start..y.end) } else { Err((x, y)) } }) .filter(|range| { let count = ((range.end - range.start) / prefix_length) as u32; count > config.minimum_repetitions }) .for_each(|range| repetitions.push((range, prefix.clone()))); } } repetitions } fn coalesce_repetitions( ranges_of_repetitions: Vec<(Range, Vec)>, ) -> Vec<(Range, Vec)> { ranges_of_repetitions .iter() .sorted_by(|&(first_range, _), &(second_range, _)| { match second_range.end.cmp(&first_range.end) { Ordering::Equal => first_range.start.cmp(&second_range.start), other => other, } }) .coalesce(|first_tup, second_tup| { let first_range = &first_tup.0; let second_range = &second_tup.0; if (first_range.contains(&second_range.start) || first_range.contains(&second_range.end)) && second_range.end != first_range.start { Ok(first_tup) } else { Err((first_tup, second_tup)) } }) .map(|(range, substr)| (range.clone(), substr.clone())) .collect_vec() } fn replace_graphemes_with_repetitions( coalesced_repetitions: Vec<(Range, Vec)>, graphemes: &[Grapheme], repetitions: &mut Vec, config: &RegExpConfig, ) { if coalesced_repetitions.is_empty() { return; } for grapheme in graphemes { repetitions.push(grapheme.clone()); } for (range, substr) in coalesced_repetitions.iter() { if range.end > repetitions.len() { break; } let count = ((range.end - range.start) / substr.len()) as u32; if substr.len() < config.minimum_substring_length as usize { continue; } repetitions.splice( range.clone(), [Grapheme::new( substr.clone(), count, count, config.is_capturing_group_enabled, config.is_output_colorized, config.is_verbose_mode_enabled, )] .iter() .cloned(), ); } for new_grapheme in repetitions.iter_mut() { convert_repetitions( &new_grapheme .chars .iter() .map(|it| { Grapheme::from( it, config.is_capturing_group_enabled, config.is_output_colorized, config.is_verbose_mode_enabled, ) }) .collect_vec(), new_grapheme.repetitions.as_mut(), config, ); } } fn convert_chars_to_range(chars: &[(char, char)]) -> Vec { chars .iter() .map(|&(start, end)| CharRange::closed(start, end)) .collect_vec() } ================================================ FILE: src/component.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ use crate::quantifier::Quantifier; use std::fmt::{Display, Formatter, Result}; pub(crate) enum Component { CapturedLeftParenthesis, CapturedParenthesizedExpression(String, bool, bool), Caret(bool), CharClass(String), DollarSign(bool), Hyphen, IgnoreCaseFlag, IgnoreCaseAndVerboseModeFlag, LeftBracket, Pipe, Quantifier(Quantifier, bool), Repetition(u32, bool), RepetitionRange(u32, u32, bool), RightBracket, RightParenthesis, UncapturedLeftParenthesis, UncapturedParenthesizedExpression(String, bool, bool), VerboseModeFlag, } impl Component { pub(crate) fn to_repr(&self, is_output_colorized: bool) -> String { match is_output_colorized { true => self.to_colored_string(false), false => self.to_string(), } } pub(crate) fn to_colored_string(&self, is_escaped: bool) -> String { match self { Component::CapturedLeftParenthesis => Self::green_bold(&self.to_string(), is_escaped), Component::CapturedParenthesizedExpression( expr, is_verbose_mode_enabled, has_final_line_break, ) => { if *is_verbose_mode_enabled { if *has_final_line_break { format!( "\n{}\n{}\n{}\n", Component::CapturedLeftParenthesis.to_colored_string(is_escaped), expr, Component::RightParenthesis.to_colored_string(is_escaped) ) } else { format!( "\n{}\n{}\n{}", Component::CapturedLeftParenthesis.to_colored_string(is_escaped), expr, Component::RightParenthesis.to_colored_string(is_escaped) ) } } else { format!( "{}{}{}", Component::CapturedLeftParenthesis.to_colored_string(is_escaped), expr, Component::RightParenthesis.to_colored_string(is_escaped) ) } } Component::Caret(is_verbose_mode_enabled) => { if *is_verbose_mode_enabled { format!( "{}\n", Self::yellow_bold(&Component::Caret(false).to_string(), is_escaped) ) } else { Self::yellow_bold(&self.to_string(), is_escaped) } } Component::CharClass(value) => Self::black_on_bright_yellow(value, is_escaped), Component::DollarSign(is_verbose_mode_enabled) => { if *is_verbose_mode_enabled { format!( "\n{}", Self::yellow_bold(&Component::DollarSign(false).to_string(), is_escaped) ) } else { Self::yellow_bold(&self.to_string(), is_escaped) } } Component::Hyphen => Self::cyan_bold(&self.to_string(), is_escaped), Component::IgnoreCaseFlag => { Self::bright_yellow_on_black(&self.to_string(), is_escaped) } Component::IgnoreCaseAndVerboseModeFlag => { format!("{}\n", Self::bright_yellow_on_black("(?ix)", is_escaped)) } Component::LeftBracket => Self::cyan_bold(&self.to_string(), is_escaped), Component::Pipe => Self::red_bold(&self.to_string(), is_escaped), Component::Quantifier(quantifier, is_verbose_mode_enabled) => { if *is_verbose_mode_enabled { format!( "{}\n", Self::purple_bold(&quantifier.to_string(), is_escaped) ) } else { Self::purple_bold(&self.to_string(), is_escaped) } } Component::Repetition(num, is_verbose_mode_enabled) => { if *is_verbose_mode_enabled { format!( "{}\n", Self::white_on_bright_blue( &Component::Repetition(*num, false).to_string(), is_escaped ) ) } else { Self::white_on_bright_blue(&self.to_string(), is_escaped) } } Component::RepetitionRange(min, max, is_verbose_mode_enabled) => { if *is_verbose_mode_enabled { format!( "{}\n", Self::white_on_bright_blue( &Component::RepetitionRange(*min, *max, false).to_string(), is_escaped ) ) } else { Self::white_on_bright_blue(&self.to_string(), is_escaped) } } Component::RightBracket => Self::cyan_bold(&self.to_string(), is_escaped), Component::RightParenthesis => Self::green_bold(&self.to_string(), is_escaped), Component::UncapturedLeftParenthesis => Self::green_bold(&self.to_string(), is_escaped), Component::UncapturedParenthesizedExpression( expr, is_verbose_mode_enabled, has_final_line_break, ) => { if *is_verbose_mode_enabled { if *has_final_line_break { format!( "\n{}\n{}\n{}\n", Component::UncapturedLeftParenthesis.to_colored_string(is_escaped), expr, Component::RightParenthesis.to_colored_string(is_escaped) ) } else { format!( "\n{}\n{}\n{}", Component::UncapturedLeftParenthesis.to_colored_string(is_escaped), expr, Component::RightParenthesis.to_colored_string(is_escaped) ) } } else { format!( "{}{}{}", Component::UncapturedLeftParenthesis.to_colored_string(is_escaped), expr, Component::RightParenthesis.to_colored_string(is_escaped) ) } } Component::VerboseModeFlag => { format!("{}\n", Self::bright_yellow_on_black("(?x)", is_escaped)) } } } fn black_on_bright_yellow(value: &str, is_escaped: bool) -> String { Self::color_code("103;30", value, is_escaped) } fn bright_yellow_on_black(value: &str, is_escaped: bool) -> String { Self::color_code("40;93", value, is_escaped) } fn cyan_bold(value: &str, is_escaped: bool) -> String { Self::color_code("1;36", value, is_escaped) } fn green_bold(value: &str, is_escaped: bool) -> String { Self::color_code("1;32", value, is_escaped) } fn purple_bold(value: &str, is_escaped: bool) -> String { Self::color_code("1;35", value, is_escaped) } fn red_bold(value: &str, is_escaped: bool) -> String { Self::color_code("1;31", value, is_escaped) } fn white_on_bright_blue(value: &str, is_escaped: bool) -> String { Self::color_code("104;37", value, is_escaped) } fn yellow_bold(value: &str, is_escaped: bool) -> String { Self::color_code("1;33", value, is_escaped) } fn color_code(code: &str, value: &str, is_escaped: bool) -> String { if is_escaped { format!("\u{1b}\\[{}m\\{}\u{1b}\\[0m", code, value) } else { format!("\u{1b}[{}m{}\u{1b}[0m", code, value) } } } impl Display for Component { fn fmt(&self, f: &mut Formatter<'_>) -> Result { write!( f, "{}", match self { Component::CapturedLeftParenthesis => "(".to_string(), Component::CapturedParenthesizedExpression( expr, is_verbose_mode_enabled, has_final_line_break, ) => if *is_verbose_mode_enabled { if *has_final_line_break { format!( "\n{}\n{}\n{}\n", Component::CapturedLeftParenthesis, expr, Component::RightParenthesis ) } else { format!( "\n{}\n{}\n{}", Component::CapturedLeftParenthesis, expr, Component::RightParenthesis ) } } else { format!( "{}{}{}", Component::CapturedLeftParenthesis, expr, Component::RightParenthesis ) }, Component::Caret(is_verbose_mode_enabled) => if *is_verbose_mode_enabled { "^\n".to_string() } else { "^".to_string() }, Component::CharClass(value) => value.clone(), Component::DollarSign(is_verbose_mode_enabled) => if *is_verbose_mode_enabled { "\n$".to_string() } else { "$".to_string() }, Component::Hyphen => "-".to_string(), Component::IgnoreCaseFlag => "(?i)".to_string(), Component::IgnoreCaseAndVerboseModeFlag => "(?ix)\n".to_string(), Component::LeftBracket => "[".to_string(), Component::Pipe => "|".to_string(), Component::Quantifier(quantifier, is_verbose_mode_enabled) => if *is_verbose_mode_enabled { format!("{}\n", quantifier) } else { quantifier.to_string() }, Component::Repetition(num, is_verbose_mode_enabled) => { if *num == 0 && *is_verbose_mode_enabled { "{\\d+\\}\n".to_string() } else if *num == 0 { "{\\d+\\}".to_string() } else if *is_verbose_mode_enabled { format!("{{{}}}\n", num) } else { format!("{{{}}}", num) } } Component::RepetitionRange(min, max, is_verbose_mode_enabled) => { if *min == 0 && *max == 0 && *is_verbose_mode_enabled { "{\\d+,\\d+\\}\n".to_string() } else if *min == 0 && *max == 0 { "{\\d+,\\d+\\}".to_string() } else if *is_verbose_mode_enabled { format!("{{{},{}}}\n", min, max) } else { format!("{{{},{}}}", min, max) } } Component::RightBracket => "]".to_string(), Component::RightParenthesis => ")".to_string(), Component::UncapturedLeftParenthesis => "(?:".to_string(), Component::UncapturedParenthesizedExpression( expr, is_verbose_mode_enabled, has_final_line_break, ) => { if *is_verbose_mode_enabled { if *has_final_line_break { format!( "\n{}\n{}\n{}\n", Component::UncapturedLeftParenthesis, expr, Component::RightParenthesis ) } else { format!( "\n{}\n{}\n{}", Component::UncapturedLeftParenthesis, expr, Component::RightParenthesis ) } } else { format!( "{}{}{}", Component::UncapturedLeftParenthesis, expr, Component::RightParenthesis ) } } Component::VerboseModeFlag => "(?x)\n".to_string(), } ) } } ================================================ FILE: src/config.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #[derive(Clone, Debug, Hash, Ord, PartialOrd, Eq, PartialEq)] pub(crate) struct RegExpConfig { pub(crate) minimum_repetitions: u32, pub(crate) minimum_substring_length: u32, pub(crate) is_digit_converted: bool, pub(crate) is_non_digit_converted: bool, pub(crate) is_space_converted: bool, pub(crate) is_non_space_converted: bool, pub(crate) is_word_converted: bool, pub(crate) is_non_word_converted: bool, pub(crate) is_repetition_converted: bool, pub(crate) is_case_insensitive_matching: bool, pub(crate) is_capturing_group_enabled: bool, pub(crate) is_non_ascii_char_escaped: bool, pub(crate) is_astral_code_point_converted_to_surrogate: bool, pub(crate) is_verbose_mode_enabled: bool, pub(crate) is_start_anchor_disabled: bool, pub(crate) is_end_anchor_disabled: bool, pub(crate) is_output_colorized: bool, } impl RegExpConfig { pub(crate) fn new() -> Self { Self { minimum_repetitions: 1, minimum_substring_length: 1, is_digit_converted: false, is_non_digit_converted: false, is_space_converted: false, is_non_space_converted: false, is_word_converted: false, is_non_word_converted: false, is_repetition_converted: false, is_case_insensitive_matching: false, is_capturing_group_enabled: false, is_non_ascii_char_escaped: false, is_astral_code_point_converted_to_surrogate: false, is_verbose_mode_enabled: false, is_start_anchor_disabled: false, is_end_anchor_disabled: false, is_output_colorized: false, } } pub(crate) fn is_char_class_feature_enabled(&self) -> bool { self.is_digit_converted || self.is_non_digit_converted || self.is_space_converted || self.is_non_space_converted || self.is_word_converted || self.is_non_word_converted || self.is_case_insensitive_matching || self.is_capturing_group_enabled } } ================================================ FILE: src/dfa.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ use crate::cluster::GraphemeCluster; use crate::config::RegExpConfig; use crate::grapheme::Grapheme; use itertools::Itertools; use petgraph::graph::NodeIndex; use petgraph::stable_graph::{Edges, StableGraph}; use petgraph::visit::Dfs; use petgraph::{Directed, Direction}; use std::cmp::{max, min}; use std::collections::{BTreeSet, HashMap, HashSet}; type State = NodeIndex; type StateLabel = String; type EdgeLabel = Grapheme; pub(crate) struct Dfa<'a> { alphabet: BTreeSet, graph: StableGraph, initial_state: State, final_state_indices: HashSet, config: &'a RegExpConfig, } impl<'a> Dfa<'a> { pub(crate) fn from( grapheme_clusters: &[GraphemeCluster], is_minimized: bool, config: &'a RegExpConfig, ) -> Self { let mut dfa = Self::new(config); for cluster in grapheme_clusters { dfa.insert(cluster); } if is_minimized { dfa.minimize(); } dfa } pub(crate) fn state_count(&self) -> usize { self.graph.node_count() } pub(crate) fn states_in_depth_first_order(&self) -> Vec { let mut depth_first_search = Dfs::new(&self.graph, self.initial_state); let mut states = vec![]; while let Some(state) = depth_first_search.next(&self.graph) { states.push(state); } states } pub(crate) fn outgoing_edges(&self, state: State) -> Edges<'_, Grapheme, Directed> { self.graph.edges_directed(state, Direction::Outgoing) } pub(crate) fn is_final_state(&self, state: State) -> bool { self.final_state_indices.contains(&state.index()) } fn new(config: &'a RegExpConfig) -> Self { let mut graph = StableGraph::new(); let initial_state = graph.add_node("".to_string()); Self { alphabet: BTreeSet::new(), graph, initial_state, final_state_indices: HashSet::new(), config, } } fn insert(&mut self, cluster: &GraphemeCluster) { let mut current_state = self.initial_state; for grapheme in cluster.graphemes() { self.alphabet.insert(grapheme.clone()); current_state = self.return_next_state(current_state, grapheme); } self.final_state_indices.insert(current_state.index()); } fn return_next_state(&mut self, current_state: State, edge_label: &Grapheme) -> State { match self.find_next_state(current_state, edge_label) { Some(next_state) => next_state, None => self.add_new_state(current_state, edge_label), } } fn find_next_state(&mut self, current_state: State, grapheme: &Grapheme) -> Option { for next_state in self.graph.neighbors(current_state) { let edge_idx = self.graph.find_edge(current_state, next_state).unwrap(); let current_grapheme = self.graph.edge_weight(edge_idx).unwrap(); if current_grapheme.value() != grapheme.value() { continue; } if current_grapheme.maximum() == grapheme.maximum() - 1 { let min = min(current_grapheme.minimum(), grapheme.minimum()); let max = max(current_grapheme.maximum(), grapheme.maximum()); let new_grapheme = Grapheme::new( grapheme.chars().clone(), min, max, self.config.is_capturing_group_enabled, self.config.is_output_colorized, self.config.is_verbose_mode_enabled, ); self.graph .update_edge(current_state, next_state, new_grapheme); return Some(next_state); } else if current_grapheme.maximum() == grapheme.maximum() { return Some(next_state); } } None } fn add_new_state(&mut self, current_state: State, edge_label: &Grapheme) -> State { let next_state = self.graph.add_node("".to_string()); self.graph .add_edge(current_state, next_state, edge_label.clone()); next_state } #[allow(clippy::many_single_char_names)] fn minimize(&mut self) { let mut p = self.get_initial_partition(); let mut w = p.iter().cloned().collect_vec(); while !w.is_empty() { let a = w.drain(0..1).next().unwrap(); for edge_label in self.alphabet.iter() { let x = self.get_parent_states(&a, edge_label); let mut replacements = vec![]; let mut is_replacement_needed = true; let mut start_idx = 0; while is_replacement_needed { for (idx, y) in p.iter().enumerate().skip(start_idx) { if x.intersection(y).count() == 0 || y.difference(&x).count() == 0 { is_replacement_needed = false; continue; } let i = x.intersection(y).copied().collect::>(); let d = y.difference(&x).copied().collect::>(); is_replacement_needed = true; start_idx = idx; replacements.push((y.clone(), i, d)); break; } if is_replacement_needed { let (_, i, d) = replacements.last().unwrap(); p.remove(start_idx); p.insert(start_idx, i.clone()); p.insert(start_idx + 1, d.clone()); } } for (y, i, d) in replacements { if w.contains(&y) { let idx = w.iter().position(|it| it == &y).unwrap(); w.remove(idx); w.push(i); w.push(d); } else if i.len() <= d.len() { w.push(i); } else { w.push(d); } } } } self.recreate_graph(p.iter().filter(|&it| !it.is_empty()).collect_vec()); } fn get_initial_partition(&self) -> Vec> { let (final_states, non_final_states): (HashSet, HashSet) = self .graph .node_indices() .partition(|&state| !self.final_state_indices.contains(&state.index())); vec![final_states, non_final_states] } fn get_parent_states(&self, a: &HashSet, label: &Grapheme) -> HashSet { let mut x = HashSet::new(); for &state in a { let direct_parent_states = self.graph.neighbors_directed(state, Direction::Incoming); for parent_state in direct_parent_states { let edge = self.graph.find_edge(parent_state, state).unwrap(); let grapheme = self.graph.edge_weight(edge).unwrap(); if grapheme.value() == label.value() && (grapheme.maximum() == label.maximum() || grapheme.minimum() == label.minimum()) { x.insert(parent_state); break; } } } x } fn recreate_graph(&mut self, p: Vec<&HashSet>) { let mut graph = StableGraph::::new(); let mut final_state_indices = HashSet::new(); let mut state_mappings = HashMap::new(); let mut new_initial_state: Option = None; for equivalence_class in p.iter() { let new_state = graph.add_node("".to_string()); for old_state in equivalence_class.iter() { if self.initial_state == *old_state { new_initial_state = Some(new_state); } state_mappings.insert(*old_state, new_state); } } for equivalence_class in p.iter() { let old_source_state = *equivalence_class.iter().next().unwrap(); let new_source_state = state_mappings.get(&old_source_state).unwrap(); for old_target_state in self.graph.neighbors(old_source_state) { let edge = self .graph .find_edge(old_source_state, old_target_state) .unwrap(); let grapheme = self.graph.edge_weight(edge).unwrap().clone(); let new_target_state = state_mappings.get(&old_target_state).unwrap(); graph.add_edge(*new_source_state, *new_target_state, grapheme.clone()); if self.final_state_indices.contains(&old_target_state.index()) { final_state_indices.insert(new_target_state.index()); } } } self.initial_state = new_initial_state.unwrap(); self.final_state_indices = final_state_indices; self.graph = graph; } } #[cfg(test)] mod tests { use super::*; #[test] fn test_state_count() { let config = RegExpConfig::new(); let mut dfa = Dfa::new(&config); assert_eq!(dfa.state_count(), 1); dfa.insert(&GraphemeCluster::from("abcd", &RegExpConfig::new())); assert_eq!(dfa.state_count(), 5); } #[test] fn test_is_final_state() { let config = RegExpConfig::new(); let dfa = Dfa::from( &[GraphemeCluster::from("abcd", &RegExpConfig::new())], true, &config, ); let intermediate_state = State::new(3); assert_eq!(dfa.is_final_state(intermediate_state), false); let final_state = State::new(4); assert_eq!(dfa.is_final_state(final_state), true); } #[test] fn test_outgoing_edges() { let config = RegExpConfig::new(); let dfa = Dfa::from( &[ GraphemeCluster::from("abcd", &RegExpConfig::new()), GraphemeCluster::from("abxd", &RegExpConfig::new()), ], true, &config, ); let state = State::new(2); let mut edges = dfa.outgoing_edges(state); let first_edge = edges.next(); assert!(first_edge.is_some()); assert_eq!( first_edge.unwrap().weight(), &Grapheme::from("c", false, false, false) ); let second_edge = edges.next(); assert!(second_edge.is_some()); assert_eq!( second_edge.unwrap().weight(), &Grapheme::from("x", false, false, false) ); let third_edge = edges.next(); assert!(third_edge.is_none()); } #[test] fn test_states_in_depth_first_order() { let config = RegExpConfig::new(); let dfa = Dfa::from( &[ GraphemeCluster::from("abcd", &RegExpConfig::new()), GraphemeCluster::from("axyz", &RegExpConfig::new()), ], true, &config, ); let states = dfa.states_in_depth_first_order(); assert_eq!(states.len(), 7); let first_state = states.get(0).unwrap(); let mut edges = dfa.outgoing_edges(*first_state); assert_eq!( edges.next().unwrap().weight(), &Grapheme::from("a", false, false, false) ); assert!(edges.next().is_none()); let second_state = states.get(1).unwrap(); edges = dfa.outgoing_edges(*second_state); assert_eq!( edges.next().unwrap().weight(), &Grapheme::from("b", false, false, false) ); assert_eq!( edges.next().unwrap().weight(), &Grapheme::from("x", false, false, false) ); assert!(edges.next().is_none()); let third_state = states.get(2).unwrap(); edges = dfa.outgoing_edges(*third_state); assert_eq!( edges.next().unwrap().weight(), &Grapheme::from("y", false, false, false) ); assert!(edges.next().is_none()); let fourth_state = states.get(3).unwrap(); edges = dfa.outgoing_edges(*fourth_state); assert_eq!( edges.next().unwrap().weight(), &Grapheme::from("z", false, false, false) ); assert!(edges.next().is_none()); let fifth_state = states.get(4).unwrap(); edges = dfa.outgoing_edges(*fifth_state); assert!(edges.next().is_none()); let sixth_state = states.get(5).unwrap(); edges = dfa.outgoing_edges(*sixth_state); assert_eq!( edges.next().unwrap().weight(), &Grapheme::from("c", false, false, false) ); assert!(edges.next().is_none()); let seventh_state = states.get(6).unwrap(); edges = dfa.outgoing_edges(*seventh_state); assert_eq!( edges.next().unwrap().weight(), &Grapheme::from("d", false, false, false) ); assert!(edges.next().is_none()); } #[test] fn test_minimization_algorithm() { let config = RegExpConfig::new(); let mut dfa = Dfa::new(&config); assert_eq!(dfa.graph.node_count(), 1); assert_eq!(dfa.graph.edge_count(), 0); dfa.insert(&GraphemeCluster::from("abcd", &RegExpConfig::new())); assert_eq!(dfa.graph.node_count(), 5); assert_eq!(dfa.graph.edge_count(), 4); dfa.insert(&GraphemeCluster::from("abxd", &RegExpConfig::new())); assert_eq!(dfa.graph.node_count(), 7); assert_eq!(dfa.graph.edge_count(), 6); dfa.minimize(); assert_eq!(dfa.graph.node_count(), 5); assert_eq!(dfa.graph.edge_count(), 5); } #[test] fn test_dfa_constructor() { let config = RegExpConfig::new(); let dfa = Dfa::from( &[ GraphemeCluster::from("abcd", &RegExpConfig::new()), GraphemeCluster::from("abxd", &RegExpConfig::new()), ], true, &config, ); assert_eq!(dfa.graph.node_count(), 5); assert_eq!(dfa.graph.edge_count(), 5); } } ================================================ FILE: src/expression.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ use crate::cluster::GraphemeCluster; use crate::config::RegExpConfig; use crate::dfa::Dfa; use crate::grapheme::Grapheme; use crate::quantifier::Quantifier; use crate::substring::Substring; use itertools::EitherOrBoth::Both; use itertools::Itertools; use ndarray::{Array1, Array2}; use petgraph::prelude::EdgeRef; use std::cmp::Reverse; use std::collections::BTreeSet; #[derive(Clone, Debug, Eq, PartialEq)] pub(crate) enum Expression<'a> { Alternation(Vec>, bool, bool, bool), CharacterClass(BTreeSet, bool), Concatenation(Box>, Box>, bool, bool, bool), Literal(GraphemeCluster<'a>, bool, bool), Repetition(Box>, Quantifier, bool, bool, bool), } impl<'a> Expression<'a> { pub(crate) fn from(dfa: Dfa, config: &'a RegExpConfig) -> Self { let states = dfa.states_in_depth_first_order(); let state_count = dfa.state_count(); let mut a = Array2::>::default((state_count, state_count)); let mut b = Array1::>::default(state_count); for (i, state) in states.iter().enumerate() { if dfa.is_final_state(*state) { b[i] = Some(Expression::new_literal( GraphemeCluster::from("", config), config, )); } for edge in dfa.outgoing_edges(*state) { let literal = Expression::new_literal( GraphemeCluster::new(edge.weight().clone(), config), config, ); let j = states.iter().position(|&it| it == edge.target()).unwrap(); a[(i, j)] = if a[(i, j)].is_some() { Self::union(&a[(i, j)], &Some(literal), config) } else { Some(literal) } } } for n in (0..state_count).rev() { if a[(n, n)].is_some() { b[n] = Self::concatenate( &Self::repeat_zero_or_more_times(&a[(n, n)], config), &b[n], config, ); for j in 0..n { a[(n, j)] = Self::concatenate( &Self::repeat_zero_or_more_times(&a[(n, n)], config), &a[(n, j)], config, ); } } for i in 0..n { if a[(i, n)].is_some() { b[i] = Self::union(&b[i], &Self::concatenate(&a[(i, n)], &b[n], config), config); for j in 0..n { a[(i, j)] = Self::union( &a[(i, j)], &Self::concatenate(&a[(i, n)], &a[(n, j)], config), config, ); } } } } if !b.is_empty() && b[0].is_some() { b[0].as_ref().unwrap().clone() } else { Expression::new_literal(GraphemeCluster::from("", config), config) } } pub(crate) fn new_alternation(exprs: Vec>, config: &RegExpConfig) -> Self { let mut options: Vec = vec![]; Self::flatten_alternations(&mut options, exprs); options.sort_by_key(|option| Reverse(option.len())); Expression::Alternation( options, config.is_capturing_group_enabled, config.is_output_colorized, config.is_verbose_mode_enabled, ) } fn new_character_class( first_char_set: BTreeSet, second_char_set: BTreeSet, config: &RegExpConfig, ) -> Self { let union_set = first_char_set.union(&second_char_set).copied().collect(); Expression::CharacterClass(union_set, config.is_output_colorized) } fn new_concatenation( expr1: Expression<'a>, expr2: Expression<'a>, config: &RegExpConfig, ) -> Self { Expression::Concatenation( Box::from(expr1), Box::from(expr2), config.is_capturing_group_enabled, config.is_output_colorized, config.is_verbose_mode_enabled, ) } pub(crate) fn new_literal(cluster: GraphemeCluster<'a>, config: &RegExpConfig) -> Self { Expression::Literal( cluster, config.is_non_ascii_char_escaped, config.is_astral_code_point_converted_to_surrogate, ) } fn new_repetition(expr: Expression<'a>, quantifier: Quantifier, config: &RegExpConfig) -> Self { Expression::Repetition( Box::from(expr), quantifier, config.is_capturing_group_enabled, config.is_output_colorized, config.is_verbose_mode_enabled, ) } fn is_empty(&self) -> bool { match self { Expression::Literal(cluster, _, _) => cluster.is_empty(), _ => false, } } pub(crate) fn is_single_codepoint(&self) -> bool { match self { Expression::CharacterClass(_, _) => true, Expression::Literal(cluster, is_non_ascii_char_escaped, _) => { cluster.char_count(*is_non_ascii_char_escaped) == 1 && cluster.graphemes().first().unwrap().maximum() == 1 } _ => false, } } fn len(&self) -> usize { match self { Expression::Alternation(options, _, _, _) => options.first().unwrap().len(), Expression::CharacterClass(_, _) => 1, Expression::Concatenation(expr1, expr2, _, _, _) => expr1.len() + expr2.len(), Expression::Literal(cluster, _, _) => cluster.size(), Expression::Repetition(expr, _, _, _, _) => expr.len(), } } pub(crate) fn precedence(&self) -> u8 { match self { Expression::Alternation(_, _, _, _) | Expression::CharacterClass(_, _) => 1, Expression::Concatenation(_, _, _, _, _) | Expression::Literal(_, _, _) => 2, Expression::Repetition(_, _, _, _, _) => 3, } } pub(crate) fn remove_substring(&mut self, substring: &Substring, length: usize) { match self { Expression::Concatenation(expr1, expr2, _, _, _) => match substring { Substring::Prefix => { if let Expression::Literal(_, _, _) = **expr1 { expr1.remove_substring(substring, length) } } Substring::Suffix => { if let Expression::Literal(_, _, _) = **expr2 { expr2.remove_substring(substring, length) } } }, Expression::Literal(cluster, _, _) => match substring { Substring::Prefix => { cluster.graphemes_mut().drain(..length); } Substring::Suffix => { let graphemes = cluster.graphemes_mut(); graphemes.drain(graphemes.len() - length..); } }, _ => (), } } pub(crate) fn value(&self, substring: Option<&Substring>) -> Option> { match self { Expression::Concatenation(expr1, expr2, _, _, _) => match substring { Some(value) => match value { Substring::Prefix => expr1.value(None), Substring::Suffix => expr2.value(None), }, None => None, }, Expression::Literal(cluster, _, _) => Some(cluster.graphemes().clone()), _ => None, } } fn repeat_zero_or_more_times( expr: &Option>, config: &'a RegExpConfig, ) -> Option> { expr.as_ref() .map(|value| Expression::new_repetition(value.clone(), Quantifier::KleeneStar, config)) } fn concatenate( a: &Option>, b: &Option>, config: &'a RegExpConfig, ) -> Option> { if a.is_none() || b.is_none() { return None; } let expr1 = a.as_ref().unwrap(); let expr2 = b.as_ref().unwrap(); if expr1.is_empty() { return b.clone(); } if expr2.is_empty() { return a.clone(); } if let (Expression::Literal(graphemes_a, _, _), Expression::Literal(graphemes_b, _, _)) = (&expr1, &expr2) { return Some(Expression::new_literal( GraphemeCluster::merge(graphemes_a, graphemes_b, config), config, )); } if let ( Expression::Literal(graphemes_a, _, _), Expression::Concatenation(first, second, _, _, _), ) = (&expr1, &expr2) { if let Expression::Literal(graphemes_first, _, _) = &**first { let literal = Expression::new_literal( GraphemeCluster::merge(graphemes_a, graphemes_first, config), config, ); return Some(Expression::new_concatenation( literal, *second.clone(), config, )); } } if let ( Expression::Literal(graphemes_b, _, _), Expression::Concatenation(first, second, _, _, _), ) = (&expr2, &expr1) { if let Expression::Literal(graphemes_second, _, _) = &**second { let literal = Expression::new_literal( GraphemeCluster::merge(graphemes_second, graphemes_b, config), config, ); return Some(Expression::new_concatenation( *first.clone(), literal, config, )); } } Some(Expression::new_concatenation( expr1.clone(), expr2.clone(), config, )) } fn union( a: &Option>, b: &Option>, config: &'a RegExpConfig, ) -> Option> { if let (Some(mut expr1), Some(mut expr2)) = (a.clone(), b.clone()) { if expr1 != expr2 { let common_prefix = Self::remove_common_substring(&mut expr1, &mut expr2, Substring::Prefix); let common_suffix = Self::remove_common_substring(&mut expr1, &mut expr2, Substring::Suffix); let mut result = if expr1.is_empty() { Some(Expression::new_repetition( expr2.clone(), Quantifier::QuestionMark, config, )) } else if expr2.is_empty() { Some(Expression::new_repetition( expr1.clone(), Quantifier::QuestionMark, config, )) } else { None }; if result.is_none() { if let Expression::Repetition(expr, quantifier, _, _, _) = &expr1 { if quantifier == &Quantifier::QuestionMark { let alternation = Expression::new_alternation( vec![*expr.clone(), expr2.clone()], config, ); result = Some(Expression::new_repetition( alternation, Quantifier::QuestionMark, config, )); } } } if result.is_none() { if let Expression::Repetition(expr, quantifier, _, _, _) = &expr2 { if quantifier == &Quantifier::QuestionMark { let alternation = Expression::new_alternation( vec![expr1.clone(), *expr.clone()], config, ); result = Some(Expression::new_repetition( alternation, Quantifier::QuestionMark, config, )); } } } if result.is_none() && expr1.is_single_codepoint() && expr2.is_single_codepoint() { let first_char_set = Self::extract_character_set(expr1.clone()); let second_char_set = Self::extract_character_set(expr2.clone()); result = Some(Expression::new_character_class( first_char_set, second_char_set, config, )); } if result.is_none() { result = Some(Expression::new_alternation(vec![expr1, expr2], config)); } if let Some(prefix) = common_prefix { result = Some(Expression::new_concatenation( Expression::new_literal( GraphemeCluster::from_graphemes(prefix, config), config, ), result.unwrap(), config, )); } if let Some(suffix) = common_suffix { result = Some(Expression::new_concatenation( result.unwrap(), Expression::new_literal( GraphemeCluster::from_graphemes(suffix, config), config, ), config, )); } result } else if a.is_some() { a.clone() } else if b.is_some() { b.clone() } else { None } } else if a.is_some() { a.clone() } else if b.is_some() { b.clone() } else { None } } fn flatten_alternations( flattened_options: &mut Vec>, current_options: Vec>, ) { for option in current_options { if let Expression::Alternation(expr_options, _, _, _) = option { Self::flatten_alternations(flattened_options, expr_options); } else { flattened_options.push(option); } } } fn extract_character_set(expr: Expression) -> BTreeSet { match expr { Expression::Literal(cluster, _, _) => { let single_char = cluster .graphemes() .first() .unwrap() .value() .chars() .next() .unwrap(); btreeset![single_char] } Expression::CharacterClass(char_set, _) => char_set, _ => BTreeSet::new(), } } fn remove_common_substring( a: &mut Expression, b: &mut Expression, substring: Substring, ) -> Option> { let common_substring = Self::find_common_substring(a, b, &substring); if let Some(value) = &common_substring { a.remove_substring(&substring, value.len()); b.remove_substring(&substring, value.len()); } common_substring } fn find_common_substring( a: &Expression, b: &Expression, substring: &Substring, ) -> Option> { let mut graphemes_a = a.value(Some(substring)).unwrap_or_default(); let mut graphemes_b = b.value(Some(substring)).unwrap_or_default(); let mut common_graphemes = vec![]; if let Substring::Suffix = substring { graphemes_a.reverse(); graphemes_b.reverse(); } for pair in graphemes_a.iter().zip_longest(graphemes_b.iter()) { match pair { Both(grapheme_a, grapheme_b) => { if grapheme_a == grapheme_b { common_graphemes.push(grapheme_a.clone()); } else { break; } } _ => break, } } if let Substring::Suffix = substring { common_graphemes.reverse(); } if common_graphemes.is_empty() { None } else { Some(common_graphemes) } } } #[cfg(test)] mod tests { use super::*; #[test] fn ensure_correct_string_representation_of_alternation_1() { let config = RegExpConfig::new(); let literal1 = Expression::new_literal(GraphemeCluster::from("abc", &config), &config); let literal2 = Expression::new_literal(GraphemeCluster::from("def", &config), &config); let alternation = Expression::new_alternation(vec![literal1, literal2], &config); assert_eq!(alternation.to_string(), "abc|def"); } #[test] fn ensure_correct_string_representation_of_alternation_2() { let config = RegExpConfig::new(); let literal1 = Expression::new_literal(GraphemeCluster::from("a", &config), &config); let literal2 = Expression::new_literal(GraphemeCluster::from("ab", &config), &config); let literal3 = Expression::new_literal(GraphemeCluster::from("abc", &config), &config); let alternation = Expression::new_alternation(vec![literal1, literal2, literal3], &config); assert_eq!(alternation.to_string(), "abc|ab|a"); } #[test] fn ensure_correct_string_representation_of_character_class_1() { let config = RegExpConfig::new(); let char_class = Expression::new_character_class(btreeset!['a'], btreeset!['b'], &config); assert_eq!(char_class.to_string(), "[ab]"); } #[test] fn ensure_correct_string_representation_of_character_class_2() { let config = RegExpConfig::new(); let char_class = Expression::new_character_class(btreeset!['a', 'b'], btreeset!['c'], &config); assert_eq!(char_class.to_string(), "[a-c]"); } #[test] fn ensure_correct_string_representation_of_concatenation_1() { let config = RegExpConfig::new(); let literal1 = Expression::new_literal(GraphemeCluster::from("abc", &config), &config); let literal2 = Expression::new_literal(GraphemeCluster::from("def", &config), &config); let concatenation = Expression::new_concatenation(literal1, literal2, &config); assert_eq!(concatenation.to_string(), "abcdef"); } #[test] fn ensure_correct_string_representation_of_concatenation_2() { let config = RegExpConfig::new(); let literal1 = Expression::new_literal(GraphemeCluster::from("abc", &config), &config); let literal2 = Expression::new_literal(GraphemeCluster::from("def", &config), &config); let repetition = Expression::new_repetition(literal1, Quantifier::KleeneStar, &config); let concatenation = Expression::new_concatenation(repetition, literal2, &config); assert_eq!(concatenation.to_string(), "(?:abc)*def"); } #[test] fn ensure_correct_removal_of_prefix_in_literal() { let config = RegExpConfig::new(); let mut literal = Expression::new_literal(GraphemeCluster::from("abcdef", &config), &config); assert_eq!( literal.value(None), Some( vec!["a", "b", "c", "d", "e", "f"] .iter() .map(|&it| Grapheme::from( it, config.is_capturing_group_enabled, config.is_output_colorized, config.is_verbose_mode_enabled )) .collect_vec() ) ); literal.remove_substring(&Substring::Prefix, 2); assert_eq!( literal.value(None), Some( vec!["c", "d", "e", "f"] .iter() .map(|&it| Grapheme::from( it, config.is_capturing_group_enabled, config.is_output_colorized, config.is_verbose_mode_enabled )) .collect_vec() ) ); } #[test] fn ensure_correct_removal_of_suffix_in_literal() { let config = RegExpConfig::new(); let mut literal = Expression::new_literal(GraphemeCluster::from("abcdef", &config), &config); assert_eq!( literal.value(None), Some( vec!["a", "b", "c", "d", "e", "f"] .iter() .map(|&it| Grapheme::from( it, config.is_capturing_group_enabled, config.is_output_colorized, config.is_verbose_mode_enabled )) .collect_vec() ) ); literal.remove_substring(&Substring::Suffix, 2); assert_eq!( literal.value(None), Some( vec!["a", "b", "c", "d"] .iter() .map(|&it| Grapheme::from( it, config.is_capturing_group_enabled, config.is_output_colorized, config.is_verbose_mode_enabled )) .collect_vec() ) ); } #[test] fn ensure_correct_string_representation_of_repetition_1() { let config = RegExpConfig::new(); let literal = Expression::new_literal(GraphemeCluster::from("abc", &config), &config); let repetition = Expression::new_repetition(literal, Quantifier::KleeneStar, &config); assert_eq!(repetition.to_string(), "(?:abc)*"); } #[test] fn ensure_correct_string_representation_of_repetition_2() { let config = RegExpConfig::new(); let literal = Expression::new_literal(GraphemeCluster::from("a", &config), &config); let repetition = Expression::new_repetition(literal, Quantifier::QuestionMark, &config); assert_eq!(repetition.to_string(), "a?"); } } ================================================ FILE: src/format.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ use crate::char_range::CharRange; use crate::cluster::GraphemeCluster; use crate::component::Component; use crate::expression::Expression; use crate::quantifier::Quantifier; use itertools::Itertools; use std::collections::BTreeSet; use std::fmt::{Display, Formatter, Result}; impl Display for Expression<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> Result { match self { Expression::Alternation( options, is_capturing_group_enabled, is_output_colorized, is_verbose_mode_enabled, ) => format_alternation( f, self, options, *is_capturing_group_enabled, *is_output_colorized, *is_verbose_mode_enabled, ), Expression::CharacterClass(char_set, is_output_colorized) => { format_character_class(f, char_set, *is_output_colorized) } Expression::Concatenation( expr1, expr2, is_capturing_group_enabled, is_output_colorized, is_verbose_mode_enabled, ) => format_concatenation( f, self, expr1, expr2, *is_capturing_group_enabled, *is_output_colorized, *is_verbose_mode_enabled, ), Expression::Literal( cluster, is_non_ascii_char_escaped, is_astral_code_point_converted_to_surrogate, ) => format_literal( f, cluster, *is_non_ascii_char_escaped, *is_astral_code_point_converted_to_surrogate, ), Expression::Repetition( expr, quantifier, is_capturing_group_enabled, is_output_colorized, is_verbose_mode_enabled, ) => format_repetition( f, self, expr, quantifier, *is_capturing_group_enabled, *is_output_colorized, *is_verbose_mode_enabled, ), } } } fn get_codepoint_position(c: char) -> usize { CharRange::all().position(|it| it == c).unwrap() } fn format_alternation( f: &mut Formatter<'_>, expr: &Expression, options: &[Expression], is_capturing_group_enabled: bool, is_output_colorized: bool, is_verbose_mode_enabled: bool, ) -> Result { let pipe_component = Component::Pipe.to_repr(is_output_colorized); let disjunction_operator = if is_verbose_mode_enabled { format!("\n{}\n", pipe_component) } else { pipe_component }; let alternation_str = options .iter() .map(|option| { if option.precedence() < expr.precedence() && !option.is_single_codepoint() { if is_capturing_group_enabled { Component::CapturedParenthesizedExpression( option.to_string(), is_verbose_mode_enabled, true, ) .to_repr(is_output_colorized) } else { Component::UncapturedParenthesizedExpression( option.to_string(), is_verbose_mode_enabled, true, ) .to_repr(is_output_colorized) } } else { format!("{}", option) } }) .join(&disjunction_operator); write!(f, "{}", alternation_str) } fn format_character_class( f: &mut Formatter<'_>, char_set: &BTreeSet, is_output_colorized: bool, ) -> Result { let chars_to_escape = ['[', ']', '\\', '-', '^', '$']; let escaped_char_set = char_set .iter() .map(|c| { if chars_to_escape.contains(c) { format!("{}{}", "\\", c) } else if c == &'\n' { "\\n".to_string() } else if c == &'\r' { "\\r".to_string() } else if c == &'\t' { "\\t".to_string() } else { c.to_string() } }) .collect_vec(); let char_positions = char_set .iter() .map(|&it| get_codepoint_position(it)) .collect_vec(); let mut subsets = vec![]; let mut subset = vec![]; for ((first_c, first_pos), (second_c, second_pos)) in escaped_char_set.iter().zip(char_positions).tuple_windows() { if subset.is_empty() { subset.push(first_c); } if second_pos == first_pos + 1 { subset.push(second_c); } else { subsets.push(subset); subset = vec![second_c]; } } subsets.push(subset); let mut char_class_strs = vec![]; for subset in subsets.iter() { if subset.len() <= 2 { for c in subset.iter() { char_class_strs.push((*c).to_string()); } } else { char_class_strs.push(format!( "{}{}{}", subset.first().unwrap(), Component::Hyphen.to_repr(is_output_colorized), subset.last().unwrap() )); } } write!( f, "{}{}{}", Component::LeftBracket.to_repr(is_output_colorized), char_class_strs.join(""), Component::RightBracket.to_repr(is_output_colorized) ) } fn format_concatenation( f: &mut Formatter<'_>, expr: &Expression, expr1: &Expression, expr2: &Expression, is_capturing_group_enabled: bool, is_output_colorized: bool, is_verbose_mode_enabled: bool, ) -> Result { let expr_strs = [expr1, expr2] .iter() .map(|&it| { if it.precedence() < expr.precedence() && !it.is_single_codepoint() { if is_capturing_group_enabled { Component::CapturedParenthesizedExpression( it.to_string(), is_verbose_mode_enabled, true, ) .to_repr(is_output_colorized) } else { Component::UncapturedParenthesizedExpression( it.to_string(), is_verbose_mode_enabled, true, ) .to_repr(is_output_colorized) } } else { format!("{}", it) } }) .collect_vec(); write!( f, "{}{}", expr_strs.first().unwrap(), expr_strs.last().unwrap() ) } fn format_literal( f: &mut Formatter<'_>, cluster: &GraphemeCluster, is_non_ascii_char_escaped: bool, is_astral_code_point_converted_to_surrogate: bool, ) -> Result { let literal_str = cluster .graphemes() .iter() .cloned() .map(|mut grapheme| { if grapheme.has_repetitions() { grapheme .repetitions_mut() .iter_mut() .for_each(|repeated_grapheme| { repeated_grapheme.escape_regexp_symbols( is_non_ascii_char_escaped, is_astral_code_point_converted_to_surrogate, ); }); } else { grapheme.escape_regexp_symbols( is_non_ascii_char_escaped, is_astral_code_point_converted_to_surrogate, ); } grapheme.to_string() }) .join(""); write!(f, "{}", literal_str) } fn format_repetition( f: &mut Formatter<'_>, expr: &Expression, expr1: &Expression, quantifier: &Quantifier, is_capturing_group_enabled: bool, is_output_colorized: bool, is_verbose_mode_enabled: bool, ) -> Result { if expr1.precedence() < expr.precedence() && !expr1.is_single_codepoint() { if is_capturing_group_enabled { write!( f, "{}{}", Component::CapturedParenthesizedExpression( expr1.to_string(), is_verbose_mode_enabled, false ) .to_repr(is_output_colorized), Component::Quantifier(quantifier.clone(), is_verbose_mode_enabled) .to_repr(is_output_colorized) ) } else { write!( f, "{}{}", Component::UncapturedParenthesizedExpression( expr1.to_string(), is_verbose_mode_enabled, false ) .to_repr(is_output_colorized), Component::Quantifier(quantifier.clone(), is_verbose_mode_enabled) .to_repr(is_output_colorized) ) } } else { write!( f, "{}{}", expr1, Component::Quantifier(quantifier.clone(), is_verbose_mode_enabled) .to_repr(is_output_colorized) ) } } ================================================ FILE: src/grapheme.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ use crate::component::Component; use itertools::Itertools; use std::fmt::{Display, Formatter, Result}; const CHARS_TO_ESCAPE: [&str; 14] = [ "(", ")", "[", "]", "{", "}", "+", "*", "-", ".", "?", "|", "^", "$", ]; const CHAR_CLASSES: [&str; 6] = ["\\d", "\\s", "\\w", "\\D", "\\S", "\\W"]; #[derive(Clone, Debug, Hash, Ord, PartialOrd, Eq, PartialEq)] pub(crate) struct Grapheme { pub(crate) chars: Vec, pub(crate) repetitions: Vec, min: u32, max: u32, is_capturing_group_enabled: bool, is_output_colorized: bool, is_verbose_mode_enabled: bool, } impl Grapheme { pub(crate) fn from( s: &str, is_capturing_group_enabled: bool, is_output_colorized: bool, is_verbose_mode_enabled: bool, ) -> Self { Self { chars: vec![s.to_string()], repetitions: vec![], min: 1, max: 1, is_capturing_group_enabled, is_output_colorized, is_verbose_mode_enabled, } } pub(crate) fn new( chars: Vec, min: u32, max: u32, is_capturing_group_enabled: bool, is_output_colorized: bool, is_verbose_mode_enabled: bool, ) -> Self { Self { chars, repetitions: vec![], min, max, is_capturing_group_enabled, is_output_colorized, is_verbose_mode_enabled, } } pub(crate) fn value(&self) -> String { self.chars.join("") } pub(crate) fn chars(&self) -> &Vec { &self.chars } pub(crate) fn chars_mut(&mut self) -> &mut Vec { &mut self.chars } pub(crate) fn has_repetitions(&self) -> bool { !self.repetitions.is_empty() } pub(crate) fn repetitions_mut(&mut self) -> &mut Vec { &mut self.repetitions } pub(crate) fn minimum(&self) -> u32 { self.min } pub(crate) fn maximum(&self) -> u32 { self.max } pub(crate) fn char_count(&self, is_non_ascii_char_escaped: bool) -> usize { if is_non_ascii_char_escaped { self.chars .iter() .map(|it| it.chars().map(|c| self.escape(c, false)).join("")) .join("") .chars() .count() } else { self.chars.iter().map(|it| it.chars().count()).sum() } } pub(crate) fn escape_non_ascii_chars(&mut self, use_surrogate_pairs: bool) { self.chars = self .chars .iter() .map(|it| { it.chars() .map(|c| self.escape(c, use_surrogate_pairs)) .join("") }) .collect_vec(); } pub(crate) fn escape_regexp_symbols( &mut self, is_non_ascii_char_escaped: bool, is_astral_code_point_converted_to_surrogate: bool, ) { let characters = self.chars_mut(); #[allow(clippy::needless_range_loop)] for i in 0..characters.len() { let mut character = characters[i].clone(); for char_to_escape in CHARS_TO_ESCAPE.iter() { character = character.replace(char_to_escape, &format!("{}{}", "\\", char_to_escape)); } character = character .replace('\n', "\\n") .replace('\r', "\\r") .replace('\t', "\\t"); if character == "\\" { character = "\\\\".to_string(); } characters[i] = character; } if is_non_ascii_char_escaped { self.escape_non_ascii_chars(is_astral_code_point_converted_to_surrogate); } } fn escape(&self, c: char, use_surrogate_pairs: bool) -> String { if c.is_ascii() { c.to_string() } else if use_surrogate_pairs && ('\u{10000}'..'\u{10ffff}').contains(&c) { self.convert_to_surrogate_pair(c) } else { c.escape_unicode().to_string() } } fn convert_to_surrogate_pair(&self, c: char) -> String { c.encode_utf16(&mut [0; 2]) .iter() .map(|it| format!("\\u{{{:x}}}", it)) .join("") } } impl Display for Grapheme { fn fmt(&self, f: &mut Formatter<'_>) -> Result { let is_single_char = self.char_count(false) == 1 || (self.chars.len() == 1 && self.chars[0].matches('\\').count() == 1); let is_range = self.min < self.max; let is_repetition = self.min > 1; let mut value = if self.repetitions.is_empty() { self.value() } else { self.repetitions.iter().map(|it| it.to_string()).join("") }; value = Component::CharClass(value.clone()) .to_repr(self.is_output_colorized && CHAR_CLASSES.contains(&&*value)); if !is_range && is_repetition && is_single_char { write!( f, "{}{}", value, Component::Repetition(self.min, false).to_repr(self.is_output_colorized) ) } else if !is_range && is_repetition && !is_single_char { write!( f, "{}{}", if self.is_capturing_group_enabled { Component::CapturedParenthesizedExpression( value, self.is_verbose_mode_enabled, false, ) .to_repr(self.is_output_colorized) } else { Component::UncapturedParenthesizedExpression( value, self.is_verbose_mode_enabled, false, ) .to_repr(self.is_output_colorized) }, Component::Repetition(self.min, self.is_verbose_mode_enabled) .to_repr(self.is_output_colorized) ) } else if is_range && is_single_char { write!( f, "{}{}", value, Component::RepetitionRange(self.min, self.max, false) .to_repr(self.is_output_colorized) ) } else if is_range && !is_single_char { write!( f, "{}{}", if self.is_capturing_group_enabled { Component::CapturedParenthesizedExpression( value, self.is_verbose_mode_enabled, false, ) .to_repr(self.is_output_colorized) } else { Component::UncapturedParenthesizedExpression( value, self.is_verbose_mode_enabled, false, ) .to_repr(self.is_output_colorized) }, Component::RepetitionRange(self.min, self.max, self.is_verbose_mode_enabled) .to_repr(self.is_output_colorized) ) } else { write!(f, "{}", value) } } } ================================================ FILE: src/lib.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ //! ## 1. What does this tool do? //! //! *grex* is a library as well as a command-line utility that is meant to simplify the often //! complicated and tedious task of creating regular expressions. It does so by automatically //! generating a single regular expression from user-provided test cases. The resulting //! expression is guaranteed to match the test cases which it was generated from. //! //! This project has started as a Rust port of the JavaScript tool //! [*regexgen*](https://github.com/devongovett/regexgen) written by //! [Devon Govett](https://github.com/devongovett). Although a lot of further useful features //! could be added to it, its development was apparently ceased several years ago. The plan //! is now to add these new features to *grex* as Rust really shines when it comes to //! command-line tools. *grex* offers all features that *regexgen* provides, and more. //! //! The philosophy of this project is to generate the most specific regular expression //! possible by default which exactly matches the given input only and nothing else. //! With the use of command-line flags (in the CLI tool) or preprocessing methods //! (in the library), more generalized expressions can be created. //! //! The produced expressions are [Perl-compatible regular expressions](https://www.pcre.org) //! which are also compatible with the regular expression parser in Rust's //! [*regex crate*](https://crates.io/crates/regex). //! Other regular expression parsers or respective libraries from other programming languages //! have not been tested so far, but they ought to be mostly compatible as well. //! //! ## 2. Do I still need to learn to write regexes then? //! //! **Definitely, yes!** Using the standard settings, *grex* produces a regular expression that //! is guaranteed to match only the test cases given as input and nothing else. This has been //! verified by [property tests](https://github.com/pemistahl/grex/blob/main/tests/property_tests.rs). //! However, if the conversion to shorthand character classes such as `\w` is enabled, the //! resulting regex matches a much wider scope of test cases. Knowledge about the consequences of //! this conversion is essential for finding a correct regular expression for your business domain. //! //! *grex* uses an algorithm that tries to find the shortest possible regex for the given test cases. //! Very often though, the resulting expression is still longer or more complex than it needs to be. //! In such cases, a more compact or elegant regex can be created only by hand. //! Also, every regular expression engine has different built-in optimizations. //! *grex* does not know anything about those and therefore cannot optimize its regexes //! for a specific engine. //! //! **So, please learn how to write regular expressions!** The currently best use case for *grex* //! is to find an initial correct regex which should be inspected by hand if further optimizations //! are possible. //! //! ## 3. Current features //! //! - literals //! - character classes //! - detection of common prefixes and suffixes //! - detection of repeated substrings and conversion to `{min,max}` quantifier notation //! - alternation using `|` operator //! - optionality using `?` quantifier //! - escaping of non-ascii characters, with optional conversion of astral code points to surrogate pairs //! - case-sensitive or case-insensitive matching //! - capturing or non-capturing groups //! - optional anchors `^` and `$` //! - fully compliant to [Unicode Standard 15.0](https://unicode.org/versions/Unicode15.0.0) //! - fully compatible with [*regex* crate 1.9.0+](https://crates.io/crates/regex) //! - correctly handles graphemes consisting of multiple Unicode symbols //! - reads input strings from the command-line or from a file //! - produces more readable expressions indented on multiple using optional verbose mode //! //! ## 4. How to use? //! //! The code snippets below show how to use the public api. //! //! For [more detailed examples](https://github.com/pemistahl/grex/tree/main#53-examples), please //! take a look at the project's readme file on GitHub. //! //! ### 4.1 Default settings //! //! Test cases are passed either from a collection via [`RegExpBuilder::from()`] //! or from a file via [`RegExpBuilder::from_file()`]. //! //! ``` //! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["a", "aa", "aaa"]).build(); //! assert_eq!(regexp, "^a(?:aa?)?$"); //! ``` //! //! ### 4.2 Convert to character classes //! //! ``` //! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["a", "aa", "123"]) //! .with_conversion_of_digits() //! .with_conversion_of_words() //! .build(); //! assert_eq!(regexp, "^(?:\\d\\d\\d|\\w(?:\\w)?)$"); //! ``` //! //! ### 4.3 Convert repeated substrings //! //! ``` //! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["aa", "bcbc", "defdefdef"]) //! .with_conversion_of_repetitions() //! .build(); //! assert_eq!(regexp, "^(?:a{2}|(?:bc){2}|(?:def){3})$"); //! ``` //! //! By default, *grex* converts each substring this way which is at least a single character long //! and which is subsequently repeated at least once. You can customize these two parameters //! if you like. //! //! In the following example, the test case `aa` is not converted to `a{2}` because the repeated //! substring `a` has a length of 1, but the minimum substring length has been set to 2. //! //! ``` //! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["aa", "bcbc", "defdefdef"]) //! .with_conversion_of_repetitions() //! .with_minimum_substring_length(2) //! .build(); //! assert_eq!(regexp, "^(?:aa|(?:bc){2}|(?:def){3})$"); //! ``` //! //! Setting a minimum number of 2 repetitions in the next example, only the test case `defdefdef` //! will be converted because it is the only one that is repeated twice. //! //! ``` //! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["aa", "bcbc", "defdefdef"]) //! .with_conversion_of_repetitions() //! .with_minimum_repetitions(2) //! .build(); //! assert_eq!(regexp, "^(?:bcbc|aa|(?:def){3})$"); //! ``` //! //! ### 4.4 Escape non-ascii characters //! //! ``` //! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["You smell like 💩."]) //! .with_escaping_of_non_ascii_chars(false) //! .build(); //! assert_eq!(regexp, "^You smell like \\u{1f4a9}\\.$"); //! ``` //! //! Old versions of JavaScript do not support unicode escape sequences for //! the astral code planes (range `U+010000` to `U+10FFFF`). In order to //! support these symbols in JavaScript regular expressions, the conversion //! to surrogate pairs is necessary. More information on that matter can be //! found [here](https://mathiasbynens.be/notes/javascript-unicode). //! //! ``` //! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["You smell like 💩."]) //! .with_escaping_of_non_ascii_chars(true) //! .build(); //! assert_eq!(regexp, "^You smell like \\u{d83d}\\u{dca9}\\.$"); //! ``` //! //! ### 4.5 Case-insensitive matching //! //! The regular expressions that *grex* generates are case-sensitive by default. //! Case-insensitive matching can be enabled like so: //! //! ``` //! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["big", "BIGGER"]) //! .with_case_insensitive_matching() //! .build(); //! assert_eq!(regexp, "(?i)^big(?:ger)?$"); //! ``` //! //! ### 4.6 Capturing Groups //! //! Non-capturing groups are used by default. //! Extending the previous example, you can switch to capturing groups instead. //! //! ``` //! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["big", "BIGGER"]) //! .with_case_insensitive_matching() //! .with_capturing_groups() //! .build(); //! assert_eq!(regexp, "(?i)^big(ger)?$"); //! ``` //! //! ### 4.7 Verbose mode //! //! If you find the generated regular expression hard to read, you can enable verbose mode. //! The expression is then put on multiple lines and indented to make it more pleasant to the eyes. //! //! ``` //! use grex::RegExpBuilder; //! use indoc::indoc; //! //! let regexp = RegExpBuilder::from(&["a", "b", "bcd"]) //! .with_verbose_mode() //! .build(); //! //! assert_eq!(regexp, indoc!( //! r#" //! (?x) //! ^ //! (?: //! b //! (?: //! cd //! )? //! | //! a //! ) //! $"# //! )); //! ``` //! //! ### 4.8 Disable anchors //! //! By default, the anchors `^` and `$` are put around every generated regular expression in order //! to ensure that it matches only the test cases given as input. Often enough, however, it is //! desired to use the generated pattern as part of a larger one. For this purpose, the anchors //! can be disabled, either separately or both of them. //! //! ``` //! use grex::RegExpBuilder; //! //! let regexp = RegExpBuilder::from(&["a", "aa", "aaa"]) //! .without_anchors() //! .build(); //! assert_eq!(regexp, "a(?:aa?)?"); //! ``` //! //! ### 5. How does it work? //! //! 1. A [deterministic finite automaton](https://en.wikipedia.org/wiki/Deterministic_finite_automaton) (DFA) //! is created from the input strings. //! //! 2. The number of states and transitions between states in the DFA is reduced by applying //! [Hopcroft's DFA minimization algorithm](https://en.wikipedia.org/wiki/DFA_minimization#Hopcroft.27s_algorithm). //! //! 3. The minimized DFA is expressed as a system of linear equations which are solved with //! [Brzozowski's algebraic method](http://cs.stackexchange.com/questions/2016/how-to-convert-finite-automata-to-regular-expressions#2392), //! resulting in the final regular expression. #[macro_use] mod macros; mod builder; mod char_range; mod cluster; mod component; mod config; mod dfa; mod expression; mod format; mod grapheme; mod quantifier; mod regexp; mod substring; mod unicode_tables; #[cfg(feature = "python")] mod python; #[cfg(target_family = "wasm")] mod wasm; pub use builder::RegExpBuilder; #[cfg(target_family = "wasm")] pub use wasm::RegExpBuilder as WasmRegExpBuilder; ================================================ FILE: src/macros.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ macro_rules! btreeset { ( $( $value: expr ),* ) => {{ let mut set = std::collections::BTreeSet::new(); $( set.insert($value); )* set }}; } ================================================ FILE: src/main.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #[cfg(not(target_family = "wasm"))] mod cli { use clap::ArgAction; use clap::Parser; use grex::RegExpBuilder; use itertools::Itertools; use std::io::{stdin, BufRead, Error, ErrorKind, IsTerminal, Read}; use std::path::PathBuf; #[derive(Parser)] #[command( author = "© 2019-today Peter M. Stahl ", about = "Licensed under the Apache License, Version 2.0\n\ Downloadable from https://crates.io/crates/grex\n\ Source code at https://github.com/pemistahl/grex\n\n\ grex generates regular expressions from user-provided test cases.", version, override_usage = "grex [OPTIONS] {INPUT...|--file }", help_template = "{name} {version}\n{author}\n{about}\n\n{usage-heading} {usage}\n\n{all-args}", disable_help_flag = true, disable_version_flag = true )] pub(crate) struct Cli { // -------------------- // INPUT // -------------------- /// One or more test cases separated by blank space /// /// Use a hyphen `-` to read test cases from standard input. /// /// Conflicts with --file. #[arg( value_name = "INPUT", allow_hyphen_values = true, required_unless_present = "file", conflicts_with = "file", help_heading = "Input", display_order = 1 )] input: Vec, /// Reads test cases on separate lines from a file. /// /// Lines may be ended with either a newline `\n` or a carriage return with a line feed `\r\n`. /// The final line ending is optional. /// /// Use a hyphen `-` to read the filename from standard input. /// /// Conflicts with INPUT... #[arg( name = "file", value_name = "FILE", short, long, required_unless_present = "input", help_heading = "Input", display_order = 2 )] file_path: Option, // -------------------- // DIGIT OPTIONS // -------------------- /// Converts any Unicode decimal digit to \d. /// /// Takes precedence over --words if both are set. /// Decimal digits are converted to \d, remaining word characters to \w. /// /// Takes precedence over --non-spaces if both are set. /// Decimal digits are converted to \d, remaining non-space characters to \S. #[arg(name = "digits", short, long, help_heading = "Digit Options")] is_digit_converted: bool, /// Converts any character which is not a Unicode decimal digit to \D. /// /// Takes precedence over --non-words if both are set. /// Non-digits which are also non-word characters are converted to \D. /// /// Takes precedence over --non-spaces if both are set. /// Non-digits which are also non-space characters are converted to \D. #[arg(name = "non-digits", short = 'D', long, help_heading = "Digit Options")] is_non_digit_converted: bool, // -------------------- // WHITESPACE OPTIONS // -------------------- /// Converts any Unicode whitespace character to \s. /// /// Takes precedence over --non-digits if both are set. /// Whitespace is converted to \s, remaining non-digits to \D. /// /// Takes precedence over --non-words if both are set. /// Whitespace is converted to \s, remaining non-word characters to \W. #[arg(name = "spaces", short, long, help_heading = "Whitespace Options")] is_space_converted: bool, /// Converts any character which is not a Unicode whitespace character to \S #[arg( name = "non-spaces", short = 'S', long, help_heading = "Whitespace Options" )] is_non_space_converted: bool, // -------------------- // WORD OPTIONS // -------------------- /// Converts any Unicode word character to \w. /// /// Takes precedence over --non-digits if both are set. /// Word characters are converted to \w, remaining non-digits to \D. /// /// Takes precedence over --non-spaces if both are set. /// Word characters are converted to \w, remaining non-whitespace to \S. #[arg(name = "words", short, long, help_heading = "Word Options")] is_word_converted: bool, /// Converts any character which is not a Unicode word character to \W. /// /// Takes precedence over --non-spaces if both are set. /// Non-word characters which are also non-whitespace are converted to \W. #[arg(name = "non-words", short = 'W', long, help_heading = "Word Options")] is_non_word_converted: bool, // -------------------- // ESCAPING OPTIONS // -------------------- /// Replaces all non-ASCII characters with unicode escape sequences. #[arg(name = "escape", short, long, help_heading = "Escaping Options")] is_non_ascii_char_escaped: bool, /// Converts astral code points to surrogate pairs if --escape is set. #[arg( name = "with-surrogates", long, requires = "escape", help_heading = "Escaping Options" )] is_astral_code_point_converted_to_surrogate: bool, // -------------------- // REPETITION OPTIONS // -------------------- /// Detects repeated non-overlapping substrings and converts them to {min,max} quantifier notation. #[arg( name = "repetitions", short, long, help_heading = "Repetition Options", display_order = 1 )] is_repetition_converted: bool, /// Specifies the minimum quantity of substring repetitions to be converted if --repetitions is set. #[arg( name = "min-repetitions", value_name = "QUANTITY", long, default_value_t = 1, value_parser = repetition_options_parser, help_heading = "Repetition Options" )] minimum_repetitions: u32, /// Specifies the minimum length a repeated substring must have /// in order to be converted if --repetitions is set. #[arg( name = "min-substring-length", value_name = "LENGTH", long, default_value_t = 1, value_parser = repetition_options_parser, help_heading = "Repetition Options" )] minimum_substring_length: u32, // -------------------- // ANCHOR OPTIONS // -------------------- /// Removes the caret anchor `^` from the resulting regular expression. /// /// By default, the caret anchor is added to every generated regular expression /// which guarantees that the expression matches the test cases /// given as input only at the start of a string. /// /// This flag removes the anchor, thereby allowing to match the test cases /// also when they do not occur at the start of a string. #[arg(name = "no-start-anchor", long, help_heading = "Anchor Options")] is_caret_anchor_disabled: bool, /// Removes the dollar sign anchor `$` from the resulting regular expression. /// /// By default, the dollar sign anchor is added to every generated regular expression /// which guarantees that the expression matches the test cases given as input /// only at the end of a string. /// /// This flag removes the anchor, thereby allowing to match the test cases /// also when they do not occur at the end of a string. #[arg(name = "no-end-anchor", long, help_heading = "Anchor Options")] is_dollar_sign_anchor_disabled: bool, /// Removes the caret and dollar sign anchors from the resulting regular expression. /// /// By default, anchors are added to every generated regular expression /// which guarantees that the expression exactly matches only the test cases given as input /// and nothing else. /// /// This flag removes the anchors, thereby allowing to match the test cases /// also when they occur within a larger string that contains other content as well. #[arg(name = "no-anchors", long, help_heading = "Anchor Options")] are_anchors_disabled: bool, // -------------------- // DISPLAY OPTIONS // -------------------- /// Produces a nicer-looking regular expression in verbose mode. #[arg( name = "verbose", short = 'x', long, help_heading = "Display Options", display_order = 1 )] is_verbose_mode_enabled: bool, /// Provides syntax highlighting for the resulting regular expression. #[arg(name = "colorize", short, long, help_heading = "Display Options")] is_output_colorized: bool, // --------------------- // MISCELLANEOUS OPTIONS // --------------------- /// Performs case-insensitive matching, letters match both upper and lower case. #[arg( name = "ignore-case", short, long, help_heading = "Miscellaneous Options", display_order = 1 )] is_case_ignored: bool, /// Replaces non-capturing groups with capturing ones. #[arg( name = "capture-groups", short = 'g', long, help_heading = "Miscellaneous Options", display_order = 2 )] is_group_captured: bool, /// Prints help information #[arg( name = "help", short = 'h', long, action = ArgAction::Help, help_heading = "Miscellaneous Options", display_order = 3 )] help: Option, /// Prints version information #[arg( name = "version", short = 'v', long, action = ArgAction::Version, help_heading = "Miscellaneous Options", display_order = 4 )] version: Option, } pub(crate) fn obtain_input(cli: &Cli) -> Result, Error> { let is_stdin_available = !stdin().is_terminal(); if !cli.input.is_empty() { let is_single_item = cli.input.len() == 1; let is_hyphen = cli.input.first().unwrap() == "-"; if is_single_item && is_hyphen && is_stdin_available { Ok(stdin() .lock() .lines() .map(|line| line.unwrap()) .collect_vec()) } else { Ok(cli.input.clone()) } } else if let Some(file_path) = &cli.file_path { let is_hyphen = file_path.as_os_str() == "-"; let path = if is_hyphen && is_stdin_available { let mut stdin_file_path = String::new(); stdin().read_to_string(&mut stdin_file_path)?; PathBuf::from(stdin_file_path.trim()) } else { file_path.to_path_buf() }; match std::fs::read_to_string(path) { Ok(file_content) => Ok(file_content.lines().map(|it| it.to_string()).collect_vec()), Err(error) => Err(error), } } else { Err(Error::new( ErrorKind::InvalidInput, "error: no valid input could be found whatsoever", )) } } pub(crate) fn handle_input( cli: &Cli, input: Result, Error>, ) -> Result<(), Box> { match input { Ok(test_cases) => { let mut builder = RegExpBuilder::from(&test_cases); if cli.is_digit_converted { builder.with_conversion_of_digits(); } if cli.is_non_digit_converted { builder.with_conversion_of_non_digits(); } if cli.is_space_converted { builder.with_conversion_of_whitespace(); } if cli.is_non_space_converted { builder.with_conversion_of_non_whitespace(); } if cli.is_word_converted { builder.with_conversion_of_words(); } if cli.is_non_word_converted { builder.with_conversion_of_non_words(); } if cli.is_repetition_converted { builder.with_conversion_of_repetitions(); } if cli.is_case_ignored { builder.with_case_insensitive_matching(); } if cli.is_group_captured { builder.with_capturing_groups(); } if cli.is_non_ascii_char_escaped { builder.with_escaping_of_non_ascii_chars( cli.is_astral_code_point_converted_to_surrogate, ); } if cli.is_verbose_mode_enabled { builder.with_verbose_mode(); } if cli.is_caret_anchor_disabled { builder.without_start_anchor(); } if cli.is_dollar_sign_anchor_disabled { builder.without_end_anchor(); } if cli.are_anchors_disabled { builder.without_anchors(); } if cli.is_output_colorized { builder.with_syntax_highlighting(); } builder .with_minimum_repetitions(cli.minimum_repetitions) .with_minimum_substring_length(cli.minimum_substring_length); let regexp = builder.build(); println!("{}", regexp); Ok(()) } Err(error) => match error.kind() { ErrorKind::NotFound => Err("error: the specified file could not be found".into()), ErrorKind::InvalidData => { Err("error: the specified file's encoding is not valid UTF-8".into()) } ErrorKind::PermissionDenied => { Err("permission denied: the specified file could not be opened".into()) } _ => Err(format!("error: {}", error).into()), }, } } fn repetition_options_parser(value: &str) -> Result { match value.parse::() { Ok(parsed_value) => { if parsed_value > 0 { Ok(parsed_value) } else { Err(String::from("Value must not be zero")) } } Err(_) => Err(String::from("Value is not a valid unsigned integer")), } } } #[cfg(not(target_family = "wasm"))] fn main() { use clap::Parser; let cli = cli::Cli::parse(); if let Err(e) = cli::handle_input(&cli, cli::obtain_input(&cli)) { eprintln!("{}", e); std::process::exit(1); } } #[cfg(target_family = "wasm")] fn main() {} ================================================ FILE: src/python.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ use crate::builder::{ RegExpBuilder, MINIMUM_REPETITIONS_MESSAGE, MINIMUM_SUBSTRING_LENGTH_MESSAGE, MISSING_TEST_CASES_MESSAGE, }; use crate::config::RegExpConfig; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::PyType; use regex::{Captures, Regex}; use std::sync::LazyLock; #[pymodule] fn grex(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; Ok(()) } #[pymethods] impl RegExpBuilder { #[new] fn new(test_cases: Vec) -> PyResult { if test_cases.is_empty() { Err(PyValueError::new_err(MISSING_TEST_CASES_MESSAGE)) } else { Ok(Self { test_cases, config: RegExpConfig::new(), }) } } /// Specify the test cases to build the regular expression from. /// /// The test cases need not be sorted because `RegExpBuilder` sorts them internally. /// /// Args: /// test_cases (list[str]): The list of test cases /// /// Raises: /// ValueError: if `test_cases` is empty #[classmethod] fn from_test_cases(_cls: &Bound, test_cases: Vec) -> PyResult { Self::new(test_cases) } /// Convert any Unicode decimal digit to character class `\d`. /// /// This method takes precedence over `with_conversion_of_words` if both are set. /// Decimal digits are converted to `\d`, the remaining word characters to `\w`. /// /// This method takes precedence over `with_conversion_of_non_whitespace` if both are set. /// Decimal digits are converted to `\d`, the remaining non-whitespace characters to `\S`. #[pyo3(name = "with_conversion_of_digits")] fn py_with_conversion_of_digits(mut self_: PyRefMut) -> PyRefMut { self_.config.is_digit_converted = true; self_ } /// Convert any character which is not a Unicode decimal digit to character class `\D`. /// /// This method takes precedence over `with_conversion_of_non_words` if both are set. /// Non-digits which are also non-word characters are converted to `\D`. /// /// This method takes precedence over `with_conversion_of_non_whitespace` if both are set. /// Non-digits which are also non-space characters are converted to `\D`. #[pyo3(name = "with_conversion_of_non_digits")] fn py_with_conversion_of_non_digits(mut self_: PyRefMut) -> PyRefMut { self_.config.is_non_digit_converted = true; self_ } /// Convert any Unicode whitespace character to character class `\s`. /// /// This method takes precedence over `with_conversion_of_non_digits` if both are set. /// Whitespace characters are converted to `\s`, the remaining non-digit characters to `\D`. /// /// This method takes precedence over `with_conversion_of_non_words` if both are set. /// Whitespace characters are converted to `\s`, the remaining non-word characters to `\W`. #[pyo3(name = "with_conversion_of_whitespace")] fn py_with_conversion_of_whitespace(mut self_: PyRefMut) -> PyRefMut { self_.config.is_space_converted = true; self_ } /// Convert any character which is not a Unicode whitespace character to character class `\S`. #[pyo3(name = "with_conversion_of_non_whitespace")] fn py_with_conversion_of_non_whitespace(mut self_: PyRefMut) -> PyRefMut { self_.config.is_non_space_converted = true; self_ } /// Convert any Unicode word character to character class `\w`. /// /// This method takes precedence over `with_conversion_of_non_digits` if both are set. /// Word characters are converted to `\w`, the remaining non-digit characters to `\D`. /// /// This method takes precedence over `with_conversion_of_non_whitespace` if both are set. /// Word characters are converted to `\w`, the remaining non-space characters to `\S`. #[pyo3(name = "with_conversion_of_words")] fn py_with_conversion_of_words(mut self_: PyRefMut) -> PyRefMut { self_.config.is_word_converted = true; self_ } /// Convert any character which is not a Unicode word character to character class `\W`. /// /// This method takes precedence over `with_conversion_of_non_whitespace` if both are set. /// Non-words which are also non-space characters are converted to `\W`. #[pyo3(name = "with_conversion_of_non_words")] fn py_with_conversion_of_non_words(mut self_: PyRefMut) -> PyRefMut { self_.config.is_non_word_converted = true; self_ } /// Detect repeated non-overlapping substrings and convert them to `{min,max}` quantifier notation. #[pyo3(name = "with_conversion_of_repetitions")] fn py_with_conversion_of_repetitions(mut self_: PyRefMut) -> PyRefMut { self_.config.is_repetition_converted = true; self_ } /// Enable case-insensitive matching of test cases so that letters match both upper and lower case. #[pyo3(name = "with_case_insensitive_matching")] fn py_with_case_insensitive_matching(mut self_: PyRefMut) -> PyRefMut { self_.config.is_case_insensitive_matching = true; self_ } /// Replace non-capturing groups by capturing ones. #[pyo3(name = "with_capturing_groups")] fn py_with_capturing_groups(mut self_: PyRefMut) -> PyRefMut { self_.config.is_capturing_group_enabled = true; self_ } /// Specify the minimum quantity of substring repetitions to be converted if `with_conversion_of_repetitions` is set. /// /// If the quantity is not explicitly set with this method, a default value of 1 will be used. /// /// Args: /// quantity (int): The minimum quantity of substring repetitions /// /// Raises: /// ValueError: if `quantity` is zero #[pyo3(name = "with_minimum_repetitions")] fn py_with_minimum_repetitions( mut self_: PyRefMut, quantity: i32, ) -> PyResult> { if quantity <= 0 { Err(PyValueError::new_err(MINIMUM_REPETITIONS_MESSAGE)) } else { self_.config.minimum_repetitions = quantity as u32; Ok(self_) } } /// Specify the minimum length a repeated substring must have in order to be converted if `with_conversion_of_repetitions` is set. /// /// If the length is not explicitly set with this method, a default value of 1 will be used. /// /// Args: /// length (int): The minimum substring length /// /// Raises: /// ValueError: if `length` is zero #[pyo3(name = "with_minimum_substring_length")] fn py_with_minimum_substring_length( mut self_: PyRefMut, length: i32, ) -> PyResult> { if length <= 0 { Err(PyValueError::new_err(MINIMUM_SUBSTRING_LENGTH_MESSAGE)) } else { self_.config.minimum_substring_length = length as u32; Ok(self_) } } /// Convert non-ASCII characters to unicode escape sequences. /// /// The parameter `use_surrogate_pairs` specifies whether to convert astral code planes /// (range `U+010000` to `U+10FFFF`) to surrogate pairs. /// /// Args: /// use_surrogate_pairs (bool): Whether to convert astral code planes to surrogate pairs #[pyo3(name = "with_escaping_of_non_ascii_chars")] fn py_with_escaping_of_non_ascii_chars( mut self_: PyRefMut, use_surrogate_pairs: bool, ) -> PyRefMut { self_.config.is_non_ascii_char_escaped = true; self_.config.is_astral_code_point_converted_to_surrogate = use_surrogate_pairs; self_ } /// Produce a nicer looking regular expression in verbose mode. #[pyo3(name = "with_verbose_mode")] fn py_with_verbose_mode(mut self_: PyRefMut) -> PyRefMut { self_.config.is_verbose_mode_enabled = true; self_ } /// Remove the caret anchor '^' from the resulting regular expression, thereby allowing to /// match the test cases also when they do not occur at the start of a string. #[pyo3(name = "without_start_anchor")] fn py_without_start_anchor(mut self_: PyRefMut) -> PyRefMut { self_.config.is_start_anchor_disabled = true; self_ } /// Remove the dollar sign anchor '$' from the resulting regular expression, thereby allowing /// to match the test cases also when they do not occur at the end of a string. #[pyo3(name = "without_end_anchor")] fn py_without_end_anchor(mut self_: PyRefMut) -> PyRefMut { self_.config.is_end_anchor_disabled = true; self_ } /// Remove the caret and dollar sign anchors from the resulting regular expression, thereby /// allowing to match the test cases also when they occur within a larger string that contains /// other content as well. #[pyo3(name = "without_anchors")] fn py_without_anchors(mut self_: PyRefMut) -> PyRefMut { self_.config.is_start_anchor_disabled = true; self_.config.is_end_anchor_disabled = true; self_ } /// Build the actual regular expression using the previously given settings. #[pyo3(name = "build")] fn py_build(&mut self) -> String { let regexp = self.build(); if self.config.is_non_ascii_char_escaped { replace_unicode_escape_sequences(regexp) } else { regexp } } } /// Replaces Rust Unicode escape sequences with Python Unicode escape sequences. fn replace_unicode_escape_sequences(regexp: String) -> String { static FOUR_CHARS_ESCAPE_SEQUENCE: LazyLock = LazyLock::new(|| Regex::new(r"\\u\{([0-9a-f]{4})\}").unwrap()); static FIVE_CHARS_ESCAPE_SEQUENCE: LazyLock = LazyLock::new(|| Regex::new(r"\\u\{([0-9a-f]{5})\}").unwrap()); let mut replacement = FOUR_CHARS_ESCAPE_SEQUENCE .replace_all(®exp, |caps: &Captures| format!("\\u{}", &caps[1])) .to_string(); replacement = FIVE_CHARS_ESCAPE_SEQUENCE .replace_all(&replacement, |caps: &Captures| { format!("\\U000{}", &caps[1]) }) .to_string(); replacement } ================================================ FILE: src/quantifier.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ use std::fmt::{Display, Formatter, Result}; #[derive(Clone, Debug, Eq, PartialEq)] pub(crate) enum Quantifier { KleeneStar, QuestionMark, } impl Display for Quantifier { fn fmt(&self, f: &mut Formatter<'_>) -> Result { write!( f, "{}", match self { Quantifier::KleeneStar => '*', Quantifier::QuestionMark => '?', } ) } } ================================================ FILE: src/regexp.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ use crate::cluster::GraphemeCluster; use crate::component::Component; use crate::config::RegExpConfig; use crate::dfa::Dfa; use crate::expression::Expression; use itertools::Itertools; use regex::Regex; use std::cmp::Ordering; use std::fmt::{Display, Formatter, Result}; pub(crate) struct RegExp<'a> { ast: Expression<'a>, config: &'a RegExpConfig, } impl<'a> RegExp<'a> { pub(crate) fn from(test_cases: &'a mut Vec, config: &'a RegExpConfig) -> Self { if config.is_case_insensitive_matching { Self::convert_for_case_insensitive_matching(test_cases); } Self::sort(test_cases); let grapheme_clusters = Self::grapheme_clusters(test_cases, config); let mut dfa = Dfa::from(&grapheme_clusters, true, config); let mut ast = Expression::from(dfa, config); if config.is_start_anchor_disabled && config.is_end_anchor_disabled { let mut regex = Self::convert_expr_to_regex(&ast, config); if config.is_verbose_mode_enabled { // Remove line breaks before checking matches, otherwise check will be incorrect. regex = Regex::new(®ex.to_string().replace('\n', "")).unwrap(); } if !Self::is_each_test_case_matched_after_rotating_alternations( ®ex, &mut ast, test_cases, ) { dfa = Dfa::from(&grapheme_clusters, false, config); ast = Expression::from(dfa, config); regex = Self::convert_expr_to_regex(&ast, config); if !Self::regex_matches_all_test_cases(®ex, test_cases) { let mut exprs = vec![]; for cluster in grapheme_clusters { let literal = Expression::new_literal(cluster, config); exprs.push(literal); } ast = Expression::new_alternation(exprs, config); } } } Self { ast, config } } fn convert_for_case_insensitive_matching(test_cases: &mut Vec) { // Convert only those test cases to lowercase if // they keep their original number of characters. // Otherwise, "İ" -> "i\u{307}" would not match "İ". *test_cases = test_cases .iter() .map(|it| { let lower_test_case = it.to_lowercase(); if lower_test_case.chars().count() == it.chars().count() { lower_test_case } else { it.to_string() } }) .collect_vec(); } fn convert_expr_to_regex(expr: &Expression, config: &RegExpConfig) -> Regex { if config.is_output_colorized { let color_replace_regex = Regex::new("\u{1b}\\[(?:\\d+;\\d+|0)m").unwrap(); Regex::new(&color_replace_regex.replace_all(&expr.to_string(), "")).unwrap() } else { Regex::new(&expr.to_string()).unwrap() } } fn regex_matches_all_test_cases(regex: &Regex, test_cases: &[String]) -> bool { test_cases .iter() .all(|test_case| regex.find_iter(test_case).count() == 1) } fn sort(test_cases: &mut Vec) { test_cases.sort(); test_cases.dedup(); test_cases.sort_by(|a, b| match a.len().cmp(&b.len()) { Ordering::Equal => a.cmp(b), other => other, }); } fn grapheme_clusters( test_cases: &'a [String], config: &'a RegExpConfig, ) -> Vec> { let mut clusters = test_cases .iter() .map(|it| GraphemeCluster::from(it, config)) .collect_vec(); if config.is_char_class_feature_enabled() { for cluster in clusters.iter_mut() { cluster.convert_to_char_classes(); } } if config.is_repetition_converted { for cluster in clusters.iter_mut() { cluster.convert_repetitions(); } } clusters } fn is_each_test_case_matched_after_rotating_alternations( regex: &Regex, expr: &mut Expression, test_cases: &[String], ) -> bool { for _ in 1..test_cases.len() { if Self::regex_matches_all_test_cases(regex, test_cases) { return true; } else if let Expression::Alternation(options, _, _, _) = expr { options.rotate_right(1); } else if let Expression::Concatenation(first, second, _, _, _) = expr { let a: &mut Expression = first; let b: &mut Expression = second; if let Expression::Alternation(options, _, _, _) = a { options.rotate_right(1); } else if let Expression::Alternation(options, _, _, _) = b { options.rotate_right(1); } } } false } } impl Display for RegExp<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> Result { let flag = if self.config.is_case_insensitive_matching && self.config.is_verbose_mode_enabled { Component::IgnoreCaseAndVerboseModeFlag.to_repr(self.config.is_output_colorized) } else if self.config.is_case_insensitive_matching { Component::IgnoreCaseFlag.to_repr(self.config.is_output_colorized) } else if self.config.is_verbose_mode_enabled { Component::VerboseModeFlag.to_repr(self.config.is_output_colorized) } else { String::new() }; let caret = if self.config.is_start_anchor_disabled { String::new() } else { Component::Caret(self.config.is_verbose_mode_enabled) .to_repr(self.config.is_output_colorized) }; let dollar_sign = if self.config.is_end_anchor_disabled { String::new() } else { Component::DollarSign(self.config.is_verbose_mode_enabled) .to_repr(self.config.is_output_colorized) }; let mut regexp = match self.ast { Expression::Alternation(_, _, _, _) => { format!( "{}{}{}{}", flag, caret, if self.config.is_capturing_group_enabled { Component::CapturedParenthesizedExpression( self.ast.to_string(), self.config.is_verbose_mode_enabled, false, ) .to_repr(self.config.is_output_colorized) } else { Component::UncapturedParenthesizedExpression( self.ast.to_string(), self.config.is_verbose_mode_enabled, false, ) .to_repr(self.config.is_output_colorized) }, dollar_sign ) } _ => { format!("{}{}{}{}", flag, caret, self.ast, dollar_sign) } }; regexp = regexp .replace('\u{b}', "\\v") // U+000B Line Tabulation .replace('\u{c}', "\\f"); // U+000C Form Feed if self.config.is_verbose_mode_enabled { regexp = regexp .replace('#', "\\#") .replace( [ ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\u{85}', '\u{a0}', '\u{1680}', '\u{2000}', '\u{2001}', '\u{2002}', '\u{2003}', '\u{2004}', '\u{2005}', '\u{2006}', '\u{2007}', '\u{2008}', '\u{2009}', '\u{200a}', '\u{2028}', '\u{2029}', '\u{202f}', '\u{205f}', '\u{3000}', ], "\\s", ) .replace(' ', "\\ "); } write!( f, "{}", if self.config.is_verbose_mode_enabled { indent_regexp(regexp, self.config) } else { regexp } ) } } fn indent_regexp(regexp: String, config: &RegExpConfig) -> String { let mut indented_regexp = vec![]; let mut nesting_level = 0; for (i, line) in regexp.lines().enumerate() { if i == 1 && config.is_start_anchor_disabled { nesting_level += 1; } if line.is_empty() { continue; } let is_colored_line = line.starts_with("\u{1b}["); if nesting_level > 0 && ((is_colored_line && (line.contains('$') || line.contains(')'))) || (line == "$" || line.starts_with(')'))) { nesting_level -= 1; } let indentation = " ".repeat(nesting_level); indented_regexp.push(format!("{indentation}{line}")); if (is_colored_line && (line.contains('^') || (i > 0 && line.contains('(')))) || (line == "^" || (i > 0 && line.starts_with('('))) { nesting_level += 1; } } indented_regexp.join("\n") } ================================================ FILE: src/substring.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ pub(crate) enum Substring { Prefix, Suffix, } ================================================ FILE: src/unicode_tables/decimal.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // // ucd-generate general-category ucd-16.0.0 --chars --include decimalnumber // // Unicode version: 16.0.0. // // ucd-generate 0.3.1 is available on crates.io. pub const DECIMAL_NUMBER: &[(char, char)] = &[ ('0', '9'), ('٠', '٩'), ('۰', '۹'), ('߀', '߉'), ('०', '९'), ('০', '৯'), ('੦', '੯'), ('૦', '૯'), ('୦', '୯'), ('௦', '௯'), ('౦', '౯'), ('೦', '೯'), ('൦', '൯'), ('෦', '෯'), ('๐', '๙'), ('໐', '໙'), ('༠', '༩'), ('၀', '၉'), ('႐', '႙'), ('០', '៩'), ('᠐', '᠙'), ('᥆', '᥏'), ('᧐', '᧙'), ('᪀', '᪉'), ('᪐', '᪙'), ('᭐', '᭙'), ('᮰', '᮹'), ('᱀', '᱉'), ('᱐', '᱙'), ('꘠', '꘩'), ('꣐', '꣙'), ('꤀', '꤉'), ('꧐', '꧙'), ('꧰', '꧹'), ('꩐', '꩙'), ('꯰', '꯹'), ('0', '9'), ('𐒠', '𐒩'), ('𐴰', '𐴹'), ('𐵀', '𐵉'), ('𑁦', '𑁯'), ('𑃰', '𑃹'), ('𑄶', '𑄿'), ('𑇐', '𑇙'), ('𑋰', '𑋹'), ('𑑐', '𑑙'), ('𑓐', '𑓙'), ('𑙐', '𑙙'), ('𑛀', '𑛉'), ('𑛐', '𑛣'), ('𑜰', '𑜹'), ('𑣠', '𑣩'), ('𑥐', '𑥙'), ('𑯰', '𑯹'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), ('𑽐', '𑽙'), ('𖄰', '𖄹'), ('𖩠', '𖩩'), ('𖫀', '𖫉'), ('𖭐', '𖭙'), ('𖵰', '𖵹'), ('𜳰', '𜳹'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), ('𞓰', '𞓹'), ('𞗱', '𞗺'), ('𞥐', '𞥙'), ('🯰', '🯹'), ]; ================================================ FILE: src/unicode_tables/mod.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ mod decimal; mod space; mod word; pub use decimal::DECIMAL_NUMBER; pub use space::WHITE_SPACE; pub use word::WORD; ================================================ FILE: src/unicode_tables/space.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // // ucd-generate property-bool ucd-16.0.0 --chars --include whitespace // // Unicode version: 16.0.0. // // ucd-generate 0.3.1 is available on crates.io. pub const WHITE_SPACE: &[(char, char)] = &[ ('\t', '\r'), (' ', ' '), ('\u{85}', '\u{85}'), ('\u{a0}', '\u{a0}'), ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'), ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'), ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'), ]; ================================================ FILE: src/unicode_tables/word.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // // ucd-generate perl-word ucd-16.0.0 --chars // // Unicode version: 16.0.0. // // ucd-generate 0.3.1 is available on crates.io. pub const WORD: &[(char, char)] = &[ ('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'), ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'), ('ˆ', 'ˑ'), ('ˠ', 'ˤ'), ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('\u{300}', 'ʹ'), ('Ͷ', 'ͷ'), ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('\u{483}', 'ԯ'), ('Ա', 'Ֆ'), ('ՙ', 'ՙ'), ('ՠ', 'ֈ'), ('\u{591}', '\u{5bd}'), ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'), ('\u{5c7}', '\u{5c7}'), ('א', 'ת'), ('ׯ', 'ײ'), ('\u{610}', '\u{61a}'), ('ؠ', '٩'), ('ٮ', 'ۓ'), ('ە', '\u{6dc}'), ('\u{6df}', '\u{6e8}'), ('\u{6ea}', 'ۼ'), ('ۿ', 'ۿ'), ('ܐ', '\u{74a}'), ('ݍ', 'ޱ'), ('߀', 'ߵ'), ('ߺ', 'ߺ'), ('\u{7fd}', '\u{7fd}'), ('ࠀ', '\u{82d}'), ('ࡀ', '\u{85b}'), ('ࡠ', 'ࡪ'), ('ࡰ', 'ࢇ'), ('ࢉ', 'ࢎ'), ('\u{897}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('०', '९'), ('ॱ', 'ঃ'), ('অ', 'ঌ'), ('এ', 'ঐ'), ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'), ('\u{9bc}', '\u{9c4}'), ('ে', 'ৈ'), ('ো', 'ৎ'), ('\u{9d7}', '\u{9d7}'), ('ড়', 'ঢ়'), ('য়', '\u{9e3}'), ('০', 'ৱ'), ('ৼ', 'ৼ'), ('\u{9fe}', '\u{9fe}'), ('\u{a01}', 'ਃ'), ('ਅ', 'ਊ'), ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'), ('\u{a3c}', '\u{a3c}'), ('ਾ', '\u{a42}'), ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4d}'), ('\u{a51}', '\u{a51}'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'), ('੦', '\u{a75}'), ('\u{a81}', 'ઃ'), ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'), ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'), ('\u{abc}', '\u{ac5}'), ('\u{ac7}', 'ૉ'), ('ો', '\u{acd}'), ('ૐ', 'ૐ'), ('ૠ', '\u{ae3}'), ('૦', '૯'), ('ૹ', '\u{aff}'), ('\u{b01}', 'ଃ'), ('ଅ', 'ଌ'), ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'), ('ଵ', 'ହ'), ('\u{b3c}', '\u{b44}'), ('େ', 'ୈ'), ('ୋ', '\u{b4d}'), ('\u{b55}', '\u{b57}'), ('ଡ଼', 'ଢ଼'), ('ୟ', '\u{b63}'), ('୦', '୯'), ('ୱ', 'ୱ'), ('\u{b82}', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'), ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'), ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'), ('\u{bbe}', 'ூ'), ('ெ', 'ை'), ('ொ', '\u{bcd}'), ('ௐ', 'ௐ'), ('\u{bd7}', '\u{bd7}'), ('௦', '௯'), ('\u{c00}', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), ('\u{c3c}', 'ౄ'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('ౘ', 'ౚ'), ('ౝ', 'ౝ'), ('ౠ', '\u{c63}'), ('౦', '౯'), ('ಀ', 'ಃ'), ('ಅ', 'ಌ'), ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('\u{cbc}', 'ೄ'), ('\u{cc6}', '\u{cc8}'), ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), ('೦', '೯'), ('ೱ', 'ೳ'), ('\u{d00}', 'ഌ'), ('എ', 'ഐ'), ('ഒ', '\u{d44}'), ('െ', 'ൈ'), ('ൊ', 'ൎ'), ('ൔ', '\u{d57}'), ('ൟ', '\u{d63}'), ('൦', '൯'), ('ൺ', 'ൿ'), ('\u{d81}', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'), ('ල', 'ල'), ('ව', 'ෆ'), ('\u{dca}', '\u{dca}'), ('\u{dcf}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'), ('ෘ', '\u{ddf}'), ('෦', '෯'), ('ෲ', 'ෳ'), ('ก', '\u{e3a}'), ('เ', '\u{e4e}'), ('๐', '๙'), ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ຆ', 'ຊ'), ('ຌ', 'ຣ'), ('ລ', 'ລ'), ('ວ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('\u{ec8}', '\u{ece}'), ('໐', '໙'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('\u{f18}', '\u{f19}'), ('༠', '༩'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), ('\u{f39}', '\u{f39}'), ('༾', 'ཇ'), ('ཉ', 'ཬ'), ('\u{f71}', '\u{f84}'), ('\u{f86}', '\u{f97}'), ('\u{f99}', '\u{fbc}'), ('\u{fc6}', '\u{fc6}'), ('က', '၉'), ('ၐ', '\u{109d}'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'), ('ჼ', 'ቈ'), ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'), ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'), ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'), ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('\u{135d}', '\u{135f}'), ('ᎀ', 'ᎏ'), ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'), ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), ('ᜀ', '\u{1715}'), ('ᜟ', '\u{1734}'), ('ᝀ', '\u{1753}'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}'), ('ក', '\u{17d3}'), ('ៗ', 'ៗ'), ('ៜ', '\u{17dd}'), ('០', '៩'), ('\u{180b}', '\u{180d}'), ('\u{180f}', '᠙'), ('ᠠ', 'ᡸ'), ('ᢀ', 'ᢪ'), ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'), ('\u{1920}', 'ᤫ'), ('ᤰ', '\u{193b}'), ('᥆', 'ᥭ'), ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧙'), ('ᨀ', '\u{1a1b}'), ('ᨠ', '\u{1a5e}'), ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '᪉'), ('᪐', '᪙'), ('ᪧ', 'ᪧ'), ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', 'ᭌ'), ('᭐', '᭙'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '\u{1bf3}'), ('ᰀ', '\u{1c37}'), ('᱀', '᱉'), ('ᱍ', 'ᱽ'), ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', 'ᳺ'), ('ᴀ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'), ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'), ('ῖ', 'Ί'), ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), ('\u{200c}', '\u{200d}'), ('‿', '⁀'), ('⁔', '⁔'), ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('\u{20d0}', '\u{20f0}'), ('ℂ', 'ℂ'), ('ℇ', 'ℇ'), ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('ℙ', 'ℝ'), ('ℤ', 'ℤ'), ('Ω', 'Ω'), ('ℨ', 'ℨ'), ('K', 'ℭ'), ('ℯ', 'ℹ'), ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), ('Ⓐ', 'ⓩ'), ('Ⰰ', 'ⳤ'), ('Ⳬ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ⴰ', 'ⵧ'), ('ⵯ', 'ⵯ'), ('\u{2d7f}', 'ⶖ'), ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('\u{2de0}', '\u{2dff}'), ('ⸯ', 'ⸯ'), ('々', '〇'), ('〡', '\u{302f}'), ('〱', '〵'), ('〸', '〼'), ('ぁ', 'ゖ'), ('\u{3099}', '\u{309a}'), ('ゝ', 'ゟ'), ('ァ', 'ヺ'), ('ー', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆿ'), ('ㇰ', 'ㇿ'), ('㐀', '䶿'), ('一', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘫ'), ('Ꙁ', '\u{a672}'), ('\u{a674}', '\u{a67d}'), ('ꙿ', '\u{a6f1}'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠧ'), ('\u{a82c}', '\u{a82c}'), ('ꡀ', 'ꡳ'), ('ꢀ', '\u{a8c5}'), ('꣐', '꣙'), ('\u{a8e0}', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', '\u{a92d}'), ('ꤰ', '\u{a953}'), ('ꥠ', 'ꥼ'), ('\u{a980}', '\u{a9c0}'), ('ꧏ', '꧙'), ('ꧠ', 'ꧾ'), ('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('ꩠ', 'ꩶ'), ('ꩺ', 'ꫂ'), ('ꫛ', 'ꫝ'), ('ꫠ', 'ꫯ'), ('ꫲ', '\u{aaf6}'), ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭩ'), ('ꭰ', 'ꯪ'), ('꯬', '\u{abed}'), ('꯰', '꯹'), ('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('豈', '舘'), ('並', '龎'), ('ff', 'st'), ('ﬓ', 'ﬗ'), ('יִ', 'ﬨ'), ('שׁ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'), ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'), ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷻ'), ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}', '\u{fe2f}'), ('︳', '︴'), ('﹍', '﹏'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'), ('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z'), ('ヲ', 'ᄒ'), ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'), ('ᅳ', 'ᅵ'), ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'), ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐅀', '𐅴'), ('\u{101fd}', '\u{101fd}'), ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('\u{102e0}', '\u{102e0}'), ('𐌀', '𐌟'), ('𐌭', '𐍊'), ('𐍐', '\u{1037a}'), ('𐎀', '𐎝'), ('𐎠', '𐏃'), ('𐏈', '𐏏'), ('𐏑', '𐏕'), ('𐐀', '𐒝'), ('𐒠', '𐒩'), ('𐒰', '𐓓'), ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐕰', '𐕺'), ('𐕼', '𐖊'), ('𐖌', '𐖒'), ('𐖔', '𐖕'), ('𐖗', '𐖡'), ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), ('𐞀', '𐞅'), ('𐞇', '𐞰'), ('𐞲', '𐞺'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'), ('𐡠', '𐡶'), ('𐢀', '𐢞'), ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐤀', '𐤕'), ('𐤠', '𐤹'), ('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𐨀', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨵'), ('\u{10a38}', '\u{10a3a}'), ('\u{10a3f}', '\u{10a3f}'), ('𐩠', '𐩼'), ('𐪀', '𐪜'), ('𐫀', '𐫇'), ('𐫉', '\u{10ae6}'), ('𐬀', '𐬵'), ('𐭀', '𐭕'), ('𐭠', '𐭲'), ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), ('𐴰', '𐴹'), ('𐵀', '𐵥'), ('\u{10d69}', '\u{10d6d}'), ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), ('𐺰', '𐺱'), ('𐻂', '𐻄'), ('\u{10efc}', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '\u{10f50}'), ('𐽰', '\u{10f85}'), ('𐾰', '𐿄'), ('𐿠', '𐿶'), ('𑀀', '\u{11046}'), ('𑁦', '𑁵'), ('\u{1107f}', '\u{110ba}'), ('\u{110c2}', '\u{110c2}'), ('𑃐', '𑃨'), ('𑃰', '𑃹'), ('\u{11100}', '\u{11134}'), ('𑄶', '𑄿'), ('𑅄', '𑅇'), ('𑅐', '\u{11173}'), ('𑅶', '𑅶'), ('\u{11180}', '𑇄'), ('\u{111c9}', '\u{111cc}'), ('𑇎', '𑇚'), ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '\u{11237}'), ('\u{1123e}', '\u{11241}'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊨'), ('𑊰', '\u{112ea}'), ('𑋰', '𑋹'), ('\u{11300}', '𑌃'), ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'), ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('\u{1133b}', '𑍄'), ('𑍇', '𑍈'), ('𑍋', '\u{1134d}'), ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), ('𑎀', '𑎉'), ('𑎋', '𑎋'), ('𑎎', '𑎎'), ('𑎐', '𑎵'), ('𑎷', '\u{113c0}'), ('\u{113c2}', '\u{113c2}'), ('\u{113c5}', '\u{113c5}'), ('\u{113c7}', '𑏊'), ('𑏌', '𑏓'), ('\u{113e1}', '\u{113e2}'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), ('\u{1145e}', '𑑡'), ('𑒀', '𑓅'), ('𑓇', '𑓇'), ('𑓐', '𑓙'), ('𑖀', '\u{115b5}'), ('𑖸', '\u{115c0}'), ('𑗘', '\u{115dd}'), ('𑘀', '\u{11640}'), ('𑙄', '𑙄'), ('𑙐', '𑙙'), ('𑚀', '𑚸'), ('𑛀', '𑛉'), ('𑛐', '𑛣'), ('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜹'), ('𑝀', '𑝆'), ('𑠀', '\u{1183a}'), ('𑢠', '𑣩'), ('𑣿', '𑤆'), ('𑤉', '𑤉'), ('𑤌', '𑤓'), ('𑤕', '𑤖'), ('𑤘', '𑤵'), ('𑤷', '𑤸'), ('\u{1193b}', '\u{11943}'), ('𑥐', '𑥙'), ('𑦠', '𑦧'), ('𑦪', '\u{119d7}'), ('\u{119da}', '𑧡'), ('𑧣', '𑧤'), ('𑨀', '\u{11a3e}'), ('\u{11a47}', '\u{11a47}'), ('𑩐', '\u{11a99}'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), ('𑯀', '𑯠'), ('𑯰', '𑯹'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱀'), ('𑱐', '𑱙'), ('𑱲', '𑲏'), ('\u{11c92}', '\u{11ca7}'), ('𑲩', '\u{11cb6}'), ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '\u{11d36}'), ('\u{11d3a}', '\u{11d3a}'), ('\u{11d3c}', '\u{11d3d}'), ('\u{11d3f}', '\u{11d47}'), ('𑵐', '𑵙'), ('𑵠', '𑵥'), ('𑵧', '𑵨'), ('𑵪', '𑶎'), ('\u{11d90}', '\u{11d91}'), ('𑶓', '𑶘'), ('𑶠', '𑶩'), ('𑻠', '𑻶'), ('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f42}'), ('𑽐', '\u{11f5a}'), ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('\u{13440}', '\u{13455}'), ('𓑠', '𔏺'), ('𔐀', '𔙆'), ('𖄀', '𖄹'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩰', '𖪾'), ('𖫀', '𖫉'), ('𖫐', '𖫭'), ('\u{16af0}', '\u{16af4}'), ('𖬀', '\u{16b36}'), ('𖭀', '𖭃'), ('𖭐', '𖭙'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), ('𖵀', '𖵬'), ('𖵰', '𖵹'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '\u{16fe4}'), ('\u{16ff0}', '\u{16ff1}'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), ('𛀀', '𛄢'), ('𛄲', '𛄲'), ('𛅐', '𛅒'), ('𛅕', '𛅕'), ('𛅤', '𛅧'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('\u{1bc9d}', '\u{1bc9e}'), ('𜳰', '𜳹'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), ('\u{1d242}', '\u{1d244}'), ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'), ('𝛼', '𝜔'), ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'), ('𝝰', '𝞈'), ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𝟎', '𝟿'), ('\u{1da00}', '\u{1da36}'), ('\u{1da3b}', '\u{1da6c}'), ('\u{1da75}', '\u{1da75}'), ('\u{1da84}', '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), ('𝼀', '𝼞'), ('𝼥', '𝼪'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), ('\u{1e01b}', '\u{1e021}'), ('\u{1e023}', '\u{1e024}'), ('\u{1e026}', '\u{1e02a}'), ('𞀰', '𞁭'), ('\u{1e08f}', '\u{1e08f}'), ('𞄀', '𞄬'), ('\u{1e130}', '𞄽'), ('𞅀', '𞅉'), ('𞅎', '𞅎'), ('𞊐', '\u{1e2ae}'), ('𞋀', '𞋹'), ('𞓐', '𞓹'), ('𞗐', '𞗺'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), ('𞟰', '𞟾'), ('𞠀', '𞣄'), ('\u{1e8d0}', '\u{1e8d6}'), ('𞤀', '𞥋'), ('𞥐', '𞥙'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('🄰', '🅉'), ('🅐', '🅩'), ('🅰', '🆉'), ('🯰', '🯹'), ('𠀀', '𪛟'), ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), ('\u{e0100}', '\u{e01ef}'), ]; ================================================ FILE: src/wasm.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #![allow(non_snake_case)] use crate::builder::{ RegExpBuilder as Builder, MINIMUM_REPETITIONS_MESSAGE, MINIMUM_SUBSTRING_LENGTH_MESSAGE, MISSING_TEST_CASES_MESSAGE, }; use itertools::Itertools; use wasm_bindgen::prelude::*; /// This class builds regular expressions from user-provided test cases. #[wasm_bindgen] #[derive(Clone)] pub struct RegExpBuilder { builder: Builder, } #[wasm_bindgen] impl RegExpBuilder { /// Specifies the test cases to build the regular expression from. /// /// The test cases need not be sorted because `RegExpBuilder` sorts them internally. /// /// ⚠ Throws an error if `testCases` is empty. pub fn from(testCases: Box<[JsValue]>) -> Result { let strs = testCases .iter() .filter_map(|it| it.as_string()) .collect_vec(); if strs.is_empty() { return Err(JsValue::from(MISSING_TEST_CASES_MESSAGE)); } Ok(RegExpBuilder { builder: Builder::from(&strs), }) } /// Tells `RegExpBuilder` to convert any Unicode decimal digit to character class `\d`. /// /// This method takes precedence over `withConversionOfWords` if both are set. /// Decimal digits are converted to `\d`, the remaining word characters to `\w`. /// /// This method takes precedence over `withConversionOfWhitespace` if both are set. /// Decimal digits are converted to `\d`, the remaining non-whitespace characters to `\S`. pub fn withConversionOfDigits(&mut self) -> RegExpBuilder { self.builder.config.is_digit_converted = true; self.clone() } /// Tells `RegExpBuilder` to convert any character which is not /// a Unicode decimal digit to character class `\D`. /// /// This method takes precedence over `withConversionOfNonWords` if both are set. /// Non-digits which are also non-word characters are converted to `\D`. /// /// This method takes precedence over `withConversionOfNonWhitespace` if both are set. /// Non-digits which are also non-space characters are converted to `\D`. pub fn withConversionOfNonDigits(&mut self) -> RegExpBuilder { self.builder.config.is_non_digit_converted = true; self.clone() } /// Tells `RegExpBuilder` to convert any Unicode whitespace character to character class `\s`. /// /// This method takes precedence over `withConversionOfNonDigits` if both are set. /// Whitespace characters are converted to `\s`, the remaining non-digit characters to `\D`. /// /// This method takes precedence over `withConversionOfNonWords` if both are set. /// Whitespace characters are converted to `\s`, the remaining non-word characters to `\W`. pub fn withConversionOfWhitespace(&mut self) -> RegExpBuilder { self.builder.config.is_space_converted = true; self.clone() } /// Tells `RegExpBuilder` to convert any character which is not /// a Unicode whitespace character to character class `\S`. pub fn withConversionOfNonWhitespace(&mut self) -> RegExpBuilder { self.builder.config.is_non_space_converted = true; self.clone() } /// Tells `RegExpBuilder` to convert any Unicode word character to character class `\w`. /// /// This method takes precedence over `withConversionOfNonDigits` if both are set. /// Word characters are converted to `\w`, the remaining non-digit characters to `\D`. /// /// This method takes precedence over `withConversionOfNonWhitespace` if both are set. /// Word characters are converted to `\w`, the remaining non-space characters to `\S`. pub fn withConversionOfWords(&mut self) -> RegExpBuilder { self.builder.config.is_word_converted = true; self.clone() } /// Tells `RegExpBuilder` to convert any character which is not /// a Unicode word character to character class `\W`. /// /// This method takes precedence over `withConversionOfNonWhitespace` if both are set. /// Non-words which are also non-space characters are converted to `\W`. pub fn withConversionOfNonWords(&mut self) -> RegExpBuilder { self.builder.config.is_non_word_converted = true; self.clone() } /// Tells `RegExpBuilder` to detect repeated non-overlapping substrings and /// to convert them to `{min,max}` quantifier notation. pub fn withConversionOfRepetitions(&mut self) -> RegExpBuilder { self.builder.config.is_repetition_converted = true; self.clone() } /// Tells `RegExpBuilder` to enable case-insensitive matching of test cases /// so that letters match both upper and lower case. pub fn withCaseInsensitiveMatching(&mut self) -> RegExpBuilder { self.builder.config.is_case_insensitive_matching = true; self.clone() } /// Tells `RegExpBuilder` to replace non-capturing groups by capturing ones. pub fn withCapturingGroups(&mut self) -> RegExpBuilder { self.builder.config.is_capturing_group_enabled = true; self.clone() } /// Tells `RegExpBuilder` to convert non-ASCII characters to unicode escape sequences. /// The parameter `useSurrogatePairs` specifies whether to convert astral code planes /// (range `U+010000` to `U+10FFFF`) to surrogate pairs. pub fn withEscapingOfNonAsciiChars(&mut self, useSurrogatePairs: bool) -> RegExpBuilder { self.builder.config.is_non_ascii_char_escaped = true; self.builder .config .is_astral_code_point_converted_to_surrogate = useSurrogatePairs; self.clone() } /// Tells `RegExpBuilder` to produce a nicer looking regular expression in verbose mode. pub fn withVerboseMode(&mut self) -> RegExpBuilder { self.builder.config.is_verbose_mode_enabled = true; self.clone() } /// Tells `RegExpBuilder` to remove the caret anchor '^' from the resulting regular /// expression, thereby allowing to match the test cases also when they do not occur /// at the start of a string. pub fn withoutStartAnchor(&mut self) -> RegExpBuilder { self.builder.config.is_start_anchor_disabled = true; self.clone() } /// Tells `RegExpBuilder` to remove the dollar sign anchor '$' from the resulting regular /// expression, thereby allowing to match the test cases also when they do not occur /// at the end of a string. pub fn withoutEndAnchor(&mut self) -> RegExpBuilder { self.builder.config.is_end_anchor_disabled = true; self.clone() } /// Tells `RegExpBuilder` to remove the caret and dollar sign anchors from the resulting /// regular expression, thereby allowing to match the test cases also when they occur /// within a larger string that contains other content as well. pub fn withoutAnchors(&mut self) -> RegExpBuilder { self.builder.config.is_start_anchor_disabled = true; self.builder.config.is_end_anchor_disabled = true; self.clone() } /// Specifies the minimum quantity of substring repetitions to be converted /// if `withConversionOfRepetitions` is set. /// /// If the quantity is not explicitly set with this method, a default value of 1 will be used. /// /// ⚠ Throws an error if `quantity` is zero. pub fn withMinimumRepetitions(&mut self, quantity: u32) -> Result { if quantity < 1 { return Err(JsValue::from(MINIMUM_REPETITIONS_MESSAGE)); } self.builder.config.minimum_repetitions = quantity; Ok(self.clone()) } /// Specifies the minimum length a repeated substring must have in order to be converted /// if `withConversionOfRepetitions` is set. /// /// If the length is not explicitly set with this method, a default value of 1 will be used. /// /// ⚠ Throws an error if `length` is zero. pub fn withMinimumSubstringLength(&mut self, length: u32) -> Result { if length < 1 { return Err(JsValue::from(MINIMUM_SUBSTRING_LENGTH_MESSAGE)); } self.builder.config.minimum_substring_length = length; Ok(self.clone()) } /// Builds the actual regular expression using the previously given settings. pub fn build(&mut self) -> String { self.builder.build() } } ================================================ FILE: tests/cli_integration_tests.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #![cfg(not(target_family = "wasm"))] use assert_cmd::{cargo_bin, Command}; use indoc::indoc; use predicates::prelude::*; use std::io::Write; use tempfile::NamedTempFile; const TEST_CASE: &str = "I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."; mod no_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&[TEST_CASE]); grex.assert() .success() .stdout(predicate::eq("^I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩\\.$\n")); } #[test] fn succeeds_with_ignore_case_option() { let mut grex = init_command(); grex.args(&["--ignore-case", "Ä@Ö€Ü", "ä@ö€ü", "Ä@ö€Ü", "ä@Ö€ü"]); grex.assert() .success() .stdout(predicate::eq("(?i)^ä@ö€ü$\n")); } #[test] fn succeeds_with_leading_hyphen() { let mut grex = init_command(); grex.args(&["-a", "b", "c"]); grex.assert() .success() .stdout(predicate::eq("^(?:\\-a|[bc])$\n")); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I \\u{2665}\\u{2665}\\u{2665} 36 and \\u{663} and y\\u{306}y\\u{306} and \\u{1f4a9}\\u{1f4a9}\\.$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&["--escape", "--with-surrogates", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I \\u{2665}\\u{2665}\\u{2665} 36 and \\u{663} and y\\u{306}y\\u{306} and \\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\ \ \ ♥♥♥\ 36\ and\ ٣\ and\ y̆y̆\ and\ 💩💩\. $ "#, ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--escape", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\ \ \ \u{2665}\u{2665}\u{2665}\ 36\ and\ \u{663}\ and\ y\u{306}y\u{306}\ and\ \u{1f4a9}\u{1f4a9}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--escape", "--with-surrogates", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\ \ \ \u{2665}\u{2665}\u{2665}\ 36\ and\ \u{663}\ and\ y\u{306}y\u{306}\ and\ \u{d83d}\u{dca9}\u{d83d}\u{dca9}\. $ "# ))); } #[test] fn succeeds_with_file_input() { let mut file = NamedTempFile::new().unwrap(); writeln!(file, "a\nb\\n\n\nc\näöü\n♥").unwrap(); let mut grex = init_command(); grex.args(&["-f", file.path().to_str().unwrap()]); grex.assert() .success() .stdout(predicate::eq("^(?:b\\\\n|äöü|[ac♥])$\n")); } #[test] fn succeeds_with_test_cases_from_stdin() { let mut grex = init_command(); grex.write_stdin("a\nb\\n\n\nc\näöü\n♥") .arg("-") .assert() .stdout(predicate::eq("^(?:b\\\\n|äöü|[ac♥])$\n")); } #[test] fn succeeds_with_file_from_stdin() { let mut file = NamedTempFile::new().unwrap(); writeln!(file, "a\nb\\n\n\nc\näöü\n♥").unwrap(); let mut grex = init_command(); grex.write_stdin(file.path().to_str().unwrap()) .args(&["-f", "-"]) .assert() .stdout(predicate::eq("^(?:b\\\\n|äöü|[ac♥])$\n")); } #[test] fn fails_with_surrogate_but_without_escape_option() { let mut grex = init_command(); grex.args(&["--with-surrogates", TEST_CASE]); grex.assert().failure().stderr(predicate::str::contains( "required arguments were not provided", )); } #[test] fn fails_without_arguments() { let mut grex = init_command(); grex.assert().failure().stderr(predicate::str::contains( "required arguments were not provided", )); } #[test] fn fails_when_file_name_is_not_provided() { let mut grex = init_command(); grex.arg("-f"); grex.assert().failure().stderr(predicate::str::contains( "a value is required for '--file ' but none was supplied", )); } #[test] fn fails_when_file_does_not_exist() { let mut grex = init_command(); grex.args(&["-f", "/path/to/non-existing/file"]); grex.assert() .failure() .stdout(predicate::str::is_empty()) .stderr(predicate::eq( "error: the specified file could not be found\n", )); } #[test] fn fails_with_first_file_input_and_then_direct_input() { let mut grex = init_command(); grex.args(&["-f", "/path/to/some/file", TEST_CASE]); grex.assert().failure().stderr(predicate::str::contains( "the argument '--file ' cannot be used with '[INPUT]...'", )); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I {3}♥{3} 36 and ٣ and (?:y̆){2} and 💩{2}\\.$\n", )); } #[test] fn succeeds_with_ignore_case_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--ignore-case", "ÄÖÜäöü@Ö€", "äöüÄöÜ@ö€"]); grex.assert() .success() .stdout(predicate::eq("(?i)^(?:äöü){2}@ö€$\n")); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I {3}\\u{2665}{3} 36 and \\u{663} and (?:y\\u{306}){2} and \\u{1f4a9}{2}\\.$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--escape", "--with-surrogates", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I {3}\\u{2665}{3} 36 and \\u{663} and (?:y\\u{306}){2} and (?:\\u{d83d}\\u{dca9}){2}\\.$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\ {3}♥{3}\ 36\ and\ ٣\ and\ (?: y̆ ){2} \ and\ 💩{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--escape", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\ {3}\u{2665}{3}\ 36\ and\ \u{663}\ and\ (?: y\u{306} ){2} \ and\ \u{1f4a9}{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\ {3}\u{2665}{3}\ 36\ and\ \u{663}\ and\ (?: y\u{306} ){2} \ and\ (?: \u{d83d}\u{dca9} ){2} \. $ "# ))); } #[test] fn succeeds_with_increased_minimum_repetitions() { let mut grex = init_command(); grex.args(&["--repetitions", "--min-repetitions", "2", TEST_CASE]); grex.assert() .success() .stdout(predicate::eq("^I {3}♥{3} 36 and ٣ and y̆y̆ and 💩💩\\.$\n")); } #[test] fn succeeds_with_increased_minimum_substring_length() { let mut grex = init_command(); grex.args(&["--repetitions", "--min-substring-length", "2", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I ♥♥♥ 36 and ٣ and (?:y̆){2} and 💩💩\\.$\n", )); } #[test] fn fails_with_minimum_repetitions_equal_to_zero() { let mut grex = init_command(); grex.args(&["--min-repetitions", "0", TEST_CASE]); grex.assert() .failure() .stderr(predicate::str::contains("Value must not be zero")); } #[test] fn fails_with_minimum_repetitions_equal_to_invalid_value() { let mut grex = init_command(); grex.args(&["--min-repetitions", "§!$", TEST_CASE]); grex.assert().failure().stderr(predicate::str::contains( "Value is not a valid unsigned integer", )); } #[test] fn fails_with_minimum_substring_length_equal_to_zero() { let mut grex = init_command(); grex.args(&["--min-substring-length", "0", TEST_CASE]); grex.assert() .failure() .stderr(predicate::str::contains("Value must not be zero")); } #[test] fn fails_with_minimum_substring_length_equal_to_invalid_value() { let mut grex = init_command(); grex.args(&["--min-substring-length", "§!$", TEST_CASE]); grex.assert().failure().stderr(predicate::str::contains( "Value is not a valid unsigned integer", )); } } } mod digit_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--digits", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I ♥♥♥ \\d\\d and \\d and y̆y̆ and 💩💩\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--digits", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I \\u{2665}\\u{2665}\\u{2665} \\d\\d and \\d and y\\u{306}y\\u{306} and \\u{1f4a9}\\u{1f4a9}\\.$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&["--digits", "--escape", "--with-surrogates", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I \\u{2665}\\u{2665}\\u{2665} \\d\\d and \\d and y\\u{306}y\\u{306} and \\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$\n" )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--digits", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\ \ \ ♥♥♥\ \d\d\ and\ \d\ and\ y̆y̆\ and\ 💩💩\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--digits", "--escape", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\ \ \ \u{2665}\u{2665}\u{2665}\ \d\d\ and\ \d\ and\ y\u{306}y\u{306}\ and\ \u{1f4a9}\u{1f4a9}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--digits", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\ \ \ \u{2665}\u{2665}\u{2665}\ \d\d\ and\ \d\ and\ y\u{306}y\u{306}\ and\ \u{d83d}\u{dca9}\u{d83d}\u{dca9}\. $ "# ))); } #[test] fn succeeds_with_capturing_groups_option() { let mut grex = init_command(); grex.args(&["--capture-groups", "abc", "def"]); grex.assert() .success() .stdout(predicate::eq("^(abc|def)$\n")); } #[test] fn succeeds_with_syntax_highlighting() { let mut grex = init_command(); grex.args(&["--colorize", "abc", "def"]); grex.assert() .success() .stdout(predicate::eq("\u{1b}[1;33m^\u{1b}[0m\u{1b}[1;32m(?:\u{1b}[0mabc\u{1b}[1;31m|\u{1b}[0mdef\u{1b}[1;32m)\u{1b}[0m\u{1b}[1;33m$\u{1b}[0m\n")); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--digits", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I {3}♥{3} \\d(?:\\d and ){2}(?:y̆){2} and 💩{2}\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--digits", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I {3}\\u{2665}{3} \\d(?:\\d and ){2}(?:y\\u{306}){2} and \\u{1f4a9}{2}\\.$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^I {3}\\u{2665}{3} \\d(?:\\d and ){2}(?:y\\u{306}){2} and (?:\\u{d83d}\\u{dca9}){2}\\.$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--digits", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\ {3}♥{3}\ \d (?: \d\ and\ ){2} (?: y̆ ){2} \ and\ 💩{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\ {3}\u{2665}{3}\ \d (?: \d\ and\ ){2} (?: y\u{306} ){2} \ and\ \u{1f4a9}{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\ {3}\u{2665}{3}\ \d (?: \d\ and\ ){2} (?: y\u{306} ){2} \ and\ (?: \u{d83d}\u{dca9} ){2} \. $ "# ))); } #[test] fn succeeds_with_increased_minimum_repetitions() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--min-repetitions", "2", "--digits", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^I {3}♥{3} \\d\\d and \\d and y̆y̆ and 💩💩\\.$\n", )); } #[test] fn succeeds_with_increased_minimum_substring_length() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--min-substring-length", "2", "--digits", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^I ♥♥♥ \\d(?:\\d and ){2}(?:y̆){2} and 💩💩\\.$\n", )); } } } mod space_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I\\s\\s\\s♥♥♥\\s36\\sand\\s٣\\sand\\sy̆y̆\\sand\\s💩💩\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--spaces", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s36\\sand\\s\\u{663}\\sand\\sy\\u{306}y\\u{306}\\sand\\s\\u{1f4a9}\\u{1f4a9}\\.$\n" )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&["--spaces", "--escape", "--with-surrogates", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s36\\sand\\s\\u{663}\\sand\\sy\\u{306}y\\u{306}\\sand\\s\\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$\n" )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--spaces", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\s\s\s♥♥♥\s36\sand\s٣\sand\sy̆y̆\sand\s💩💩\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--spaces", "--escape", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\s\s\s\u{2665}\u{2665}\u{2665}\s36\sand\s\u{663}\sand\sy\u{306}y\u{306}\sand\s\u{1f4a9}\u{1f4a9}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\s\s\s\u{2665}\u{2665}\u{2665}\s36\sand\s\u{663}\sand\sy\u{306}y\u{306}\sand\s\u{d83d}\u{dca9}\u{d83d}\u{dca9}\. $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I\\s{3}♥{3}\\s36\\sand\\s٣\\sand\\s(?:y̆){2}\\sand\\s💩{2}\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--spaces", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I\\s{3}\\u{2665}{3}\\s36\\sand\\s\\u{663}\\sand\\s(?:y\\u{306}){2}\\sand\\s\\u{1f4a9}{2}\\.$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--spaces", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^I\\s{3}\\u{2665}{3}\\s36\\sand\\s\\u{663}\\sand\\s(?:y\\u{306}){2}\\sand\\s(?:\\u{d83d}\\u{dca9}){2}\\.$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--spaces", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\s{3}♥{3}\s36\sand\s٣\sand\s (?: y̆ ){2} \sand\s💩{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--spaces", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\s{3}\u{2665}{3}\s36\sand\s\u{663}\sand\s (?: y\u{306} ){2} \sand\s\u{1f4a9}{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\s{3}\u{2665}{3}\s36\sand\s\u{663}\sand\s (?: y\u{306} ){2} \sand\s (?: \u{d83d}\u{dca9} ){2} \. $ "# ))); } } } mod word_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w ♥♥♥ \\w\\w \\w\\w\\w \\w \\w\\w\\w \\w\\w\\w\\w \\w\\w\\w 💩💩\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--words", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w \\u{2665}\\u{2665}\\u{2665} \\w\\w \\w\\w\\w \\w \\w\\w\\w \\w\\w\\w\\w \\w\\w\\w \\u{1f4a9}\\u{1f4a9}\\.$\n" )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&["--words", "--escape", "--with-surrogates", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w \\u{2665}\\u{2665}\\u{2665} \\w\\w \\w\\w\\w \\w \\w\\w\\w \\w\\w\\w\\w \\w\\w\\w \\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$\n" )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--words", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\ \ \ ♥♥♥\ \w\w\ \w\w\w\ \w\ \w\w\w\ \w\w\w\w\ \w\w\w\ 💩💩\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--words", "--escape", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\ \ \ \u{2665}\u{2665}\u{2665}\ \w\w\ \w\w\w\ \w\ \w\w\w\ \w\w\w\w\ \w\w\w\ \u{1f4a9}\u{1f4a9}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\ \ \ \u{2665}\u{2665}\u{2665}\ \w\w\ \w\w\w\ \w\ \w\w\w\ \w\w\w\w\ \w\w\w\ \u{d83d}\u{dca9}\u{d83d}\u{dca9}\. $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w {3}♥{3} \\w{2}(?: \\w{3} \\w){2}(?:\\w{3} ){2}💩{2}\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--words", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w {3}\\u{2665}{3} \\w{2}(?: \\w{3} \\w){2}(?:\\w{3} ){2}\\u{1f4a9}{2}\\.$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--words", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w {3}\\u{2665}{3} \\w{2}(?: \\w{3} \\w){2}(?:\\w{3} ){2}(?:\\u{d83d}\\u{dca9}){2}\\.$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--words", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\ {3}♥{3}\ \w{2} (?: \ \w{3}\ \w ){2} (?: \w{3}\ ){2} 💩{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--words", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\ {3}\u{2665}{3}\ \w{2} (?: \ \w{3}\ \w ){2} (?: \w{3}\ ){2} \u{1f4a9}{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\ {3}\u{2665}{3}\ \w{2} (?: \ \w{3}\ \w ){2} (?: \w{3}\ ){2} (?: \u{d83d}\u{dca9} ){2} \. $ "# ))); } } } mod digit_space_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--digits", "--spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I\\s\\s\\s♥♥♥\\s\\d\\d\\sand\\s\\d\\sand\\sy̆y̆\\sand\\s💩💩\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--digits", "--spaces", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s\\d\\d\\sand\\s\\d\\sand\\sy\\u{306}y\\u{306}\\sand\\s\\u{1f4a9}\\u{1f4a9}\\.$\n" )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--digits", "--spaces", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^I\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s\\d\\d\\sand\\s\\d\\sand\\sy\\u{306}y\\u{306}\\sand\\s\\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$\n" )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--digits", "--spaces", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\s\s\s♥♥♥\s\d\d\sand\s\d\sand\sy̆y̆\sand\s💩💩\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--digits", "--spaces", "--escape", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\s\s\s\u{2665}\u{2665}\u{2665}\s\d\d\sand\s\d\sand\sy\u{306}y\u{306}\sand\s\u{1f4a9}\u{1f4a9}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--digits", "--spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\s\s\s\u{2665}\u{2665}\u{2665}\s\d\d\sand\s\d\sand\sy\u{306}y\u{306}\sand\s\u{d83d}\u{dca9}\u{d83d}\u{dca9}\. $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--digits", "--spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I\\s{3}♥{3}\\s\\d(?:\\d\\sand\\s){2}(?:y̆){2}\\sand\\s💩{2}\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--spaces", "--escape", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^I\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\sand\\s){2}(?:y\\u{306}){2}\\sand\\s\\u{1f4a9}{2}\\.$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--spaces", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^I\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\sand\\s){2}(?:y\\u{306}){2}\\sand\\s(?:\\u{d83d}\\u{dca9}){2}\\.$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--spaces", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\s{3}♥{3}\s\d (?: \d\sand\s ){2} (?: y̆ ){2} \sand\s💩{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--spaces", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\s{3}\u{2665}{3}\s\d (?: \d\sand\s ){2} (?: y\u{306} ){2} \sand\s\u{1f4a9}{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\s{3}\u{2665}{3}\s\d (?: \d\sand\s ){2} (?: y\u{306} ){2} \sand\s (?: \u{d83d}\u{dca9} ){2} \. $ "# ))); } } } mod digit_word_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--digits", "--words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w ♥♥♥ \\d\\d \\w\\w\\w \\d \\w\\w\\w \\w\\w\\w\\w \\w\\w\\w 💩💩\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--digits", "--words", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w \\u{2665}\\u{2665}\\u{2665} \\d\\d \\w\\w\\w \\d \\w\\w\\w \\w\\w\\w\\w \\w\\w\\w \\u{1f4a9}\\u{1f4a9}\\.$\n" )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--digits", "--words", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w \\u{2665}\\u{2665}\\u{2665} \\d\\d \\w\\w\\w \\d \\w\\w\\w \\w\\w\\w\\w \\w\\w\\w \\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$\n" )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--digits", "--words", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\ \ \ ♥♥♥\ \d\d\ \w\w\w\ \d\ \w\w\w\ \w\w\w\w\ \w\w\w\ 💩💩\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--digits", "--words", "--escape", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\ \ \ \u{2665}\u{2665}\u{2665}\ \d\d\ \w\w\w\ \d\ \w\w\w\ \w\w\w\w\ \w\w\w\ \u{1f4a9}\u{1f4a9}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--digits", "--words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\ \ \ \u{2665}\u{2665}\u{2665}\ \d\d\ \w\w\w\ \d\ \w\w\w\ \w\w\w\w\ \w\w\w\ \u{d83d}\u{dca9}\u{d83d}\u{dca9}\. $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--digits", "--words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w {3}♥{3} \\d(?:\\d \\w{3} ){2}\\w(?:\\w{3} ){2}💩{2}\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--words", "--escape", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w {3}\\u{2665}{3} \\d(?:\\d \\w{3} ){2}\\w(?:\\w{3} ){2}\\u{1f4a9}{2}\\.$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--words", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w {3}\\u{2665}{3} \\d(?:\\d \\w{3} ){2}\\w(?:\\w{3} ){2}(?:\\u{d83d}\\u{dca9}){2}\\.$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--words", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\ {3}♥{3}\ \d (?: \d\ \w{3}\ ){2} \w (?: \w{3}\ ){2} 💩{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--words", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\ {3}\u{2665}{3}\ \d (?: \d\ \w{3}\ ){2} \w (?: \w{3}\ ){2} \u{1f4a9}{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\ {3}\u{2665}{3}\ \d (?: \d\ \w{3}\ ){2} \w (?: \w{3}\ ){2} (?: \u{d83d}\u{dca9} ){2} \. $ "# ))); } } } mod space_word_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--words", "--spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w\\s\\s\\s♥♥♥\\s\\w\\w\\s\\w\\w\\w\\s\\w\\s\\w\\w\\w\\s\\w\\w\\w\\w\\s\\w\\w\\w\\s💩💩\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--words", "--spaces", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s\\w\\w\\s\\w\\w\\w\\s\\w\\s\\w\\w\\w\\s\\w\\w\\w\\w\\s\\w\\w\\w\\s\\u{1f4a9}\\u{1f4a9}\\.$\n" )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--words", "--spaces", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s\\w\\w\\s\\w\\w\\w\\s\\w\\s\\w\\w\\w\\s\\w\\w\\w\\w\\s\\w\\w\\w\\s\\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$\n" )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--words", "--spaces", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\s\s\s♥♥♥\s\w\w\s\w\w\w\s\w\s\w\w\w\s\w\w\w\w\s\w\w\w\s💩💩\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--words", "--spaces", "--escape", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\s\s\s\u{2665}\u{2665}\u{2665}\s\w\w\s\w\w\w\s\w\s\w\w\w\s\w\w\w\w\s\w\w\w\s\u{1f4a9}\u{1f4a9}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--words", "--spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\s\s\s\u{2665}\u{2665}\u{2665}\s\w\w\s\w\w\w\s\w\s\w\w\w\s\w\w\w\w\s\w\w\w\s\u{d83d}\u{dca9}\u{d83d}\u{dca9}\. $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--words", "--spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w\\s{3}♥{3}\\s\\w{2}(?:\\s\\w{3}\\s\\w){2}(?:\\w{3}\\s){2}💩{2}\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--words", "--spaces", "--escape", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w\\s{3}\\u{2665}{3}\\s\\w{2}(?:\\s\\w{3}\\s\\w){2}(?:\\w{3}\\s){2}\\u{1f4a9}{2}\\.$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--words", "--spaces", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w\\s{3}\\u{2665}{3}\\s\\w{2}(?:\\s\\w{3}\\s\\w){2}(?:\\w{3}\\s){2}(?:\\u{d83d}\\u{dca9}){2}\\.$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--words", "--spaces", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\s{3}♥{3}\s\w{2} (?: \s\w{3}\s\w ){2} (?: \w{3}\s ){2} 💩{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--words", "--spaces", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\s{3}\u{2665}{3}\s\w{2} (?: \s\w{3}\s\w ){2} (?: \w{3}\s ){2} \u{1f4a9}{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--words", "--spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\s{3}\u{2665}{3}\s\w{2} (?: \s\w{3}\s\w ){2} (?: \w{3}\s ){2} (?: \u{d83d}\u{dca9} ){2} \. $ "# ))); } } } mod digit_space_word_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--digits", "--words", "--spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w\\s\\s\\s♥♥♥\\s\\d\\d\\s\\w\\w\\w\\s\\d\\s\\w\\w\\w\\s\\w\\w\\w\\w\\s\\w\\w\\w\\s💩💩\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--digits", "--words", "--spaces", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s\\d\\d\\s\\w\\w\\w\\s\\d\\s\\w\\w\\w\\s\\w\\w\\w\\w\\s\\w\\w\\w\\s\\u{1f4a9}\\u{1f4a9}\\.$\n" )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--digits", "--words", "--spaces", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s\\d\\d\\s\\w\\w\\w\\s\\d\\s\\w\\w\\w\\s\\w\\w\\w\\w\\s\\w\\w\\w\\s\\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$\n" )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--digits", "--words", "--spaces", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\s\s\s♥♥♥\s\d\d\s\w\w\w\s\d\s\w\w\w\s\w\w\w\w\s\w\w\w\s💩💩\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--digits", "--words", "--spaces", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\s\s\s\u{2665}\u{2665}\u{2665}\s\d\d\s\w\w\w\s\d\s\w\w\w\s\w\w\w\w\s\w\w\w\s\u{1f4a9}\u{1f4a9}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--digits", "--words", "--spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\s\s\s\u{2665}\u{2665}\u{2665}\s\d\d\s\w\w\w\s\d\s\w\w\w\s\w\w\w\w\s\w\w\w\s\u{d83d}\u{dca9}\u{d83d}\u{dca9}\. $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--words", "--spaces", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w\\s{3}♥{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w(?:\\w{3}\\s){2}💩{2}\\.$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--words", "--spaces", "--escape", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w(?:\\w{3}\\s){2}\\u{1f4a9}{2}\\.$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--words", "--spaces", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w(?:\\w{3}\\s){2}(?:\\u{d83d}\\u{dca9}){2}\\.$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--words", "--spaces", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\s{3}♥{3}\s\d (?: \d\s\w{3}\s ){2} \w (?: \w{3}\s ){2} 💩{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--words", "--spaces", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\s{3}\u{2665}{3}\s\d (?: \d\s\w{3}\s ){2} \w (?: \w{3}\s ){2} \u{1f4a9}{2}\. $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--words", "--spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\s{3}\u{2665}{3}\s\d (?: \d\s\w{3}\s ){2} \w (?: \w{3}\s ){2} (?: \u{d83d}\u{dca9} ){2} \. $ "# ))); } } } mod non_digit_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--non-digits", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D36\\D\\D\\D\\D\\D٣\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--non-digits", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D36\\D\\D\\D\\D\\D\\u{663}\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&["--non-digits", "--escape", "--with-surrogates", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D36\\D\\D\\D\\D\\D\\u{663}\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--non-digits", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D36\D\D\D\D\D٣\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--non-digits", "--escape", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D36\D\D\D\D\D\u{663}\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--non-digits", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D36\D\D\D\D\D\u{663}\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--non-digits", TEST_CASE]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}36\\D{5}٣\\D{17}$\n")); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--non-digits", "--escape", TEST_CASE]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}36\\D{5}\\u{663}\\D{17}$\n")); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}36\\D{5}\\u{663}\\D{17}$\n")); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--non-digits", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}36\D{5}٣\D{17} $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}36\D{5}\u{663}\D{17} $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}36\D{5}\u{663}\D{17} $ "# ))); } } } mod non_space_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--non-spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\S \\S\\S\\S \\S\\S \\S\\S\\S \\S \\S\\S\\S \\S\\S\\S\\S \\S\\S\\S \\S\\S\\S$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--non-spaces", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\S \\S\\S\\S \\S\\S \\S\\S\\S \\S \\S\\S\\S \\S\\S\\S\\S \\S\\S\\S \\S\\S\\S$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&["--non-spaces", "--escape", "--with-surrogates", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\S \\S\\S\\S \\S\\S \\S\\S\\S \\S \\S\\S\\S \\S\\S\\S\\S \\S\\S\\S \\S\\S\\S$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--non-spaces", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\ \ \ \S\S\S\ \S\S\ \S\S\S\ \S\ \S\S\S\ \S\S\S\S\ \S\S\S\ \S\S\S $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--non-spaces", "--escape", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\ \ \ \S\S\S\ \S\S\ \S\S\S\ \S\ \S\S\S\ \S\S\S\S\ \S\S\S\ \S\S\S $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--non-spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\ \ \ \S\S\S\ \S\S\ \S\S\S\ \S\ \S\S\S\ \S\S\S\S\ \S\S\S\ \S\S\S $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--non-spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\S {3}\\S(?:\\S{2} ){2}\\S{3} (?:\\S(?: \\S{3}){2}){2}$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--non-spaces", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\S {3}\\S(?:\\S{2} ){2}\\S{3} (?:\\S(?: \\S{3}){2}){2}$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-spaces", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\S {3}\\S(?:\\S{2} ){2}\\S{3} (?:\\S(?: \\S{3}){2}){2}$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--non-spaces", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\ {3}\S (?: \S{2}\ ){2} \S{3}\ (?: \S (?: \ \S{3} ){2} ){2} $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-spaces", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\ {3}\S (?: \S{2}\ ){2} \S{3}\ (?: \S (?: \ \S{3} ){2} ){2} $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\ {3}\S (?: \S{2}\ ){2} \S{3}\ (?: \S (?: \ \S{3} ){2} ){2} $ "# ))); } } } mod non_word_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--non-words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I\\W\\W\\W\\W\\W\\W\\W36\\Wand\\W٣\\Wand\\Wy̆y̆\\Wand\\W\\W\\W\\W$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--non-words", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I\\W\\W\\W\\W\\W\\W\\W36\\Wand\\W\\u{663}\\Wand\\Wy\\u{306}y\\u{306}\\Wand\\W\\W\\W\\W$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&["--non-words", "--escape", "--with-surrogates", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I\\W\\W\\W\\W\\W\\W\\W36\\Wand\\W\\u{663}\\Wand\\Wy\\u{306}y\\u{306}\\Wand\\W\\W\\W\\W$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--non-words", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\W\W\W\W\W\W\W36\Wand\W٣\Wand\Wy̆y̆\Wand\W\W\W\W $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--non-words", "--escape", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\W\W\W\W\W\W\W36\Wand\W\u{663}\Wand\Wy\u{306}y\u{306}\Wand\W\W\W\W $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--non-words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\W\W\W\W\W\W\W36\Wand\W\u{663}\Wand\Wy\u{306}y\u{306}\Wand\W\W\W\W $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--non-words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I\\W{7}36\\Wand\\W٣\\Wand\\W(?:y̆){2}\\Wand\\W{4}$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--non-words", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^I\\W{7}36\\Wand\\W\\u{663}\\Wand\\W(?:y\\u{306}){2}\\Wand\\W{4}$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-words", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^I\\W{7}36\\Wand\\W\\u{663}\\Wand\\W(?:y\\u{306}){2}\\Wand\\W{4}$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--repetitions", "--non-words", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\W{7}36\Wand\W٣\Wand\W (?: y̆ ){2} \Wand\W{4} $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-words", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\W{7}36\Wand\W\u{663}\Wand\W (?: y\u{306} ){2} \Wand\W{4} $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\W{7}36\Wand\W\u{663}\Wand\W (?: y\u{306} ){2} \Wand\W{4} $ "# ))); } } } mod non_digit_non_space_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--non-digits", "--non-spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D\\S\\S\\D\\D\\D\\D\\D\\S\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--non-digits", "--non-spaces", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D\\S\\S\\D\\D\\D\\D\\D\\S\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--non-digits", "--non-spaces", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D\\S\\S\\D\\D\\D\\D\\D\\S\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--non-digits", "--non-spaces", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D\S\S\D\D\D\D\D\S\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--non-digits", "--non-spaces", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D\S\S\D\D\D\D\D\S\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--non-digits", "--non-spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D\S\S\D\D\D\D\D\S\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--non-digits", "--non-spaces", TEST_CASE]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}\\S{2}\\D{5}\\S\\D{17}$\n")); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-spaces", "--escape", TEST_CASE, ]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}\\S{2}\\D{5}\\S\\D{17}$\n")); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-spaces", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}\\S{2}\\D{5}\\S\\D{17}$\n")); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-spaces", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}\S{2}\D{5}\S\D{17} $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-spaces", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}\S{2}\D{5}\S\D{17} $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}\S{2}\D{5}\S\D{17} $ "# ))); } } } mod non_digit_non_word_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--non-digits", "--non-words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D36\\D\\D\\D\\D\\D٣\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--non-digits", "--non-words", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D36\\D\\D\\D\\D\\D\\u{663}\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--non-digits", "--non-words", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D36\\D\\D\\D\\D\\D\\u{663}\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--non-digits", "--non-words", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D36\D\D\D\D\D٣\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--non-digits", "--non-words", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D36\D\D\D\D\D\u{663}\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--non-digits", "--non-words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D36\D\D\D\D\D\u{663}\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--non-digits", "--non-words", TEST_CASE]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}36\\D{5}٣\\D{17}$\n")); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-words", "--escape", TEST_CASE, ]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}36\\D{5}\\u{663}\\D{17}$\n")); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-words", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}36\\D{5}\\u{663}\\D{17}$\n")); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-words", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}36\D{5}٣\D{17} $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-words", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}36\D{5}\u{663}\D{17} $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}36\D{5}\u{663}\D{17} $ "# ))); } } } mod non_space_non_word_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--non-spaces", "--non-words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\S\\W\\W\\W\\W\\W\\W\\W\\S\\S\\W\\S\\S\\S\\W\\S\\W\\S\\S\\S\\W\\S\\S\\S\\S\\W\\S\\S\\S\\W\\W\\W\\W$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--non-spaces", "--non-words", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\S\\W\\W\\W\\W\\W\\W\\W\\S\\S\\W\\S\\S\\S\\W\\S\\W\\S\\S\\S\\W\\S\\S\\S\\S\\W\\S\\S\\S\\W\\W\\W\\W$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--non-spaces", "--non-words", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\S\\W\\W\\W\\W\\W\\W\\W\\S\\S\\W\\S\\S\\S\\W\\S\\W\\S\\S\\S\\W\\S\\S\\S\\S\\W\\S\\S\\S\\W\\W\\W\\W$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--non-spaces", "--non-words", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\W\W\W\W\W\W\W\S\S\W\S\S\S\W\S\W\S\S\S\W\S\S\S\S\W\S\S\S\W\W\W\W $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--non-spaces", "--non-words", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\W\W\W\W\W\W\W\S\S\W\S\S\S\W\S\W\S\S\S\W\S\S\S\S\W\S\S\S\W\W\W\W $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--non-spaces", "--non-words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\W\W\W\W\W\W\W\S\S\W\S\S\S\W\S\W\S\S\S\W\S\S\S\S\W\S\S\S\W\W\W\W $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--non-spaces", "--non-words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\S\\W{7}\\S(?:\\S\\W\\S{3}\\W){2}\\S{4}\\W\\S{3}\\W{4}$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-spaces", "--non-words", "--escape", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\S\\W{7}\\S(?:\\S\\W\\S{3}\\W){2}\\S{4}\\W\\S{3}\\W{4}$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-spaces", "--non-words", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\S\\W{7}\\S(?:\\S\\W\\S{3}\\W){2}\\S{4}\\W\\S{3}\\W{4}$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-spaces", "--non-words", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\W{7}\S (?: \S\W\S{3}\W ){2} \S{4}\W\S{3}\W{4} $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-spaces", "--non-words", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\W{7}\S (?: \S\W\S{3}\W ){2} \S{4}\W\S{3}\W{4} $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-spaces", "--non-words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\W{7}\S (?: \S\W\S{3}\W ){2} \S{4}\W\S{3}\W{4} $ "# ))); } } } mod non_digit_non_space_non_word_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--non-digits", "--non-spaces", "--non-words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D\\S\\S\\D\\D\\D\\D\\D\\S\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&[ "--non-digits", "--non-spaces", "--non-words", "--escape", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D\\S\\S\\D\\D\\D\\D\\D\\S\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--non-digits", "--non-spaces", "--non-words", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D\\S\\S\\D\\D\\D\\D\\D\\S\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--non-digits", "--non-spaces", "--non-words", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D\S\S\D\D\D\D\D\S\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--non-digits", "--non-spaces", "--non-words", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D\S\S\D\D\D\D\D\S\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--non-digits", "--non-spaces", "--non-words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D\S\S\D\D\D\D\D\S\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-spaces", "--non-words", TEST_CASE, ]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}\\S{2}\\D{5}\\S\\D{17}$\n")); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-spaces", "--non-words", "--escape", TEST_CASE, ]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}\\S{2}\\D{5}\\S\\D{17}$\n")); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-spaces", "--non-words", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}\\S{2}\\D{5}\\S\\D{17}$\n")); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-spaces", "--non-words", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}\S{2}\D{5}\S\D{17} $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-spaces", "--non-words", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}\S{2}\D{5}\S\D{17} $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--non-digits", "--non-spaces", "--non-words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}\S{2}\D{5}\S\D{17} $ "# ))); } } } mod digit_non_digit_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--digits", "--non-digits", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D\\d\\d\\D\\D\\D\\D\\D\\d\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--digits", "--non-digits", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D\\d\\d\\D\\D\\D\\D\\D\\d\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--digits", "--non-digits", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\D\\D\\D\\D\\D\\D\\D\\D\\d\\d\\D\\D\\D\\D\\D\\d\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--digits", "--non-digits", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D\d\d\D\D\D\D\D\d\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--digits", "--non-digits", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D\d\d\D\D\D\D\D\d\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--digits", "--non-digits", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D\D\D\D\D\D\D\D\d\d\D\D\D\D\D\d\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D\D $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--digits", "--non-digits", TEST_CASE]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}\\d{2}\\D{5}\\d\\D{17}$\n")); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--non-digits", "--escape", TEST_CASE, ]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}\\d{2}\\D{5}\\d\\D{17}$\n")); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--non-digits", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert() .success() .stdout(predicate::eq("^\\D{8}\\d{2}\\D{5}\\d\\D{17}$\n")); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--non-digits", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}\d{2}\D{5}\d\D{17} $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--non-digits", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}\d{2}\D{5}\d\D{17} $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--digits", "--non-digits", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \D{8}\d{2}\D{5}\d\D{17} $ "# ))); } } } mod space_non_space_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--spaces", "--non-spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\S\\s\\s\\s\\S\\S\\S\\s\\S\\S\\s\\S\\S\\S\\s\\S\\s\\S\\S\\S\\s\\S\\S\\S\\S\\s\\S\\S\\S\\s\\S\\S\\S$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--spaces", "--non-spaces", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\S\\s\\s\\s\\S\\S\\S\\s\\S\\S\\s\\S\\S\\S\\s\\S\\s\\S\\S\\S\\s\\S\\S\\S\\S\\s\\S\\S\\S\\s\\S\\S\\S$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--spaces", "--non-spaces", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\S\\s\\s\\s\\S\\S\\S\\s\\S\\S\\s\\S\\S\\S\\s\\S\\s\\S\\S\\S\\s\\S\\S\\S\\S\\s\\S\\S\\S\\s\\S\\S\\S$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--spaces", "--non-spaces", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\s\s\s\S\S\S\s\S\S\s\S\S\S\s\S\s\S\S\S\s\S\S\S\S\s\S\S\S\s\S\S\S $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--spaces", "--non-spaces", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\s\s\s\S\S\S\s\S\S\s\S\S\S\s\S\s\S\S\S\s\S\S\S\S\s\S\S\S\s\S\S\S $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--spaces", "--non-spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\s\s\s\S\S\S\s\S\S\s\S\S\S\s\S\s\S\S\S\s\S\S\S\S\s\S\S\S\s\S\S\S $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--spaces", "--non-spaces", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\S\\s{3}\\S(?:\\S{2}\\s){2}\\S{3}\\s(?:\\S(?:\\s\\S{3}){2}){2}$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--spaces", "--non-spaces", "--escape", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\S\\s{3}\\S(?:\\S{2}\\s){2}\\S{3}\\s(?:\\S(?:\\s\\S{3}){2}){2}$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--spaces", "--non-spaces", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\S\\s{3}\\S(?:\\S{2}\\s){2}\\S{3}\\s(?:\\S(?:\\s\\S{3}){2}){2}$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--spaces", "--non-spaces", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\s{3}\S (?: \S{2}\s ){2} \S{3}\s (?: \S (?: \s\S{3} ){2} ){2} $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--spaces", "--non-spaces", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\s{3}\S (?: \S{2}\s ){2} \S{3}\s (?: \S (?: \s\S{3} ){2} ){2} $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--spaces", "--non-spaces", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \S\s{3}\S (?: \S{2}\s ){2} \S{3}\s (?: \S (?: \s\S{3} ){2} ){2} $ "# ))); } } } mod word_non_word_conversion { use super::*; mod no_repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--words", "--non-words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w\\W\\W\\W\\W\\W\\W\\W\\w\\w\\W\\w\\w\\w\\W\\w\\W\\w\\w\\w\\W\\w\\w\\w\\w\\W\\w\\w\\w\\W\\W\\W\\W$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&["--words", "--non-words", "--escape", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w\\W\\W\\W\\W\\W\\W\\W\\w\\w\\W\\w\\w\\w\\W\\w\\W\\w\\w\\w\\W\\w\\w\\w\\w\\W\\w\\w\\w\\W\\W\\W\\W$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--words", "--non-words", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w\\W\\W\\W\\W\\W\\W\\W\\w\\w\\W\\w\\w\\w\\W\\w\\W\\w\\w\\w\\W\\w\\w\\w\\w\\W\\w\\w\\w\\W\\W\\W\\W$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--words", "--non-words", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\W\W\W\W\W\W\W\w\w\W\w\w\w\W\w\W\w\w\w\W\w\w\w\w\W\w\w\w\W\W\W\W $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&["--words", "--non-words", "--escape", "--verbose", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\W\W\W\W\W\W\W\w\w\W\w\w\w\W\w\W\w\w\w\W\w\w\w\w\W\w\w\w\W\W\W\W $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--words", "--non-words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\W\W\W\W\W\W\W\w\w\W\w\w\w\W\w\W\w\w\w\W\w\w\w\w\W\w\w\w\W\W\W\W $ "# ))); } } mod repetition { use super::*; #[test] fn succeeds() { let mut grex = init_command(); grex.args(&["--repetitions", "--words", "--non-words", TEST_CASE]); grex.assert().success().stdout(predicate::eq( "^\\w\\W{7}\\w(?:\\w\\W\\w{3}\\W){2}\\w{4}\\W\\w{3}\\W{4}$\n", )); } #[test] fn succeeds_with_escape_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--words", "--non-words", "--escape", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w\\W{7}\\w(?:\\w\\W\\w{3}\\W){2}\\w{4}\\W\\w{3}\\W{4}$\n", )); } #[test] fn succeeds_with_escape_and_surrogate_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--words", "--non-words", "--escape", "--with-surrogates", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq( "^\\w\\W{7}\\w(?:\\w\\W\\w{3}\\W){2}\\w{4}\\W\\w{3}\\W{4}$\n", )); } #[test] fn succeeds_with_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--words", "--non-words", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\W{7}\w (?: \w\W\w{3}\W ){2} \w{4}\W\w{3}\W{4} $ "# ))); } #[test] fn succeeds_with_escape_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--words", "--non-words", "--escape", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\W{7}\w (?: \w\W\w{3}\W ){2} \w{4}\W\w{3}\W{4} $ "# ))); } #[test] fn succeeds_with_escape_and_surrogate_and_verbose_mode_option() { let mut grex = init_command(); grex.args(&[ "--repetitions", "--words", "--non-words", "--escape", "--with-surrogates", "--verbose", TEST_CASE, ]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ \w\W{7}\w (?: \w\W\w{3}\W ){2} \w{4}\W\w{3}\W{4} $ "# ))); } } } mod anchor_conversion { use super::*; mod no_verbose { use super::*; #[test] fn succeeds_with_no_start_anchor_option() { let mut grex = init_command(); grex.args(&["--no-start-anchor", TEST_CASE]); grex.assert() .success() .stdout(predicate::eq("I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩\\.$\n")); } #[test] fn succeeds_with_no_end_anchor_option() { let mut grex = init_command(); grex.args(&["--no-end-anchor", TEST_CASE]); grex.assert() .success() .stdout(predicate::eq("^I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩\\.\n")); } #[test] fn succeeds_with_no_anchors_option() { let mut grex = init_command(); grex.args(&["--no-anchors", TEST_CASE]); grex.assert() .success() .stdout(predicate::eq("I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩\\.\n")); } } mod verbose { use super::*; #[test] fn succeeds_with_verbose_mode_and_no_start_anchor_option() { let mut grex = init_command(); grex.args(&["--verbose", "--no-start-anchor", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) I\ \ \ ♥♥♥\ 36\ and\ ٣\ and\ y̆y̆\ and\ 💩💩\. $ "#, ))); } #[test] fn succeeds_with_verbose_mode_and_no_end_anchor_option() { let mut grex = init_command(); grex.args(&["--verbose", "--no-end-anchor", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) ^ I\ \ \ ♥♥♥\ 36\ and\ ٣\ and\ y̆y̆\ and\ 💩💩\. "#, ))); } #[test] fn succeeds_with_verbose_mode_and_no_anchors_option() { let mut grex = init_command(); grex.args(&["--verbose", "--no-anchors", TEST_CASE]); grex.assert().success().stdout(predicate::eq(indoc!( r#" (?x) I\ \ \ ♥♥♥\ 36\ and\ ٣\ and\ y̆y̆\ and\ 💩💩\. "#, ))); } } } fn init_command() -> Command { Command::new(cargo_bin!()) } ================================================ FILE: tests/lib_integration_tests.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #![cfg(not(target_family = "wasm"))] use grex::RegExpBuilder; use indoc::indoc; use regex::Regex; use rstest::rstest; use std::io::Write; use tempfile::NamedTempFile; mod no_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case(vec![""], "^$"), case(vec![" "], "^ $"), case(vec![" "], "^ $"), case(vec!["["], "^\\[$"), case(vec!["a", "("], "^[(a]$"), case(vec!["a", "\n"], "^[\\na]$"), case(vec!["a", "["], "^[\\[a]$"), case(vec!["a", "-", "c", "!"], "^[!\\-ac]$"), case(vec!["a", "b"], "^[ab]$"), case(vec!["a", "b", "c"], "^[a-c]$"), case(vec!["a", "c", "d", "e", "f"], "^[ac-f]$"), case(vec!["a", "b", "x", "d", "e"], "^[abdex]$"), case(vec!["a", "b", "x", "de"], "^(?:de|[abx])$"), case(vec!["a", "b", "c", "x", "d", "e"], "^[a-ex]$"), case(vec!["a", "b", "c", "x", "de"], "^(?:de|[a-cx])$"), case(vec!["a", "b", "c", "d", "e", "f", "o", "x", "y", "z"], "^[a-fox-z]$"), case(vec!["a", "b", "d", "e", "f", "o", "x", "y", "z"], "^[abd-fox-z]$"), case(vec!["1", "2"], "^[12]$"), case(vec!["1", "2", "3"], "^[1-3]$"), case(vec!["1", "3", "4", "5", "6"], "^[13-6]$"), case(vec!["1", "2", "8", "4", "5"], "^[12458]$"), case(vec!["1", "2", "8", "45"], "^(?:45|[128])$"), case(vec!["1", "2", "3", "8", "4", "5"], "^[1-58]$"), case(vec!["1", "2", "3", "8", "45"], "^(?:45|[1-38])$"), case(vec!["1", "2", "3", "5", "7", "8", "9"], "^[1-357-9]$"), case(vec!["a", "b", "bc"], "^(?:bc?|a)$"), case(vec!["a", "b", "bcd"], "^(?:b(?:cd)?|a)$"), case(vec!["a", "ab", "abc"], "^a(?:bc?)?$"), case(vec!["ac", "bc"], "^[ab]c$"), case(vec!["ab", "ac"], "^a[bc]$"), case(vec!["bc", "abc"], "^a?bc$"), case(vec!["ac", "abc"], "^ab?c$"), case(vec!["abc", "abxyc"], "^ab(?:xy)?c$"), case(vec!["ab", "abc"], "^abc?$"), case(vec!["abx", "cdx"], "^(?:ab|cd)x$"), case(vec!["abd", "acd"], "^a[bc]d$"), case(vec!["abc", "abcd"], "^abcd?$"), case(vec!["abc", "abcde"], "^abc(?:de)?$"), case(vec!["ade", "abcde"], "^a(?:bc)?de$"), case(vec!["abcxy", "adexy"], "^a(?:bc|de)xy$"), case(vec!["axy", "abcxy", "adexy"], "^a(?:(?:bc)?|de)xy$"), // goal: "^a(bc|de)?xy$" case(vec!["abcxy", "abcw", "efgh"], "^(?:abc(?:xy|w)|efgh)$"), case(vec!["abcxy", "efgh", "abcw"], "^(?:abc(?:xy|w)|efgh)$"), case(vec!["efgh", "abcxy", "abcw"], "^(?:abc(?:xy|w)|efgh)$"), case(vec!["abxy", "cxy", "efgh"], "^(?:(?:ab|c)xy|efgh)$"), case(vec!["abxy", "efgh", "cxy"], "^(?:(?:ab|c)xy|efgh)$"), case(vec!["efgh", "abxy", "cxy"], "^(?:(?:ab|c)xy|efgh)$"), case(vec!["aaacaac", "aac"], "^aa(?:acaa)?c$"), case(vec!["a", "ä", "o", "ö", "u", "ü"], "^[aouäöü]$"), case(vec!["y̆", "a", "z"], "^(?:y̆|[az])$"), // goal: "^[az]|y\\u{306}$" case(vec!["a", "b\n", "c"], "^(?:b\\n|[ac])$"), case(vec!["a", "b\\n", "c"], "^(?:b\\\\n|[ac])$"), case(vec!["[a-z]", "(d,e,f)"], "^(?:\\(d,e,f\\)|\\[a\\-z\\])$"), case(vec!["3.5", "4.5", "4,5"], "^(?:3\\.5|4[,.]5)$"), case(vec!["\u{b}"], "^\\v$"), // U+000B Line Tabulation case(vec!["\\u{b}"], "^\\\\u\\{b\\}$"), case(vec!["\u{c}"], "^\\f$"), // U+000C Form Feed case(vec!["\\u{c}"], "^\\\\u\\{c\\}$"), case(vec!["\u{200b}"], "^​$"), case(vec!["I ♥ cake"], "^I ♥ cake$"), case(vec!["I \u{2665} cake"], "^I ♥ cake$"), case(vec!["I \\u{2665} cake"], "^I \\\\u\\{2665\\} cake$"), case(vec!["I \\u2665 cake"], "^I \\\\u2665 cake$"), case(vec!["My ♥ is yours.", "My 💩 is yours."], "^My [♥💩] is yours\\.$"), case(vec!["[\u{c3e}"], "^\\[\u{c3e}$"), case(vec!["\\\u{10376}"], "^\\\\\u{10376}$"), case(vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩\\.$"), case(vec!["\u{890}\0"], "^\u{890}\0$"), case(vec!["\u{890}\\0"], "^\u{890}\\\\0$"), case(vec!["\u{890}\\\0"], "^\u{890}\\\\\0$"), case(vec!["\u{890}\\\\0"], "^\u{890}\\\\\\\\0$"), case(vec!["\\𑇂"], "^\\\\𑇂$"), case(vec!["𑇂\\"], "^𑇂\\\\$") )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases).build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["İ"], "(?i)^İ$"), case(vec!["ABC", "abc", "AbC", "aBc"], "(?i)^abc$"), case(vec!["ABC", "zBC", "abc", "AbC", "aBc"], "(?i)^[az]bc$"), case(vec!["Ä@Ö€Ü", "ä@ö€ü", "Ä@ö€Ü", "ä@Ö€ü"], "(?i)^ä@ö€ü$"), )] fn succeeds_with_ignore_case_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_case_insensitive_matching() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["My ♥ and 💩 is yours."], "^My \\u{2665} and \\u{1f4a9} is yours\\.$"), case(vec!["My ♥ is yours.", "My 💩 is yours."], "^My (?:\\u{2665}|\\u{1f4a9}) is yours\\.$"), case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I \\u{2665}\\u{2665}\\u{2665} 36 and \\u{663} and y\\u{306}y\\u{306} and \\u{1f4a9}\\u{1f4a9}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["My ♥ and 💩 is yours."], "^My \\u{2665} and \\u{d83d}\\u{dca9} is yours\\.$"), case(vec!["My ♥ is yours.", "My 💩 is yours."], "^My (?:\\u{2665}|\\u{d83d}\\u{dca9}) is yours\\.$"), case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I \\u{2665}\\u{2665}\\u{2665} 36 and \\u{663} and y\\u{306}y\\u{306} and \\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } #[rstest(test_cases, expected_output, case(vec!["a", "b", "bc"], "^(bc?|a)$"), case(vec!["a", "b", "bcd"], "^(b(cd)?|a)$"), case(vec!["a", "ab", "abc"], "^a(bc?)?$"), case(vec!["efgh", "abcxy", "abcw"], "^(abc(xy|w)|efgh)$"), )] fn succeeds_with_capturing_groups_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_capturing_groups() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec![""], indoc!( r#" (?x) ^ $"# )), case(vec![" "], indoc!( r#" (?x) ^ \ $"# )), case(vec![" "], indoc!( r#" (?x) ^ \ \ \ $"# )), case(vec!["\u{200b}"], indoc!( r#" (?x) ^ ​ $"# )), case(vec!["a", "b", "c"], indoc!( r#" (?x) ^ [a-c] $"# )), case(vec!["a", "b", "bc"], indoc!( r#" (?x) ^ (?: bc? | a ) $"# )), case(vec!["a", "ab", "abc"], indoc!( r#" (?x) ^ a (?: bc? )? $"# )), case(vec!["a", "b", "bcd"], indoc!( r#" (?x) ^ (?: b (?: cd )? | a ) $"# )), case(vec!["a", "b", "x", "de"], indoc!( r#" (?x) ^ (?: de | [abx] ) $"# )), case(vec!["[a-z]", "(d,e,f)"], indoc!( r#" (?x) ^ (?: \(d,e,f\) | \[a\-z\] ) $"# )), case(vec!["3.5", "4.5", "4,5"], indoc!( r#" (?x) ^ (?: 3\.5 | 4[,.]5 ) $"# )), case(vec!["Ga", "G)"], indoc!( r#" (?x) ^ G[)a] $"# )), case(vec!["aG", ")G"], indoc!( r#" (?x) ^ [)a]G $"# )), case(vec!["Ga", "G)", "G("], indoc!( r#" (?x) ^ G[()a] $"# )), case(vec!["aG", ")G", "(G"], indoc!( r#" (?x) ^ [()a]G $"# )), case(vec!["aaacaac", "aac"], indoc!( r#" (?x) ^ aa (?: acaa )? c $"# )), )] fn succeeds_with_verbose_mode_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases).with_verbose_mode().build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["İ"], indoc!( r#" (?ix) ^ İ $"# )), case(vec!["ABC", "abc", "AbC", "aBc"], indoc!( r#" (?ix) ^ abc $"# )), case(vec!["ABC", "zBC", "abc", "AbC", "aBc"], indoc!( r#" (?ix) ^ [az]bc $"# )), case(vec!["Ä@Ö€Ü", "ä@ö€ü", "Ä@ö€Ü", "ä@Ö€ü"], indoc!( r#" (?ix) ^ ä@ö€ü $"# )) )] fn succeeds_with_ignore_case_and_verbose_mode_option( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) .with_case_insensitive_matching() .with_verbose_mode() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[test] fn succeeds_with_file_input() { let mut file = NamedTempFile::new().unwrap(); writeln!(file, "a\nb\nc\r\nxyz").unwrap(); let expected_output = "^(?:xyz|[a-c])$"; let test_cases = vec!["a", "b", "c", "xyz"]; let regexp = RegExpBuilder::from_file(file.path()).build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case(vec![""], "^$"), case(vec![" "], "^ $"), case(vec![" "], "^ {3}$"), case(vec!["a"], "^a$"), case(vec!["aa"], "^a{2}$"), case(vec!["aaa"], "^a{3}$"), case(vec!["aaa aaa"], "^a{3} a{3}$"), case(vec!["ababab ababab"], "^(?:ab){3} (?:ab){3}$"), case(vec!["ababab ababab"], "^(?:ab){3} {2}(?:ab){3}$"), case(vec!["a ababab ababab"], "^a(?: (?:ab){3}){2}$"), case(vec!["ababab ababab a"], "^a(?:b(?:ab){2} a){2}$"), case(vec!["ababababab abab ababab"], "^ababab(?:(?:ab){2} ){2}(?:ab){3}$"), case(vec!["a", "aa"], "^a{1,2}$"), case(vec!["aaa", "a", "aa"], "^a{1,3}$"), case(vec!["aaaa", "a", "aa"], "^(?:a{1,2}|a{4})$"), case(vec!["a", "aa", "aaa", "aaaa", "aaab"], "^(?:a{3}b|a{1,4})$"), case(vec!["baabaaaaaabb"], "^ba{2}ba{6}b{2}$"), case(vec!["aabbaabbaaa"], "^(?:a{2}b{2}){2}a{3}$"), case(vec!["aabbaa"], "^a{2}b{2}a{2}$"), case(vec!["aabbabb"], "^a(?:ab{2}){2}$"), case(vec!["ababab"], "^(?:ab){3}$"), case(vec!["abababa"], "^a(?:ba){3}$"), case(vec!["aababab"], "^a(?:ab){3}$"), case(vec!["abababaa"], "^(?:ab){3}a{2}$"), case(vec!["aaaaaabbbbb"], "^a{6}b{5}$"), case(vec!["aabaababab"], "^a{2}ba(?:ab){3}$"), case(vec!["aaaaaaabbbbbba"], "^a{7}b{6}a$"), case(vec!["abaaaabaaba"], "^abaaa(?:aba){2}$"), case(vec!["bbaababb"], "^b{2}a{2}bab{2}$"), case(vec!["b", "ba"], "^ba?$"), case(vec!["b", "ba", "baa"], "^b(?:a{1,2})?$"), case(vec!["b", "ba", "baaa", "baa"], "^b(?:a{1,3})?$"), case(vec!["b", "ba", "baaaa", "baa"], "^b(?:a{1,2}|a{4})?$"), case(vec!["axy", "abcxyxy", "adexy"], "^a(?:(?:de)?xy|bc(?:xy){2})$"), case(vec!["xy̆y̆y̆y̆z"], "^x(?:y̆){4}z$"), case(vec!["xy̆y̆z", "xy̆y̆y̆z"], "^x(?:y̆){2,3}z$"), case(vec!["xy̆y̆z", "xy̆y̆y̆y̆z"], "^x(?:(?:y̆){2}|(?:y̆){4})z$"), case(vec!["zyxx", "yxx"], "^z?yx{2}$"), case(vec!["zyxx", "yxx", "yxxx"], "^(?:zyx{2}|yx{2,3})$"), case(vec!["zyxxx", "yxx", "yxxx"], "^(?:zyx{3}|yx{2,3})$"), case(vec!["a", "b\n\n", "c"], "^(?:b\\n{2}|[ac])$"), case(vec!["a", "b\nb\nb", "c"], "^(?:b(?:\\nb){2}|[ac])$"), case(vec!["a", "b\nx\nx", "c"], "^(?:b(?:\\nx){2}|[ac])$"), case(vec!["a", "b\n\t\n\t", "c"], "^(?:b(?:\\n\\t){2}|[ac])$"), case(vec!["a", "b\n", "b\n\n", "b\n\n\n", "c"], "^(?:b\\n{1,3}|[ac])$"), case(vec!["4.5", "3.55"], "^(?:4\\.5|3\\.5{2})$"), case(vec!["4.5", "4.55"], "^4\\.5{1,2}$"), case(vec!["4.5", "4.55", "3.5"], "^(?:3\\.5|4\\.5{1,2})$"), case(vec!["4.5", "44.5", "44.55", "4.55"], "^4{1,2}\\.5{1,2}$"), case(vec!["I ♥♥ cake"], "^I ♥{2} cake$"), case(vec!["I ♥ cake", "I ♥♥ cake"], "^I ♥{1,2} cake$"), case(vec!["I \u{2665}\u{2665} cake"], "^I ♥{2} cake$"), case(vec!["I \\u{2665} cake"], "^I \\\\u\\{26{2}5\\} cake$"), case(vec!["I \\u{2665}\\u{2665} cake"], "^I (?:\\\\u\\{26{2}5\\}){2} cake$"), case(vec!["I \\u2665\\u2665 cake"], "^I (?:\\\\u26{2}5){2} cake$"), case(vec!["My ♥♥♥ is yours.", "My 💩💩 is yours."], "^My (?:💩{2}|♥{3}) is yours\\.$"), case(vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I {3}♥{3} 36 and ٣ and (?:y̆){2} and 💩{2}\\.$") )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["İ", "İİ"], "(?i)^İ{1,2}$"), case(vec!["AAAAB", "aaaab", "AaAaB", "aAaAB"], "(?i)^a{4}b$"), case(vec!["ÄÖÜäöü@Ö€", "äöüÄöÜ@ö€"], "(?i)^(?:äöü){2}@ö€$"), )] fn succeeds_with_ignore_case_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_case_insensitive_matching() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["My ♥♥♥ and 💩💩 is yours."], "^My \\u{2665}{3} and \\u{1f4a9}{2} is yours\\.$"), case(vec!["My ♥♥♥ is yours.", "My 💩💩 is yours."], "^My (?:\\u{1f4a9}{2}|\\u{2665}{3}) is yours\\.$"), case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I {3}\\u{2665}{3} 36 and \\u{663} and (?:y\\u{306}){2} and \\u{1f4a9}{2}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["My ♥♥♥ and 💩💩 is yours."], "^My \\u{2665}{3} and (?:\\u{d83d}\\u{dca9}){2} is yours\\.$"), case(vec!["My ♥♥♥ is yours.", "My 💩💩 is yours."], "^My (?:(?:\\u{d83d}\\u{dca9}){2}|\\u{2665}{3}) is yours\\.$"), case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I {3}\\u{2665}{3} 36 and \\u{663} and (?:y\\u{306}){2} and (?:\\u{d83d}\\u{dca9}){2}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } #[rstest(test_cases, expected_output, case(vec![" "], indoc!( r#" (?x) ^ \ {3} $"# )), case(vec!["aa"], indoc!( r#" (?x) ^ a{2} $"# )), case(vec!["aaa", "a", "aa"], indoc!( r#" (?x) ^ a{1,3} $"# )), case(vec!["aaaa", "a", "aa"], indoc!( r#" (?x) ^ (?: a{1,2} | a{4} ) $"# )), case(vec!["ababab"], indoc!( r#" (?x) ^ (?: ab ){3} $"# )), case(vec!["abababa"], indoc!( r#" (?x) ^ a (?: ba ){3} $"# )), case(vec!["abababaa"], indoc!( r#" (?x) ^ (?: ab ){3} a{2} $"# )), case(vec!["aabaababab"], indoc!( r#" (?x) ^ a{2}ba (?: ab ){3} $"# )), case(vec!["abaaaabaaba"], indoc!( r#" (?x) ^ abaaa (?: aba ){2} $"# )), case(vec!["xy̆y̆z", "xy̆y̆y̆y̆z"], indoc!( r#" (?x) ^ x (?: (?: y̆ ){2} | (?: y̆ ){4} ) z $"# )), case(vec!["a", "b\n\t\n\t", "c"], indoc!( r#" (?x) ^ (?: b (?: \n\t ){2} | [ac] ) $"# )), case(vec!["My ♥♥♥ is yours.", "My 💩💩 is yours."], indoc!( r#" (?x) ^ My\ (?: 💩{2} | ♥{3} ) \ is\ yours\. $"# )) )] fn succeeds_with_verbose_mode_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_verbose_mode() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec![""], "^$"), case(vec![" "], "^ $"), case(vec![" "], "^ $"), case(vec![" "], "^ {4}$"), case(vec![" "], "^ {6}$"), case(vec!["a"], "^a$"), case(vec!["aa"], "^aa$"), case(vec!["aaa"], "^aaa$"), case(vec!["aaaa"], "^a{4}$"), case(vec!["aaaaa"], "^a{5}$"), case(vec!["ababababab abab ababab"], "^(?:ab){5} abab ababab$"), case(vec!["aabbaaaabbbabbbbba"], "^aabba{4}bbbab{5}a$"), case(vec!["baabaaaaaabb"], "^baaba{6}bb$"), case(vec!["ababab"], "^ababab$"), case(vec!["abababab"], "^(?:ab){4}$"), case(vec!["abababa"], "^abababa$"), case(vec!["ababababa"], "^a(?:ba){4}$"), case(vec!["aababab"], "^aababab$"), case(vec!["aabababab"], "^a(?:ab){4}$"), case(vec!["xy̆y̆z", "xy̆y̆y̆y̆z"], "^x(?:y̆y̆|(?:y̆){4})z$"), case(vec!["aaa", "a", "aa"], "^a(?:aa?)?$"), case(vec!["a", "aa", "aaa", "aaaa"], "^(?:aaa|aa?|a{4})$"), case(vec!["a", "aa", "aaa", "aaaa", "aaaaa", "aaaaaa"], "^(?:aaa|aa?|a{4,6})$") )] fn succeeds_with_increased_minimum_repetitions( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_minimum_repetitions(3) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["aaa"], "^aaa$"), case(vec!["ababab"], "^ababab$"), case(vec!["abcabcabc"], "^(?:abc){3}$"), case(vec!["abcabcabc", "dede"], "^(?:dede|(?:abc){3})$"), case(vec!["abcabcabc", "defgdefg"], "^(?:(?:defg){2}|(?:abc){3})$"), case(vec!["ababababab abab ababab"], "^ababab(?:abab ){2}ababab$") )] fn succeeds_with_increased_minimum_substring_length( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_minimum_substring_length(3) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["abababab"], "^abababab$"), case(vec!["abcabcabc"], "^abcabcabc$"), case(vec!["abcabcabcabc"], "^(?:abc){4}$"), case(vec!["aaaaaaaaaaaa"], "^aaaaaaaaaaaa$"), case(vec!["abababab", "abcabcabcabc"], "^(?:abababab|(?:abc){4})$"), case(vec!["ababababab abab ababab"], "^ababababab abab ababab$") )] fn succeeds_with_increased_minimum_repetitions_and_substring_length( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_minimum_repetitions(3) .with_minimum_substring_length(3) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod digit_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case(vec![""], "^$"), case(vec!["a"], "^a$"), case(vec!["1"], "^\\d$"), case(vec!["-1"], "^\\-\\d$"), case(vec!["12"], "^\\d\\d$"), case(vec!["1", "2"], "^\\d$"), case(vec!["1", "23"], "^\\d(?:\\d)?$"), case(vec!["1", "234"], "^\\d(?:\\d\\d)?$"), case(vec!["8", "234"], "^\\d(?:\\d\\d)?$"), case(vec!["890", "34"], "^\\d\\d(?:\\d)?$"), case(vec!["abc123"], "^abc\\d\\d\\d$"), case(vec!["a1b2c3"], "^a\\db\\dc\\d$"), case(vec!["abc", "123"], "^(?:\\d\\d\\d|abc)$"), case(vec!["١", "٣", "٥"], "^\\d$"), // Arabic digits: ١ = 1, ٣ = 3, ٥ = 5 case(vec!["١٣٥"], "^\\d\\d\\d$"), case(vec!["a٣3", "b5٥"], "^[ab]\\d\\d$"), case(vec!["I ♥ 123"], "^I ♥ \\d\\d\\d$"), case(vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I ♥♥♥ \\d\\d and \\d and y̆y̆ and 💩💩\\.$") )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I \\u{2665}\\u{2665}\\u{2665} \\d\\d and \\d and y\\u{306}y\\u{306} and \\u{1f4a9}\\u{1f4a9}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I \\u{2665}\\u{2665}\\u{2665} \\d\\d and \\d and y\\u{306}y\\u{306} and \\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I {3}♥{3} \\d(?:\\d and ){2}(?:y̆){2} and 💩{2}\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I {3}\\u{2665}{3} \\d(?:\\d and ){2}(?:y\\u{306}){2} and \\u{1f4a9}{2}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I {3}\\u{2665}{3} \\d(?:\\d and ){2}(?:y\\u{306}){2} and (?:\\u{d83d}\\u{dca9}){2}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } #[rstest(test_cases, expected_output, case(vec!["1"], "^\\d$"), case(vec!["12"], "^\\d\\d$"), case(vec!["123"], "^\\d{3}$"), case(vec!["1", "12", "123"], "^(?:\\d\\d|\\d|\\d{3})$"), case(vec!["12", "123", "1234"], "^(?:\\d\\d|\\d{3,4})$"), case(vec!["123", "1234", "12345"], "^\\d{3,5}$"), case(vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I {3}♥{3} \\d\\d and \\d and y̆y̆ and 💩💩\\.$") )] fn succeeds_with_increased_minimum_repetitions( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_minimum_repetitions(2) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod space_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case(vec![""], "^$"), case(vec![" "], "^\\s$"), case(vec![" "], "^\\s\\s\\s$"), case(vec!["\n"], "^\\s$"), case(vec!["\u{c}"], "^\\s$"), // form feed \f case(vec!["\u{b}"], "^\\s$"), // vertical tab \v case(vec!["\n", "\r"], "^\\s$"), case(vec!["\n\t", "\r"], "^\\s(?:\\s)?$"), case(vec!["a"], "^a$"), case(vec!["1"], "^1$"), case(vec!["I ♥ 123"], "^I\\s♥\\s123$"), case(vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s\\s\\s♥♥♥\\s36\\sand\\s٣\\sand\\sy̆y̆\\sand\\s💩💩\\.$") )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s36\\sand\\s\\u{663}\\sand\\sy\\u{306}y\\u{306}\\sand\\s\\u{1f4a9}\\u{1f4a9}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s36\\sand\\s\\u{663}\\sand\\sy\\u{306}y\\u{306}\\sand\\s\\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s{3}♥{3}\\s36\\sand\\s٣\\sand\\s(?:y̆){2}\\sand\\s💩{2}\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s{3}\\u{2665}{3}\\s36\\sand\\s\\u{663}\\sand\\s(?:y\\u{306}){2}\\sand\\s\\u{1f4a9}{2}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s{3}\\u{2665}{3}\\s36\\sand\\s\\u{663}\\sand\\s(?:y\\u{306}){2}\\sand\\s(?:\\u{d83d}\\u{dca9}){2}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } #[rstest(test_cases, expected_output, case(vec![" "], "^\\s$"), case(vec![" "], "^\\s\\s$"), case(vec![" "], "^\\s{3}$"), case(vec![" ", " ", " "], "^(?:\\s\\s|\\s|\\s{3})$"), case(vec![" ", " ", " "], "^(?:\\s\\s|\\s{3,4})$"), case(vec![" ", " ", " "], "^\\s{3,5}$"), case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s{3}♥{3}\\s36\\sand\\s٣\\sand\\sy\u{306}y\u{306}\\sand\\s💩💩\\.$" ) )] fn succeeds_with_increased_minimum_repetitions( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_whitespace() .with_minimum_repetitions(2) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod word_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case(vec![""], "^$"), case(vec![" "], "^ $"), case(vec!["a"], "^\\w$"), case(vec!["1"], "^\\w$"), case(vec!["-1"], "^\\-\\w$"), case(vec!["1", "2"], "^\\w$"), case(vec!["ä", "ß"], "^\\w$"), case(vec!["abc", "1234"], "^\\w\\w\\w(?:\\w)?$"), case(vec!["١", "٣", "٥"], "^\\w$"), // Arabic digits: ١ = 1, ٣ = 3, ٥ = 5 case(vec!["١٣٥"], "^\\w\\w\\w$"), case(vec!["a٣3", "b5٥"], "^\\w\\w\\w$"), case(vec!["I ♥ 123"], "^\\w ♥ \\w\\w\\w$"), case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w ♥♥♥ \\w\\w \\w\\w\\w \\w \\w\\w\\w \\w\\w\\w\\w \\w\\w\\w 💩💩\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w \\u{2665}\\u{2665}\\u{2665} \\w\\w \\w\\w\\w \\w \\w\\w\\w \\w\\w\\w\\w \\w\\w\\w \\u{1f4a9}\\u{1f4a9}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w \\u{2665}\\u{2665}\\u{2665} \\w\\w \\w\\w\\w \\w \\w\\w\\w \\w\\w\\w\\w \\w\\w\\w \\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w {3}♥{3} \\w{2}(?: \\w{3} \\w){2}(?:\\w{3} ){2}💩{2}\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w {3}\\u{2665}{3} \\w{2}(?: \\w{3} \\w){2}(?:\\w{3} ){2}\\u{1f4a9}{2}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w {3}\\u{2665}{3} \\w{2}(?: \\w{3} \\w){2}(?:\\w{3} ){2}(?:\\u{d83d}\\u{dca9}){2}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } #[rstest(test_cases, expected_output, case(vec!["a"], "^\\w$"), case(vec!["ab"], "^\\w\\w$"), case(vec!["abc"], "^\\w{3}$"), case(vec!["a", "ab", "abc"], "^(?:\\w\\w|\\w|\\w{3})$"), case(vec!["ab", "abc", "abcd"], "^(?:\\w\\w|\\w{3,4})$"), case(vec!["abc", "abcd", "abcde"], "^\\w{3,5}$"), case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w {3}♥{3} \\w\\w \\w{3} \\w \\w{3} \\w{4} \\w{3} 💩💩\\.$" ) )] fn succeeds_with_increased_minimum_repetitions( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_words() .with_minimum_repetitions(2) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod digit_space_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s\\s\\s♥♥♥\\s\\d\\d\\sand\\s\\d\\sand\\sy̆y̆\\sand\\s💩💩\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .with_conversion_of_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s\\d\\d\\sand\\s\\d\\sand\\sy\\u{306}y\\u{306}\\sand\\s\\u{1f4a9}\\u{1f4a9}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s\\d\\d\\sand\\s\\d\\sand\\sy\\u{306}y\\u{306}\\sand\\s\\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s{3}♥{3}\\s\\d(?:\\d\\sand\\s){2}(?:y̆){2}\\sand\\s💩{2}\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_conversion_of_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\sand\\s){2}(?:y\\u{306}){2}\\sand\\s\\u{1f4a9}{2}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\sand\\s){2}(?:y\\u{306}){2}\\sand\\s(?:\\u{d83d}\\u{dca9}){2}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_conversion_of_whitespace() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } #[rstest(test_cases, expected_output, case(vec!["1\n"], "^\\d\\s$"), case(vec!["1\n1\n"], "^\\d\\s\\d\\s$"), case(vec!["1\n1\n1\n"], "^(?:\\d\\s){3}$"), case(vec!["1\n", "1\n1\n", "1\n1\n1\n"], "^(?:\\d\\s\\d\\s|\\d\\s|(?:\\d\\s){3})$"), case(vec!["1\n1\n", "1\n1\n1\n", "1\n1\n1\n1\n"], "^(?:\\d\\s\\d\\s|(?:\\d\\s){3,4})$"), case(vec!["1\n1\n1\n", "1\n1\n1\n1\n", "1\n1\n1\n1\n1\n"], "^(?:\\d\\s){3,5}$"), case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\s{3}♥{3}\\s\\d\\d\\sand\\s\\d\\sand\\sy̆y̆\\sand\\s💩💩\\.$" ) )] fn succeeds_with_increased_minimum_repetitions( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_conversion_of_whitespace() .with_minimum_repetitions(2) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["1\n1\n"], "^1\\n1\\n$"), case(vec!["1\n\n1\n\n"], "^(?:1\\n\\n){2}$") )] fn succeeds_with_increased_minimum_substring_length( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_minimum_substring_length(3) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["1\n1\n"], "^1\\n1\\n$"), case(vec!["1\n1\n1\n"], "^1\\n1\\n1\\n$"), case(vec!["1\n\n1\n\n"], "^1\\n\\n1\\n\\n$"), case(vec!["1\n\n1\n\n1\n\n"], "^(?:1\\n\\n){3}$") )] fn succeeds_with_increased_minimum_repetitions_and_substring_length( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_minimum_repetitions(2) .with_minimum_substring_length(3) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod digit_word_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w ♥♥♥ \\d\\d \\w\\w\\w \\d \\w\\w\\w \\w\\w\\w\\w \\w\\w\\w 💩💩\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w \\u{2665}\\u{2665}\\u{2665} \\d\\d \\w\\w\\w \\d \\w\\w\\w \\w\\w\\w\\w \\w\\w\\w \\u{1f4a9}\\u{1f4a9}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w \\u{2665}\\u{2665}\\u{2665} \\d\\d \\w\\w\\w \\d \\w\\w\\w \\w\\w\\w\\w \\w\\w\\w \\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w {3}♥{3} \\d(?:\\d \\w{3} ){2}\\w(?:\\w{3} ){2}💩{2}\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w {3}\\u{2665}{3} \\d(?:\\d \\w{3} ){2}\\w(?:\\w{3} ){2}\\u{1f4a9}{2}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w {3}\\u{2665}{3} \\d(?:\\d \\w{3} ){2}\\w(?:\\w{3} ){2}(?:\\u{d83d}\\u{dca9}){2}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } } } mod space_word_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\s\\s\\s♥♥♥\\s\\w\\w\\s\\w\\w\\w\\s\\w\\s\\w\\w\\w\\s\\w\\w\\w\\w\\s\\w\\w\\w\\s💩💩\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_whitespace() .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s\\w\\w\\s\\w\\w\\w\\s\\w\\s\\w\\w\\w\\s\\w\\w\\w\\w\\s\\w\\w\\w\\s\\u{1f4a9}\\u{1f4a9}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_whitespace() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s\\w\\w\\s\\w\\w\\w\\s\\w\\s\\w\\w\\w\\s\\w\\w\\w\\w\\s\\w\\w\\w\\s\\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_whitespace() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\s{3}♥{3}\\s\\w{2}(?:\\s\\w{3}\\s\\w){2}(?:\\w{3}\\s){2}💩{2}\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_whitespace() .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\s{3}\\u{2665}{3}\\s\\w{2}(?:\\s\\w{3}\\s\\w){2}(?:\\w{3}\\s){2}\\u{1f4a9}{2}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_whitespace() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\s{3}\\u{2665}{3}\\s\\w{2}(?:\\s\\w{3}\\s\\w){2}(?:\\w{3}\\s){2}(?:\\u{d83d}\\u{dca9}){2}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_whitespace() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } } } mod digit_space_word_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\s\\s\\s♥♥♥\\s\\d\\d\\s\\w\\w\\w\\s\\d\\s\\w\\w\\w\\s\\w\\w\\w\\w\\s\\w\\w\\w\\s💩💩\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .with_conversion_of_whitespace() .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s\\d\\d\\s\\w\\w\\w\\s\\d\\s\\w\\w\\w\\s\\w\\w\\w\\w\\s\\w\\w\\w\\s\\u{1f4a9}\\u{1f4a9}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .with_conversion_of_whitespace() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\s\\s\\s\\u{2665}\\u{2665}\\u{2665}\\s\\d\\d\\s\\w\\w\\w\\s\\d\\s\\w\\w\\w\\s\\w\\w\\w\\w\\s\\w\\w\\w\\s\\u{d83d}\\u{dca9}\\u{d83d}\\u{dca9}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .with_conversion_of_whitespace() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\s{3}♥{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w(?:\\w{3}\\s){2}💩{2}\\.$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_conversion_of_whitespace() .with_conversion_of_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w(?:\\w{3}\\s){2}\\u{1f4a9}{2}\\.$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_conversion_of_whitespace() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\s{3}\\u{2665}{3}\\s\\d(?:\\d\\s\\w{3}\\s){2}\\w(?:\\w{3}\\s){2}(?:\\u{d83d}\\u{dca9}){2}\\.$" ) )] fn succeeds_with_escape_and_surrogate_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_conversion_of_whitespace() .with_conversion_of_words() .with_escaping_of_non_ascii_chars(true) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); } } } mod non_digit_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\D\\D\\D\\D\\D\\D\\D\\D36\\D\\D\\D\\D\\D٣\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_non_digits() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\D\\D\\D\\D\\D\\D\\D\\D36\\D\\D\\D\\D\\D\\u{663}\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_non_digits() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case(vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\D{8}36\\D{5}٣\\D{17}$") )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_non_digits() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\D{8}36\\D{5}\\u{663}\\D{17}$") )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_non_digits() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod non_space_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\S \\S\\S\\S \\S\\S \\S\\S\\S \\S \\S\\S\\S \\S\\S\\S\\S \\S\\S\\S \\S\\S\\S$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_non_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\S {3}\\S(?:\\S{2} ){2}\\S{3} (?:\\S(?: \\S{3}){2}){2}$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_non_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod non_word_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\W\\W\\W\\W\\W\\W\\W36\\Wand\\W٣\\Wand\\Wy̆y̆\\Wand\\W\\W\\W\\W$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\W\\W\\W\\W\\W\\W\\W36\\Wand\\W\\u{663}\\Wand\\Wy\\u{306}y\\u{306}\\Wand\\W\\W\\W\\W$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_non_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\W{7}36\\Wand\\W٣\\Wand\\W(?:y̆){2}\\Wand\\W{4}$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^I\\W{7}36\\Wand\\W\\u{663}\\Wand\\W(?:y\\u{306}){2}\\Wand\\W{4}$" ) )] fn succeeds_with_escape_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_non_words() .with_escaping_of_non_ascii_chars(false) .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod non_digit_non_space_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\D\\D\\D\\D\\D\\D\\D\\D\\S\\S\\D\\D\\D\\D\\D\\S\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_non_digits() .with_conversion_of_non_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case(vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\D{8}\\S{2}\\D{5}\\S\\D{17}$") )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_non_digits() .with_conversion_of_non_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod non_digit_non_word_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\D\\D\\D\\D\\D\\D\\D\\D36\\D\\D\\D\\D\\D٣\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_non_digits() .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case(vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\D{8}36\\D{5}٣\\D{17}$") )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_non_digits() .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod non_space_non_word_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\S\\W\\W\\W\\W\\W\\W\\W\\S\\S\\W\\S\\S\\S\\W\\S\\W\\S\\S\\S\\W\\S\\S\\S\\S\\W\\S\\S\\S\\W\\W\\W\\W$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_non_whitespace() .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\S\\W{7}\\S(?:\\S\\W\\S{3}\\W){2}\\S{4}\\W\\S{3}\\W{4}$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_non_whitespace() .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod non_digit_non_space_non_word_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\D\\D\\D\\D\\D\\D\\D\\D\\S\\S\\D\\D\\D\\D\\D\\S\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_non_digits() .with_conversion_of_non_whitespace() .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case(vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\D{8}\\S{2}\\D{5}\\S\\D{17}$") )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_non_digits() .with_conversion_of_non_whitespace() .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod digit_non_digit_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\D\\D\\D\\D\\D\\D\\D\\D\\d\\d\\D\\D\\D\\D\\D\\d\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D\\D$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_digits() .with_conversion_of_non_digits() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case(vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\D{8}\\d{2}\\D{5}\\d\\D{17}$") )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_digits() .with_conversion_of_non_digits() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod space_non_space_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\S\\s\\s\\s\\S\\S\\S\\s\\S\\S\\s\\S\\S\\S\\s\\S\\s\\S\\S\\S\\s\\S\\S\\S\\S\\s\\S\\S\\S\\s\\S\\S\\S$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_whitespace() .with_conversion_of_non_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\S\\s{3}\\S(?:\\S{2}\\s){2}\\S{3}\\s(?:\\S(?:\\s\\S{3}){2}){2}$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_whitespace() .with_conversion_of_non_whitespace() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod word_non_word_conversion { use super::*; mod no_repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\W\\W\\W\\W\\W\\W\\W\\w\\w\\W\\w\\w\\w\\W\\w\\W\\w\\w\\w\\W\\w\\w\\w\\w\\W\\w\\w\\w\\W\\W\\W\\W$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_words() .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } mod repetition { use super::*; #[rstest(test_cases, expected_output, case( vec!["I ♥♥♥ 36 and ٣ and y̆y̆ and 💩💩."], "^\\w\\W{7}\\w(?:\\w\\W\\w{3}\\W){2}\\w{4}\\W\\w{3}\\W{4}$" ) )] fn succeeds(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .with_conversion_of_repetitions() .with_conversion_of_words() .with_conversion_of_non_words() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } mod anchor_conversion { use super::*; mod no_verbose { use super::*; #[rstest(test_cases, expected_output, case(vec!["aaacaac", "aac"], "aa(?:acaa)?c$"), case(vec!["My ♥♥♥ and 💩💩 is yours."], "My ♥♥♥ and 💩💩 is yours\\.$"), )] fn succeeds_without_start_anchor_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .without_start_anchor() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["aaacaac", "aac"], "^aa(?:acaa)?c"), case(vec!["My ♥♥♥ and 💩💩 is yours."], "^My ♥♥♥ and 💩💩 is yours\\."), )] fn succeeds_without_end_anchor_option(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases) .without_end_anchor() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["bab", "b", "cb", "bba"], "(?:b(?:ba|ab)?|cb)"), case(vec!["a", "aba", "baaa", "aaab"], "(?:baaa|a(?:aab|ba)?)"), case(vec!["a", "abab", "bbb", "aaac"], "(?:a(?:bab|aac)?|bbb)"), case(vec!["aaacaac", "aac"], "aa(?:acaa)?c"), case(vec!["My ♥♥♥ and 💩💩 is yours."], "My ♥♥♥ and 💩💩 is yours\\."), case( // https://github.com/pemistahl/grex/issues/31 vec!["agbhd", "eibcd", "egbcd", "fbjbf", "agbh", "eibc", "egbc", "ebc", "fbc", "cd", "f", "c", "abcd", "ebcd", "fbcd"], "(?:a(?:gbhd?|bcd)|e(?:ibcd?|gbcd?|bcd?)|f(?:b(?:jbf|cd?))?|cd?)" ) )] fn succeeds_without_anchors(test_cases: Vec<&str>, expected_output: &str) { let regexp = RegExpBuilder::from(&test_cases).without_anchors().build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } mod verbose { use super::*; #[rstest(test_cases, expected_output, case(vec!["My ♥♥♥ and 💩💩 is yours."], indoc!( r#" (?x) My\ ♥♥♥\ and\ 💩💩\ is\ yours\. $"# )) )] fn succeeds_with_verbose_mode_and_without_start_anchor_option( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) .with_verbose_mode() .without_start_anchor() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["My ♥♥♥ and 💩💩 is yours."], indoc!( r#" (?x) ^ My\ ♥♥♥\ and\ 💩💩\ is\ yours\."# )) )] fn succeeds_with_verbose_mode_and_without_end_anchor_option( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) .with_verbose_mode() .without_end_anchor() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } #[rstest(test_cases, expected_output, case(vec!["aaacaac", "aac"], indoc!( r#" (?x) aa (?: acaa )? c"# )), case(vec!["My ♥♥♥ and 💩💩 is yours."], indoc!( r#" (?x) My\ ♥♥♥\ and\ 💩💩\ is\ yours\."# )) )] fn succeeds_with_verbose_mode_and_without_anchors_option( test_cases: Vec<&str>, expected_output: &str, ) { let regexp = RegExpBuilder::from(&test_cases) .with_verbose_mode() .without_anchors() .build(); assert_that_regexp_is_correct(regexp, expected_output, &test_cases); assert_that_regexp_matches_test_cases(expected_output, test_cases); } } } fn assert_that_regexp_is_correct(regexp: String, expected_output: &str, test_cases: &[&str]) { assert_eq!( regexp, expected_output, "\n\ninput: {:?}\nexpected: {}\nactual: {}\n\n", test_cases, expected_output, regexp ); } fn assert_that_regexp_matches_test_cases(expected_output: &str, test_cases: Vec<&str>) { let regexp = Regex::new(expected_output).unwrap(); for test_case in test_cases { let substrings = regexp .find_iter(test_case) .map(|m| m.as_str()) .collect::>(); assert_eq!( substrings.len(), 1, "expression '{}' does not match test case '{}' entirely but {} of its substrings: {:?}", expected_output, test_case, substrings.len(), substrings ); } } ================================================ FILE: tests/property_tests.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #![cfg(not(target_family = "wasm"))] use grex::RegExpBuilder; use proptest::prelude::*; use regex::{Error, Regex, RegexBuilder}; proptest! { #![proptest_config(ProptestConfig::with_cases(500))] #[test] fn valid_regexes_with_default_settings( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec).build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_case_insensitive_matching( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_case_insensitive_matching() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_case_insensitive_matching_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_case_insensitive_matching() .with_verbose_mode() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_escape_sequences( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_escaping_of_non_ascii_chars(false) .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_verbose_mode() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_escape_sequences_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_escaping_of_non_ascii_chars(false) .with_verbose_mode() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_digits( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_digits() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_digits_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_digits() .with_verbose_mode() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_non_digits( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_non_digits() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_non_digits_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_non_digits() .with_verbose_mode() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_whitespace( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_whitespace() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_whitespace_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_whitespace() .with_verbose_mode() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_non_whitespace( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_non_whitespace() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_non_whitespace_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_non_whitespace() .with_verbose_mode() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_words( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_words() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_words_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_words() .with_verbose_mode() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_non_words( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_non_words() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_non_words_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_non_words() .with_verbose_mode() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_repetitions( test_cases in prop::collection::hash_set(".{1,10}", 1..=5), minimum_repetitions in 1..100u32, minimum_substring_length in 1..100u32 ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_repetitions() .with_minimum_repetitions(minimum_repetitions) .with_minimum_substring_length(minimum_substring_length) .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn valid_regexes_with_conversion_of_repetitions_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5), minimum_repetitions in 1..100u32, minimum_substring_length in 1..100u32 ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_repetitions() .with_minimum_repetitions(minimum_repetitions) .with_minimum_substring_length(minimum_substring_length) .with_verbose_mode() .build(); prop_assert!(compile_regexp(®exp).is_ok()); } #[test] fn matching_regexes_with_default_settings( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec).build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_case_insensitive_matching( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_case_insensitive_matching() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_case_insensitive_matching_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_case_insensitive_matching() .with_verbose_mode() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_escape_sequences( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_escaping_of_non_ascii_chars(false) .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_verbose_mode() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_escape_sequences_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_escaping_of_non_ascii_chars(false) .with_verbose_mode() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_digits( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_digits() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_digits_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_digits() .with_verbose_mode() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_non_digits( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_digits() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_non_digits_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_digits() .with_verbose_mode() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_whitespace( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_whitespace() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_whitespace_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_whitespace() .with_verbose_mode() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_non_whitespace( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_non_whitespace() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_non_whitespace_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_non_whitespace() .with_verbose_mode() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_words( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_words() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_words_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_words() .with_verbose_mode() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_non_words( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_non_words() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_non_words_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_non_words() .with_verbose_mode() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_repetitions( test_cases in prop::collection::hash_set(".{1,10}", 1..=5), minimum_repetitions in 1..100u32, minimum_substring_length in 1..100u32 ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_repetitions() .with_minimum_repetitions(minimum_repetitions) .with_minimum_substring_length(minimum_substring_length) .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_with_conversion_of_repetitions_and_verbose_mode( test_cases in prop::collection::hash_set(".{1,10}", 1..=5), minimum_repetitions in 1..100u32, minimum_substring_length in 1..100u32 ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_conversion_of_repetitions() .with_minimum_repetitions(minimum_repetitions) .with_minimum_substring_length(minimum_substring_length) .with_verbose_mode() .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(test_cases.iter().all(|test_case| compiled_regexp.is_match(test_case))); } } #[test] fn matching_regexes_without_start_anchor( test_cases in prop::collection::hash_set("[A-C]{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec).without_start_anchor().build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { for test_case in test_cases_vec { let substrings = compiled_regexp.find_iter(&test_case).map(|m| m.as_str()).collect::>(); prop_assert_eq!( substrings.len(), 1, "expression '{}' does not match test case '{}' entirely but {} of its substrings: {:?}", regexp, test_case, substrings.len(), substrings ); } } } #[test] fn matching_regexes_without_end_anchor( test_cases in prop::collection::hash_set("[A-C]{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec).without_end_anchor().build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { for test_case in test_cases_vec { let substrings = compiled_regexp.find_iter(&test_case).map(|m| m.as_str()).collect::>(); prop_assert_eq!( substrings.len(), 1, "expression '{}' does not match test case '{}' entirely but {} of its substrings: {:?}", regexp, test_case, substrings.len(), substrings ); } } } #[test] fn matching_regexes_without_anchors( test_cases in prop::collection::hash_set("[A-C]{1,10}", 1..=5) ) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec).without_anchors().build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { for test_case in test_cases_vec { let substrings = compiled_regexp.find_iter(&test_case).map(|m| m.as_str()).collect::>(); prop_assert_eq!( substrings.len(), 1, "expression '{}' does not match test case '{}' entirely but {} of its substrings: {:?}", regexp, test_case, substrings.len(), substrings ); } } } #[test] fn regexes_not_matching_other_strings_with_default_settings( test_cases in prop::collection::hash_set(".{1,10}", 1..=5), other_strings in prop::collection::hash_set(".{1,10}", 1..=5) ) { if test_cases.is_disjoint(&other_strings) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec).build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(other_strings.iter().all(|other_string| !compiled_regexp.is_match(other_string))); } } } #[test] fn regexes_not_matching_other_strings_with_escape_sequences( test_cases in prop::collection::hash_set(".{1,10}", 1..=5), other_strings in prop::collection::hash_set(".{1,10}", 1..=5) ) { if test_cases.is_disjoint(&other_strings) { let test_cases_vec = test_cases.iter().cloned().collect::>(); let regexp = RegExpBuilder::from(&test_cases_vec) .with_escaping_of_non_ascii_chars(false) .build(); if let Ok(compiled_regexp) = compile_regexp(®exp) { prop_assert!(other_strings.iter().all(|other_string| !compiled_regexp.is_match(other_string))); } } } } fn compile_regexp(regexp: &str) -> Result { RegexBuilder::new(regexp).size_limit(20000000).build() } ================================================ FILE: tests/python/test_grex.py ================================================ # # Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. # See the License for the specific language governing permissions and # limitations under the License. import inspect import pytest import re from grex import RegExpBuilder @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["abc", "abd", "abe"], "^ab[c-e]$"), ] ) def test_default_settings(test_cases, expected_pattern): pattern = RegExpBuilder.from_test_cases(test_cases).build() assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["My ♥ and 💩 is yours."], "^My \\u2665 and \\U0001f4a9 is yours\\.$"), ] ) def test_escaping(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_escaping_of_non_ascii_chars(use_surrogate_pairs=False) .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["My ♥ and 💩 is yours."], "^My \\u2665 and \\ud83d\\udca9 is yours\\.$"), ] ) def test_escaping_with_surrogate_pairs(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_escaping_of_non_ascii_chars(use_surrogate_pairs=True) .build()) assert pattern == expected_pattern # module re does not support matching surrogate pairs @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["efgh", "abcxy", "abcw"], "^(abc(xy|w)|efgh)$"), ] ) def test_capturing_groups(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_capturing_groups() .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["efgh", "abcxy", "abcw"], "(?:abc(?:xy|w)|efgh)"), ] ) def test_without_anchors(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .without_anchors() .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["ABC", "zBC", "abc", "AbC", "aBc"], "(?i)^[az]bc$"), ] ) def test_case_insensitive_matching(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_case_insensitive_matching() .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param( ["[a-z]", "(d,e,f)"], inspect.cleandoc(""" (?x) ^ (?: \\(d,e,f\\) | \\[a\\-z\\] ) $ """) ), ] ) def test_verbose_mode(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_verbose_mode() .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param( ["Ä@Ö€Ü", "ä@ö€ü", "Ä@ö€Ü", "ä@Ö€ü"], inspect.cleandoc(""" (?ix) ^ ä@ö€ü $ """) ) ] ) def test_case_insensitive_matching_and_verbose_mode(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_case_insensitive_matching() .with_verbose_mode() .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["a", "b\nx\nx", "c"], "^(?:b(?:\\nx){2}|[ac])$"), ] ) def test_conversion_of_repetitions(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_conversion_of_repetitions() .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["My ♥♥♥ and 💩💩 is yours."], "^My \\u2665{3} and \\U0001f4a9{2} is yours\\.$"), ] ) def test_escaping_and_conversion_of_repetitions(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_escaping_of_non_ascii_chars(use_surrogate_pairs=False) .with_conversion_of_repetitions() .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["a1b2c3"], "^a\\db\\dc\\d$"), ] ) def test_conversion_of_digits(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_conversion_of_digits() .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["a1b2c3"], "^\\D1\\D2\\D3$"), ] ) def test_conversion_of_non_digits(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_conversion_of_non_digits() .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["\n\t", "\r"], "^\\s(?:\\s)?$"), ] ) def test_conversion_of_whitespace(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_conversion_of_whitespace() .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["a1 b2 c3"], "^\\S\\S \\S\\S \\S\\S$"), ] ) def test_conversion_of_non_whitespace(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_conversion_of_non_whitespace() .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["abc", "1234"], "^\\w\\w\\w(?:\\w)?$"), ] ) def test_conversion_of_words(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_conversion_of_words() .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["abc 1234"], "^abc\\W1234$"), ] ) def test_conversion_of_non_words(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_conversion_of_non_words() .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["aababab"], "^aababab$"), pytest.param(["aabababab"], "^a(?:ab){4}$") ] ) def test_minimum_repetitions(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_conversion_of_repetitions() .with_minimum_repetitions(3) .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) @pytest.mark.parametrize( "test_cases,expected_pattern", [ pytest.param(["ababab"], "^ababab$"), pytest.param(["abcabcabc"], "^(?:abc){3}$") ] ) def test_minimum_substring_length(test_cases, expected_pattern): pattern = (RegExpBuilder.from_test_cases(test_cases) .with_conversion_of_repetitions() .with_minimum_substring_length(3) .build()) assert pattern == expected_pattern for test_case in test_cases: assert re.match(pattern, test_case) def test_error_for_empty_test_cases(): with pytest.raises(ValueError) as exception_info: RegExpBuilder.from_test_cases([]) assert ( exception_info.value.args[0] == "No test cases have been provided for regular expression generation" ) def test_error_for_invalid_minimum_repetitions(): with pytest.raises(ValueError) as exception_info: RegExpBuilder.from_test_cases(["abcd"]).with_minimum_repetitions(-4) assert ( exception_info.value.args[0] == "Quantity of minimum repetitions must be greater than zero" ) def test_error_for_invalid_minimum_substring_length(): with pytest.raises(ValueError) as exception_info: RegExpBuilder.from_test_cases(["abcd"]).with_minimum_substring_length(-2) assert ( exception_info.value.args[0] == "Minimum substring length must be greater than zero" ) ================================================ FILE: tests/wasm_browser_tests.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #![cfg(target_family = "wasm")] use grex::WasmRegExpBuilder; use indoc::indoc; use wasm_bindgen::JsValue; use wasm_bindgen_test::*; wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser); #[wasm_bindgen_test] fn assert_regexpbuilder_succeeds() { let test_cases = Box::new([JsValue::from("hello"), JsValue::from("world")]); let builder = WasmRegExpBuilder::from(test_cases); assert!(builder.is_ok()); let regexp = builder.unwrap().build(); assert_eq!(regexp, "^(?:hello|world)$"); } #[wasm_bindgen_test] fn assert_regexpbuilder_fails() { let builder = WasmRegExpBuilder::from(Box::new([])); assert_eq!( builder.err(), Some(JsValue::from( "No test cases have been provided for regular expression generation" )) ); } #[wasm_bindgen_test] fn test_conversion_of_digits() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfDigits() .build(); assert_eq!(regexp, "^(?:abc |\\d\\d\\d)$"); } #[wasm_bindgen_test] fn test_conversion_of_non_digits() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfNonDigits() .build(); assert_eq!(regexp, "^(?:\\D\\D\\D\\D\\D|123)$"); } #[wasm_bindgen_test] fn test_conversion_of_whitespace() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfWhitespace() .build(); assert_eq!(regexp, "^(?:abc\\s\\s|123)$"); } #[wasm_bindgen_test] fn test_conversion_of_non_whitespace() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfNonWhitespace() .build(); assert_eq!(regexp, "^\\S\\S\\S(?: )?$"); } #[wasm_bindgen_test] fn test_conversion_of_words() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfWords() .build(); assert_eq!(regexp, "^\\w\\w\\w(?: )?$"); } #[wasm_bindgen_test] fn test_conversion_of_non_words() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfNonWords() .build(); assert_eq!(regexp, "^(?:abc\\W\\W|123)$"); } #[wasm_bindgen_test] fn test_conversion_of_repetitions() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfRepetitions() .build(); assert_eq!(regexp, "^(?:abc {2}|123)$"); } #[wasm_bindgen_test] fn test_case_insensitive_matching() { let test_cases = Box::new([ JsValue::from("ABC"), JsValue::from("abc "), JsValue::from("123"), ]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withCaseInsensitiveMatching() .build(); assert_eq!(regexp, "(?i)^(?:abc(?: )?|123)$"); } #[wasm_bindgen_test] fn test_capturing_groups() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withCapturingGroups() .build(); assert_eq!(regexp, "^(abc |123)$"); } #[wasm_bindgen_test] fn test_escaping_of_non_ascii_chars() { let test_cases = Box::new([ JsValue::from("abc "), JsValue::from("123"), JsValue::from("♥"), ]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withEscapingOfNonAsciiChars(false) .build(); assert_eq!(regexp, "^(?:abc |123|\\u{2665})$"); } #[wasm_bindgen_test] fn test_verbose_mode() { let test_cases = Box::new([ JsValue::from("abc "), JsValue::from("123"), JsValue::from("♥"), ]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withVerboseMode() .build(); assert_eq!( regexp, indoc!( r#" (?x) ^ (?: abc\ \ | 123 | ♥ ) $"# ) ); } #[wasm_bindgen_test] fn test_without_start_anchor() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withoutStartAnchor() .build(); assert_eq!(regexp, "(?:abc |123)$"); } #[wasm_bindgen_test] fn test_without_end_anchor() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withoutEndAnchor() .build(); assert_eq!(regexp, "^(?:abc |123)"); } #[wasm_bindgen_test] fn test_without_anchors() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withoutAnchors() .build(); assert_eq!(regexp, "(?:abc |123)"); } #[wasm_bindgen_test] fn test_minimum_repetitions() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let builder = WasmRegExpBuilder::from(test_cases) .unwrap() .withMinimumRepetitions(0); assert_eq!( builder.err(), Some(JsValue::from( "Quantity of minimum repetitions must be greater than zero" )) ); } #[wasm_bindgen_test] fn test_minimum_substring_length() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let builder = WasmRegExpBuilder::from(test_cases) .unwrap() .withMinimumSubstringLength(0); assert_eq!( builder.err(), Some(JsValue::from( "Minimum substring length must be greater than zero" )) ); } ================================================ FILE: tests/wasm_node_tests.rs ================================================ /* * Copyright © 2019-today Peter M. Stahl pemistahl@gmail.com * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either expressed or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #![cfg(target_family = "wasm")] use grex::WasmRegExpBuilder; use indoc::indoc; use wasm_bindgen::JsValue; use wasm_bindgen_test::*; #[wasm_bindgen_test] fn assert_regexpbuilder_succeeds() { let test_cases = Box::new([JsValue::from("hello"), JsValue::from("world")]); let builder = WasmRegExpBuilder::from(test_cases); assert!(builder.is_ok()); let regexp = builder.unwrap().build(); assert_eq!(regexp, "^(?:hello|world)$"); } #[wasm_bindgen_test] fn assert_regexpbuilder_fails() { let builder = WasmRegExpBuilder::from(Box::new([])); assert_eq!( builder.err(), Some(JsValue::from( "No test cases have been provided for regular expression generation" )) ); } #[wasm_bindgen_test] fn test_conversion_of_digits() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfDigits() .build(); assert_eq!(regexp, "^(?:abc |\\d\\d\\d)$"); } #[wasm_bindgen_test] fn test_conversion_of_non_digits() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfNonDigits() .build(); assert_eq!(regexp, "^(?:\\D\\D\\D\\D\\D|123)$"); } #[wasm_bindgen_test] fn test_conversion_of_whitespace() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfWhitespace() .build(); assert_eq!(regexp, "^(?:abc\\s\\s|123)$"); } #[wasm_bindgen_test] fn test_conversion_of_non_whitespace() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfNonWhitespace() .build(); assert_eq!(regexp, "^\\S\\S\\S(?: )?$"); } #[wasm_bindgen_test] fn test_conversion_of_words() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfWords() .build(); assert_eq!(regexp, "^\\w\\w\\w(?: )?$"); } #[wasm_bindgen_test] fn test_conversion_of_non_words() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfNonWords() .build(); assert_eq!(regexp, "^(?:abc\\W\\W|123)$"); } #[wasm_bindgen_test] fn test_conversion_of_repetitions() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withConversionOfRepetitions() .build(); assert_eq!(regexp, "^(?:abc {2}|123)$"); } #[wasm_bindgen_test] fn test_case_insensitive_matching() { let test_cases = Box::new([ JsValue::from("ABC"), JsValue::from("abc "), JsValue::from("123"), ]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withCaseInsensitiveMatching() .build(); assert_eq!(regexp, "(?i)^(?:abc(?: )?|123)$"); } #[wasm_bindgen_test] fn test_capturing_groups() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withCapturingGroups() .build(); assert_eq!(regexp, "^(abc |123)$"); } #[wasm_bindgen_test] fn test_escaping_of_non_ascii_chars() { let test_cases = Box::new([ JsValue::from("abc "), JsValue::from("123"), JsValue::from("♥"), ]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withEscapingOfNonAsciiChars(false) .build(); assert_eq!(regexp, "^(?:abc |123|\\u{2665})$"); } #[wasm_bindgen_test] fn test_verbose_mode() { let test_cases = Box::new([ JsValue::from("abc "), JsValue::from("123"), JsValue::from("♥"), ]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withVerboseMode() .build(); assert_eq!( regexp, indoc!( r#" (?x) ^ (?: abc\ \ | 123 | ♥ ) $"# ) ); } #[wasm_bindgen_test] fn test_without_start_anchor() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withoutStartAnchor() .build(); assert_eq!(regexp, "(?:abc |123)$"); } #[wasm_bindgen_test] fn test_without_end_anchor() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withoutEndAnchor() .build(); assert_eq!(regexp, "^(?:abc |123)"); } #[wasm_bindgen_test] fn test_without_anchors() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let regexp = WasmRegExpBuilder::from(test_cases) .unwrap() .withoutAnchors() .build(); assert_eq!(regexp, "(?:abc |123)"); } #[wasm_bindgen_test] fn test_minimum_repetitions() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let builder = WasmRegExpBuilder::from(test_cases) .unwrap() .withMinimumRepetitions(0); assert_eq!( builder.err(), Some(JsValue::from( "Quantity of minimum repetitions must be greater than zero" )) ); } #[wasm_bindgen_test] fn test_minimum_substring_length() { let test_cases = Box::new([JsValue::from("abc "), JsValue::from("123")]); let builder = WasmRegExpBuilder::from(test_cases) .unwrap() .withMinimumSubstringLength(0); assert_eq!( builder.err(), Some(JsValue::from( "Minimum substring length must be greater than zero" )) ); }