Repository: huggingface/tokenizers Branch: main Commit: c4e27cfc84d7 Files: 347 Total size: 4.1 MB Directory structure: gitextract_9bwzq4yg/ ├── .github/ │ ├── conda/ │ │ ├── bld.bat │ │ ├── build.sh │ │ └── meta.yaml │ ├── stale.yml │ └── workflows/ │ ├── CI.yml │ ├── build_documentation.yml │ ├── build_pr_documentation.yml │ ├── delete_doc_comment.yml │ ├── delete_doc_comment_trigger.yml │ ├── docs-check.yml │ ├── node-release.yml │ ├── node.yml │ ├── python-release.yml │ ├── python.yml │ ├── rust-release.yml │ ├── rust.yml │ ├── stale.yml │ ├── trufflehog.yml │ └── upload_pr_documentation.yml ├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── RELEASE.md ├── bindings/ │ ├── node/ │ │ ├── .cargo/ │ │ │ └── config.toml │ │ ├── .editorconfig │ │ ├── .eslintrc.yml │ │ ├── .gitattributes │ │ ├── .gitignore │ │ ├── .prettierignore │ │ ├── .taplo.toml │ │ ├── .yarn/ │ │ │ └── releases/ │ │ │ └── yarn-3.5.1.cjs │ │ ├── .yarnrc.yml │ │ ├── Cargo.toml │ │ ├── LICENSE │ │ ├── Makefile │ │ ├── README.md │ │ ├── build.rs │ │ ├── examples/ │ │ │ └── documentation/ │ │ │ ├── pipeline.test.ts │ │ │ └── quicktour.test.ts │ │ ├── index.d.ts │ │ ├── index.js │ │ ├── jest.config.js │ │ ├── lib/ │ │ │ └── bindings/ │ │ │ ├── __mocks__/ │ │ │ │ ├── merges.txt │ │ │ │ ├── vocab.json │ │ │ │ └── vocab.txt │ │ │ ├── decoders.test.ts │ │ │ ├── encoding.test.ts │ │ │ ├── models.test.ts │ │ │ ├── normalizers.test.ts │ │ │ ├── post-processors.test.ts │ │ │ ├── pre-tokenizers.test.ts │ │ │ ├── tokenizer.test.ts │ │ │ └── utils.test.ts │ │ ├── npm/ │ │ │ ├── android-arm-eabi/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── android-arm64/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── darwin-arm64/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── darwin-x64/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── freebsd-x64/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-arm-gnueabihf/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-arm64-gnu/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-arm64-musl/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-x64-gnu/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── linux-x64-musl/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── win32-arm64-msvc/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ ├── win32-ia32-msvc/ │ │ │ │ ├── README.md │ │ │ │ └── package.json │ │ │ └── win32-x64-msvc/ │ │ │ ├── README.md │ │ │ └── package.json │ │ ├── package.json │ │ ├── rustfmt.toml │ │ ├── src/ │ │ │ ├── arc_rwlock_serde.rs │ │ │ ├── decoders.rs │ │ │ ├── encoding.rs │ │ │ ├── lib.rs │ │ │ ├── models.rs │ │ │ ├── normalizers.rs │ │ │ ├── pre_tokenizers.rs │ │ │ ├── processors.rs │ │ │ ├── tasks/ │ │ │ │ ├── mod.rs │ │ │ │ ├── models.rs │ │ │ │ └── tokenizer.rs │ │ │ ├── tokenizer.rs │ │ │ ├── trainers.rs │ │ │ └── utils.rs │ │ ├── tsconfig.json │ │ └── types.ts │ └── python/ │ ├── .cargo/ │ │ └── config.toml │ ├── .gitignore │ ├── CHANGELOG.md │ ├── Cargo.toml │ ├── MANIFEST.in │ ├── Makefile │ ├── README.md │ ├── benches/ │ │ └── test_tiktoken.py │ ├── conftest.py │ ├── docs/ │ │ └── pyo3.md │ ├── examples/ │ │ ├── custom_components.py │ │ ├── example.py │ │ ├── train_bert_wordpiece.py │ │ ├── train_bytelevel_bpe.py │ │ ├── train_with_datasets.py │ │ └── using_the_visualizer.ipynb │ ├── py_src/ │ │ └── tokenizers/ │ │ ├── __init__.py │ │ ├── __init__.pyi │ │ ├── decoders/ │ │ │ └── __init__.py │ │ ├── decoders.pyi │ │ ├── implementations/ │ │ │ ├── __init__.py │ │ │ ├── base_tokenizer.py │ │ │ ├── bert_wordpiece.py │ │ │ ├── byte_level_bpe.py │ │ │ ├── char_level_bpe.py │ │ │ ├── sentencepiece_bpe.py │ │ │ └── sentencepiece_unigram.py │ │ ├── models/ │ │ │ └── __init__.py │ │ ├── models.pyi │ │ ├── normalizers/ │ │ │ └── __init__.py │ │ ├── normalizers.pyi │ │ ├── pre_tokenizers/ │ │ │ └── __init__.py │ │ ├── pre_tokenizers.pyi │ │ ├── processors/ │ │ │ └── __init__.py │ │ ├── processors.pyi │ │ ├── py.typed │ │ ├── tokenizers.pyi │ │ ├── tools/ │ │ │ ├── __init__.py │ │ │ ├── visualizer-styles.css │ │ │ └── visualizer.py │ │ ├── trainers/ │ │ │ ├── __init__.py │ │ │ └── __init__.pyi │ │ └── trainers.pyi │ ├── pyproject.toml │ ├── rust-toolchain │ ├── scripts/ │ │ ├── convert.py │ │ ├── sentencepiece_extractor.py │ │ └── spm_parity_check.py │ ├── setup.cfg │ ├── src/ │ │ ├── decoders.rs │ │ ├── encoding.rs │ │ ├── error.rs │ │ ├── lib.rs │ │ ├── models.rs │ │ ├── normalizers.rs │ │ ├── pre_tokenizers.rs │ │ ├── processors.rs │ │ ├── token.rs │ │ ├── tokenizer.rs │ │ ├── trainers.rs │ │ └── utils/ │ │ ├── iterators.rs │ │ ├── mod.rs │ │ ├── normalization.rs │ │ ├── pretokenization.rs │ │ ├── regex.rs │ │ └── serde_pyo3.rs │ ├── stub.py │ ├── test.txt │ ├── tests/ │ │ ├── __init__.py │ │ ├── bindings/ │ │ │ ├── __init__.py │ │ │ ├── test_decoders.py │ │ │ ├── test_encoding.py │ │ │ ├── test_models.py │ │ │ ├── test_normalizers.py │ │ │ ├── test_pre_tokenizers.py │ │ │ ├── test_processors.py │ │ │ ├── test_tokenizer.py │ │ │ └── test_trainers.py │ │ ├── documentation/ │ │ │ ├── __init__.py │ │ │ ├── test_pipeline.py │ │ │ ├── test_quicktour.py │ │ │ └── test_tutorial_train_from_iterators.py │ │ ├── implementations/ │ │ │ ├── __init__.py │ │ │ ├── test_base_tokenizer.py │ │ │ ├── test_bert_wordpiece.py │ │ │ ├── test_byte_level_bpe.py │ │ │ ├── test_char_bpe.py │ │ │ └── test_sentencepiece.py │ │ ├── test_serialization.py │ │ └── utils.py │ └── tools/ │ └── stub-gen/ │ ├── Cargo.toml │ └── src/ │ └── main.rs ├── docs/ │ ├── Makefile │ ├── README.md │ ├── source/ │ │ ├── _ext/ │ │ │ ├── entities.py │ │ │ ├── rust_doc.py │ │ │ └── toctree_tags.py │ │ ├── _static/ │ │ │ ├── css/ │ │ │ │ ├── Calibre-Medium.otf │ │ │ │ ├── Calibre-Regular.otf │ │ │ │ ├── Calibre-Thin.otf │ │ │ │ ├── code-snippets.css │ │ │ │ └── huggingface.css │ │ │ └── js/ │ │ │ └── custom.js │ │ ├── api/ │ │ │ ├── node.inc │ │ │ ├── python.inc │ │ │ ├── reference.rst │ │ │ └── rust.inc │ │ ├── components.rst │ │ ├── conf.py │ │ ├── entities.inc │ │ ├── index.rst │ │ ├── installation/ │ │ │ ├── main.rst │ │ │ ├── node.inc │ │ │ ├── python.inc │ │ │ └── rust.inc │ │ ├── pipeline.rst │ │ ├── quicktour.rst │ │ └── tutorials/ │ │ └── python/ │ │ └── training_from_memory.rst │ └── source-doc-builder/ │ ├── _toctree.yml │ ├── api/ │ │ ├── added-tokens.mdx │ │ ├── decoders.mdx │ │ ├── encode-inputs.mdx │ │ ├── encoding.mdx │ │ ├── input-sequences.mdx │ │ ├── models.mdx │ │ ├── normalizers.mdx │ │ ├── post-processors.mdx │ │ ├── pre-tokenizers.mdx │ │ ├── tokenizer.mdx │ │ ├── trainers.mdx │ │ └── visualizer.mdx │ ├── components.mdx │ ├── index.mdx │ ├── installation.mdx │ ├── pipeline.mdx │ ├── quicktour.mdx │ └── training_from_memory.mdx └── tokenizers/ ├── CHANGELOG.md ├── Cargo.toml ├── Makefile ├── README.md ├── README.tpl ├── benches/ │ ├── added_vocab_deserialize.rs │ ├── bert_benchmark.rs │ ├── bpe_benchmark.rs │ ├── common/ │ │ └── mod.rs │ ├── layout_benchmark.rs │ ├── llama3_benchmark.rs │ └── unigram_benchmark.rs ├── examples/ │ ├── encode_batch.rs │ ├── serialization.rs │ └── unstable_wasm/ │ ├── .gitignore │ ├── Cargo.toml │ ├── README.md │ ├── src/ │ │ ├── lib.rs │ │ └── utils.rs │ ├── tests/ │ │ └── web.rs │ └── www/ │ ├── .gitignore │ ├── .travis.yml │ ├── LICENSE-APACHE │ ├── LICENSE-MIT │ ├── README.md │ ├── bootstrap.js │ ├── index.html │ ├── index.js │ ├── package.json │ └── webpack.config.js ├── rust-toolchain ├── src/ │ ├── decoders/ │ │ ├── bpe.rs │ │ ├── byte_fallback.rs │ │ ├── ctc.rs │ │ ├── fuse.rs │ │ ├── mod.rs │ │ ├── sequence.rs │ │ ├── strip.rs │ │ └── wordpiece.rs │ ├── lib.rs │ ├── models/ │ │ ├── bpe/ │ │ │ ├── mod.rs │ │ │ ├── model.rs │ │ │ ├── serialization.rs │ │ │ ├── trainer.rs │ │ │ └── word.rs │ │ ├── mod.rs │ │ ├── unigram/ │ │ │ ├── lattice.rs │ │ │ ├── mod.rs │ │ │ ├── model.rs │ │ │ ├── serialization.rs │ │ │ ├── trainer.rs │ │ │ └── trie.rs │ │ ├── wordlevel/ │ │ │ ├── mod.rs │ │ │ ├── serialization.rs │ │ │ └── trainer.rs │ │ └── wordpiece/ │ │ ├── mod.rs │ │ ├── serialization.rs │ │ └── trainer.rs │ ├── normalizers/ │ │ ├── bert.rs │ │ ├── byte_level.rs │ │ ├── mod.rs │ │ ├── precompiled.rs │ │ ├── prepend.rs │ │ ├── replace.rs │ │ ├── strip.rs │ │ ├── unicode.rs │ │ └── utils.rs │ ├── pre_tokenizers/ │ │ ├── bert.rs │ │ ├── byte_level.rs │ │ ├── delimiter.rs │ │ ├── digits.rs │ │ ├── fixed_length.rs │ │ ├── metaspace.rs │ │ ├── mod.rs │ │ ├── punctuation.rs │ │ ├── sequence.rs │ │ ├── split.rs │ │ ├── unicode_scripts/ │ │ │ ├── mod.rs │ │ │ ├── pre_tokenizer.rs │ │ │ └── scripts.rs │ │ └── whitespace.rs │ ├── processors/ │ │ ├── bert.rs │ │ ├── mod.rs │ │ ├── roberta.rs │ │ ├── sequence.rs │ │ └── template.rs │ ├── tokenizer/ │ │ ├── added_vocabulary.rs │ │ ├── encoding.rs │ │ ├── mod.rs │ │ ├── normalizer.rs │ │ ├── pattern.rs │ │ ├── pre_tokenizer.rs │ │ └── serialization.rs │ └── utils/ │ ├── cache.rs │ ├── fancy.rs │ ├── from_pretrained.rs │ ├── iter.rs │ ├── mod.rs │ ├── onig.rs │ ├── padding.rs │ ├── parallelism.rs │ ├── progress.rs │ └── truncation.rs └── tests/ ├── added_tokens.rs ├── common/ │ └── mod.rs ├── documentation.rs ├── from_pretrained.rs ├── offsets.rs ├── serialization.rs ├── stream.rs ├── training.rs └── unigram.rs ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/conda/bld.bat ================================================ cd bindings\python %PYTHON% -m pip install . --prefix=%PREFIX% ================================================ FILE: .github/conda/build.sh ================================================ cd bindings/python $PYTHON -m pip install . --prefix=$PREFIX ================================================ FILE: .github/conda/meta.yaml ================================================ {% set name = "tokenizers" %} package: name: "{{ name|lower }}" version: "{{ TOKENIZERS_VERSION }}" source: path: ../../ requirements: host: - pip - python x.x - setuptools - setuptools-rust - pkg-config - openssl - maturin run: - python x.x test: imports: - tokenizers - tokenizers.models about: home: https://huggingface.co/docs/tokenizers license: Apache License 2.0 license_file: LICENSE summary: "💥 Fast State-of-the-Art Tokenizers optimized for Research and Production" ================================================ FILE: .github/stale.yml ================================================ # Number of days of inactivity before an issue becomes stale daysUntilStale: 60 # Number of days of inactivity before a stale issue is closed daysUntilClose: 7 # Issues with these labels will never be considered stale exemptLabels: - pinned - security # Label to use when marking an issue as stale staleLabel: wontfix # Comment to post when marking an issue as stale. Set to `false` to disable markComment: > This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions. # Comment to post when closing a stale issue. Set to `false` to disable closeComment: false ================================================ FILE: .github/workflows/CI.yml ================================================ # This file is autogenerated by maturin v1.7.4 # To update, run # # maturin generate-ci github -m bindings/python/Cargo.toml # name: CI on: push: branches: - main - master tags: - '*' pull_request: workflow_dispatch: permissions: contents: read jobs: linux: runs-on: ${{ matrix.platform.runner }} strategy: matrix: platform: - runner: ubuntu-latest target: x86_64 - runner: ubuntu-latest target: x86 - runner: ubuntu-latest target: aarch64 - runner: ubuntu-latest target: armv7 - runner: ubuntu-latest target: s390x - runner: ubuntu-latest target: ppc64le steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: 3.x - name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} args: --release --out dist --manifest-path bindings/python/Cargo.toml sccache: 'true' manylinux: auto - name: Upload wheels uses: actions/upload-artifact@v4 with: name: wheels-linux-${{ matrix.platform.target }} path: dist musllinux: runs-on: ${{ matrix.platform.runner }} strategy: matrix: platform: - runner: ubuntu-latest target: x86_64 - runner: ubuntu-latest target: x86 - runner: ubuntu-latest target: aarch64 - runner: ubuntu-latest target: armv7 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: 3.x - name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} args: --release --out dist --manifest-path bindings/python/Cargo.toml sccache: 'true' manylinux: musllinux_1_2 - name: Upload wheels uses: actions/upload-artifact@v4 with: name: wheels-musllinux-${{ matrix.platform.target }} path: dist windows: runs-on: ${{ matrix.platform.runner }} strategy: matrix: platform: - runner: windows-latest target: x64 architecture: x64 - runner: windows-latest target: x86 architecture: x86 - runner: windows-11-arm target: aarch64 architecture: arm64 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: 3.x architecture: ${{ matrix.platform.architecture }} - name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} args: --release --out dist --manifest-path bindings/python/Cargo.toml sccache: 'true' - name: Upload wheels uses: actions/upload-artifact@v4 with: name: wheels-windows-${{ matrix.platform.target }} path: dist macos: runs-on: ${{ matrix.platform.runner }} strategy: matrix: platform: - runner: macos-15-intel target: x86_64 - runner: macos-14 target: aarch64 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: 3.x - name: Build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.platform.target }} args: --release --out dist --manifest-path bindings/python/Cargo.toml sccache: 'true' - name: Upload wheels uses: actions/upload-artifact@v4 with: name: wheels-macos-${{ matrix.platform.target }} path: dist sdist: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Build sdist uses: PyO3/maturin-action@v1 with: command: sdist args: --out dist --manifest-path bindings/python/Cargo.toml - name: Upload sdist uses: actions/upload-artifact@v4 with: name: wheels-sdist path: dist release: name: Release runs-on: ubuntu-latest if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }} needs: [linux, musllinux, windows, macos, sdist] permissions: # Use to sign the release artifacts id-token: write # Used to upload release artifacts contents: write # Used to generate artifact attestation attestations: write steps: - uses: actions/download-artifact@v4 - name: Generate artifact attestation uses: actions/attest-build-provenance@v1 with: subject-path: 'wheels-*/*' - name: Publish to PyPI if: "startsWith(github.ref, 'refs/tags/')" uses: PyO3/maturin-action@v1 env: MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_TOKEN_DIST}} with: command: upload args: --non-interactive --skip-existing wheels-*/* ================================================ FILE: .github/workflows/build_documentation.yml ================================================ name: Build documentation on: push: branches: - main - doc-builder* - v*-release - use_templates jobs: build: uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main with: commit_sha: ${{ github.sha }} package: tokenizers path_to_docs: tokenizers/docs/source-doc-builder/ package_path: tokenizers/bindings/python/ install_rust: true secrets: hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} ================================================ FILE: .github/workflows/build_pr_documentation.yml ================================================ name: Build PR Documentation on: pull_request: concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: build: uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main with: commit_sha: ${{ github.event.pull_request.head.sha }} pr_number: ${{ github.event.number }} package: tokenizers path_to_docs: tokenizers/docs/source-doc-builder/ package_path: tokenizers/bindings/python/ install_rust: true ================================================ FILE: .github/workflows/delete_doc_comment.yml ================================================ name: Delete doc comment on: workflow_run: workflows: ["Delete doc comment trigger"] types: - completed jobs: delete: uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main secrets: comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} ================================================ FILE: .github/workflows/delete_doc_comment_trigger.yml ================================================ name: Delete doc comment trigger on: pull_request: types: [ closed ] jobs: delete: uses: huggingface/doc-builder/.github/workflows/delete_doc_comment_trigger.yml@main with: pr_number: ${{ github.event.number }} ================================================ FILE: .github/workflows/docs-check.yml ================================================ name: Documentation on: push: branches: - main pull_request: jobs: build: runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v4 - name: Install Python uses: actions/setup-python@v5 with: python-version: 3.12 - name: Install dependencies run: pip install sphinx sphinx_rtd_theme setuptools-rust - name: Install Rust uses: dtolnay/rust-toolchain@stable - name: Build tokenizers working-directory: ./bindings/python run: pip install -e . - name: Build documentation working-directory: ./docs run: make clean && make html_all O="-W --keep-going" - name: Upload built doc uses: actions/upload-artifact@v4 with: name: documentation path: ./docs/build/* ================================================ FILE: .github/workflows/node-release.yml ================================================ name: Node Release env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} AWS_DEFAULT_REGION: us-east-1 on: push: tags: - node-v* jobs: build: env: MACOSX_DEPLOYMENT_TARGET: 10.11 strategy: matrix: settings: - host: macos-latest target: x86_64-apple-darwin - host: windows-latest target: x86_64-pc-windows-msvc - host: ubuntu-latest target: x86_64-unknown-linux-gnu runs-on: ${{ matrix.settings.host }} steps: - name: Checkout repository uses: actions/checkout@v4 - name: Install Rust uses: dtolnay/rust-toolchain@stable # Necessary for now for the cargo cache: https://github.com/actions/cache/issues/133#issuecomment-599102035 - if: matrix.os == 'ubuntu-latest' run: sudo chown -R $(whoami):$(id -ng) ~/.cargo/ - name: Cache Cargo Registry uses: actions/cache@v4 with: path: ~/.cargo/registry key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.toml') }} - name: Install Node ${{ matrix.node-version }} uses: actions/setup-node@v4 with: node-version: latest cache: yarn cache-dependency-path: ./bindings/node/ - name: Install npm dependencies working-directory: ./bindings/node run: yarn install - name: Build and package rust working-directory: ./bindings/node run: | yarn build && strip -x *.node - name: Install Python uses: actions/setup-python@v5 with: python-version: 3.x - name: Upload artifact uses: actions/upload-artifact@v4 with: name: bindings-${{ matrix.settings.target }} path: ${{ env.APP_NAME }}bindings/node/*.node if-no-files-found: error publish: name: Publish runs-on: ubuntu-latest needs: - build steps: - uses: actions/checkout@v4 - name: Setup node uses: actions/setup-node@v4 with: node-version: latest check-latest: true cache: yarn cache-dependency-path: ./bindings/node/ - name: Install dependencies working-directory: ./bindings/node run: yarn install - name: Download all artifacts uses: actions/download-artifact@v4 with: path: ./bindings/node/artifacts - name: Move artifacts working-directory: ./bindings/node run: yarn artifacts - name: List packages working-directory: ./bindings/node run: ls -R ./npm shell: bash - name: Publish working-directory: ./bindings/node run: | echo "//registry.npmjs.org/:_authToken=$NPM_TOKEN" >> ~/.npmrc npm publish --access public --tag next env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} NPM_TOKEN: ${{ secrets.NPM_TOKEN }} ================================================ FILE: .github/workflows/node.yml ================================================ name: Node on: push: branches: - main paths-ignore: - bindings/python/** pull_request: paths-ignore: - bindings/python/** jobs: build_and_test: name: Check everything builds runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v4 - name: Install Rust uses: dtolnay/rust-toolchain@stable with: components: rustfmt, clippy # Necessary for now for the cargo cache: https://github.com/actions/cache/issues/133#issuecomment-599102035 - run: sudo chown -R $(whoami):$(id -ng) ~/.cargo/ - name: Cache Cargo Registry uses: actions/cache@v4 with: path: ~/.cargo/registry key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} - name: Install Node uses: actions/setup-node@v4 with: node-version: latest - name: Install dependencies working-directory: ./bindings/node run: yarn install - name: Build all working-directory: ./bindings/node run: yarn build - name: Lint Rust formatting uses: actions-rs/cargo@v1 with: command: fmt args: --manifest-path ./bindings/node/Cargo.toml -- --check - name: Lint Rust with Clippy uses: actions-rs/cargo@v1 with: command: clippy args: --manifest-path ./bindings/node/Cargo.toml --all-targets --all-features -- -D warnings - name: Lint TS working-directory: ./bindings/node run: yarn lint - name: Run JS tests working-directory: ./bindings/node run: make test ================================================ FILE: .github/workflows/python-release.yml ================================================ name: Python Release on: push: tags: - v* env: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} AWS_DEFAULT_REGION: us-east-1 PYPI_TOKEN: ${{ secrets.PYPI_TOKEN_DIST }} DIST_DIR: ${{ github.sha }} jobs: lock_exists: runs-on: ubuntu-latest name: Cargo.lock steps: - uses: actions/checkout@v4 - name: Cargo.lock lock exists run: cat Cargo.lock working-directory: ./bindings/python build: name: build on ${{ matrix.platform || matrix.os }} (${{ matrix.target }} - ${{ matrix.manylinux || 'auto' }}) # only run on push to main and on release needs: [lock_exists] if: startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || contains(github.event.pull_request.labels.*.name, 'Full Build') strategy: fail-fast: false matrix: os: [ubuntu, macos, windows] target: [x86_64, aarch64] manylinux: [auto] include: - os: ubuntu platform: linux - os: windows ls: dir interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.9 pypy3.10 - os: windows ls: dir target: x86_64 python-architecture: x64 interpreter: 3.9 3.10 3.11 3.12 3.13 - os: windows ls: dir target: i686 python-architecture: x86 python-install: | 3.9 3.10 3.11 3.12 3.13 interpreter: 3.9 3.10 3.11 3.12 3.13 - os: windows-11-arm ls: dir target: aarch64 python-architecture: arm64 python-install: | 3.11 3.12 3.13 interpreter: 3.11 3.12 3.13 # - os: windows # ls: dir # target: aarch64 # interpreter: 3.11 3.12 - os: macos target: aarch64 interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.9 pypy3.10 - os: ubuntu platform: linux target: i686 - os: ubuntu platform: linux target: aarch64 - os: ubuntu platform: linux target: armv7 interpreter: 3.9 3.10 3.11 3.12 3.13 # musllinux - os: ubuntu platform: linux target: x86_64 manylinux: musllinux_1_1 - os: ubuntu platform: linux target: aarch64 manylinux: musllinux_1_1 - os: ubuntu platform: linux target: ppc64le interpreter: 3.9 3.10 3.11 3.12 3.13 - os: ubuntu platform: linux target: s390x interpreter: 3.9 3.10 3.11 3.12 3.13 exclude: - os: windows target: aarch64 # # Optimized PGO builds for x86_64 manylinux and windows follow a different matrix, # # maybe in future maturin-action can support this automatically # - os: ubuntu # target: x86_64 # manylinux: auto # - os: windows # target: x86_64 # Windows on arm64 only supports Python 3.11+ runs-on: ${{ matrix.os == 'windows-11-arm' && matrix.os || format('{0}-latest', matrix.os) }} steps: - uses: actions/checkout@v4 - name: set up python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-install || '3.13' }} architecture: ${{ matrix.python-architecture || 'x64' }} - run: pip install -U twine - name: build wheels uses: PyO3/maturin-action@v1 with: target: ${{ matrix.target }} working-directory: ./bindings/python manylinux: ${{ matrix.manylinux || 'auto' }} container: ${{ matrix.container }} args: --release --out dist --interpreter ${{ matrix.interpreter || '3.9 3.10 3.11 3.12 3.13 pypy3.9 pypy3.10' }} ${{ matrix.extra-build-args }} rust-toolchain: stable docker-options: -e CI - run: ${{ matrix.ls || 'ls -lh' }} dist/ working-directory: ./bindings/python - run: twine check --strict dist/* working-directory: ./bindings/python - uses: actions/upload-artifact@v4 with: name: pypi_files-${{ matrix.os }}-${{ matrix.target }}-${{ matrix.manylinux }} path: ./bindings/python/dist build-sdist: name: build sdist needs: [lock_exists] runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: PyO3/maturin-action@v1 with: working-directory: ./bindings/python command: sdist args: --out dist rust-toolchain: stable - uses: actions/upload-artifact@v4 with: name: pypi_files-srt path: ./bindings/python/dist upload_package: name: Upload package to PyPi runs-on: ubuntu-latest needs: [build, build-sdist] steps: - uses: actions/checkout@v4 - name: Install Python uses: actions/setup-python@v5 with: python-version: "3.13" architecture: x64 - uses: actions/download-artifact@v4 with: path: ./bindings/python/dist merge-multiple: true # Temporary deactivation while testing abi3 CI # - name: Upload to PyPi # working-directory: ./bindings/python # run: | # pip install twine # twine upload dist/* -u __token__ -p "$PYPI_TOKEN" ================================================ FILE: .github/workflows/python.yml ================================================ name: Python on: push: branches: - main paths-ignore: - bindings/node/** pull_request: paths-ignore: - bindings/node/** jobs: build_win_32: name: Check it builds for Windows 32-bit runs-on: windows-latest strategy: matrix: python: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14", "3.14t"] steps: - name: Checkout repository uses: actions/checkout@v4 - name: Install Rust uses: actions-rs/toolchain@v1 with: toolchain: stable-i686-pc-windows-msvc override: true - name: Override toolchain shell: bash working-directory: ./bindings/python run: echo "stable-i686-pc-windows-msvc" > rust-toolchain - name: Install Python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} architecture: x86 - name: Build uses: actions-rs/cargo@v1 with: command: build args: --manifest-path ./bindings/python/Cargo.toml build_and_test: name: Check everything builds & tests runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, macos-latest] python: ["3.14", "3.14t"] steps: - name: Checkout repository uses: actions/checkout@v4 - name: Install Rust uses: actions-rs/toolchain@v1 with: toolchain: stable components: rustfmt, clippy - name: Install audit uses: actions-rs/cargo@v1 with: command: install args: cargo-audit - name: Install Python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} architecture: "x64" - name: Cache Cargo Registry uses: actions/cache@v4 with: path: ~/.cargo/registry key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} # - name: Cache Cargo Build Target # uses: actions/cache@v1 # with: # path: ./bindings/python/target # key: ${{ runner.os }}-cargo-python-build-${{ hashFiles('**/Cargo.lock') }} - name: Lint with RustFmt uses: actions-rs/cargo@v1 with: toolchain: stable command: fmt args: --manifest-path ./bindings/python/Cargo.toml -- --check - name: Lint with Clippy uses: actions-rs/cargo@v1 with: command: clippy args: --manifest-path ./bindings/python/Cargo.toml --all-targets --all-features -- -D warnings - name: Install cargo-audit run: cargo install cargo-audit - name: Run Audit uses: actions-rs/cargo@v1 with: command: audit args: -D warnings -f ./bindings/python/Cargo.lock --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2025-0014 --ignore RUSTSEC-2025-0119 --ignore RUSTSEC-2024-0436 - name: Install working-directory: ./bindings/python run: | python -m venv .env source .env/bin/activate pip install -U pip pip install pytest requests setuptools_rust numpy pyarrow datasets ty pip install -e .[dev] - name: Check style working-directory: ./bindings/python run: | source .env/bin/activate make check-style - name: Type check working-directory: ./bindings/python run: | source .env/bin/activate ty check py_src --exclude py_src/tokenizers/implementations - name: Run tests working-directory: ./bindings/python run: | source .env/bin/activate make test ================================================ FILE: .github/workflows/rust-release.yml ================================================ name: Rust Release env: CRATES_TOKEN: ${{ secrets.CRATES_TOKEN }} on: push: tags: - v* jobs: rust_publish: runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v4 - name: Install Rust uses: dtolnay/rust-toolchain@stable - name: Cache Cargo Registry uses: actions/cache@v4 with: path: ~/.cargo/registry key: ubuntu-latest-cargo-registry-${{ hashFiles('**/Cargo.toml') }} - name: Publish package rust working-directory: ./tokenizers if: ${{ !contains(github.ref, 'rc') }} run: cargo publish --token ${CRATES_TOKEN} ================================================ FILE: .github/workflows/rust.yml ================================================ name: Rust on: push: branches: - main pull_request: jobs: build: runs-on: ${{ matrix.os }} env: MACOSX_DEPLOYMENT_TARGET: 10.12 strategy: matrix: os: [ubuntu-latest, windows-latest, macOS-latest] steps: - uses: actions/checkout@v4 - name: Install Rust Stable uses: actions-rs/toolchain@v1 with: toolchain: stable components: rustfmt, clippy override: true # Necessary for now for the cargo cache: https://github.com/actions/cache/issues/133#issuecomment-599102035 - if: matrix.os == 'ubuntu-latest' run: sudo chown -R $(whoami):$(id -ng) ~/.cargo/ - name: Install cargo-readme for Ubuntu if: matrix.os == 'ubuntu-latest' uses: actions-rs/cargo@v1 with: command: install args: cargo-readme - name: Install audit uses: actions-rs/cargo@v1 with: command: install args: cargo-audit - name: Build uses: actions-rs/cargo@v1 with: command: build args: --all-targets --verbose --manifest-path ./tokenizers/Cargo.toml - name: Lint with RustFmt uses: actions-rs/cargo@v1 with: command: fmt args: --manifest-path ./tokenizers/Cargo.toml -- --check - name: Lint Benchmarks with RustFmt uses: actions-rs/cargo@v1 with: command: fmt args: --manifest-path ./tokenizers/Cargo.toml -- ./tokenizers/benches/bpe_benchmark.rs --check - name: Lint with Clippy uses: actions-rs/cargo@v1 with: command: clippy args: --manifest-path ./tokenizers/Cargo.toml --all-targets --all-features -- -D warnings - name: Run Tests if: matrix.os != 'windows-latest' shell: bash working-directory: ./tokenizers run: make test # Skip integration tests for now on Windows - name: Run lib Tests on Windows if: matrix.os == 'windows-latest' uses: actions-rs/cargo@v1 with: command: test args: --verbose --manifest-path ./tokenizers/Cargo.toml --lib - name: Run doc Tests on Windows if: matrix.os == 'windows-latest' uses: actions-rs/cargo@v1 with: command: test args: --verbose --manifest-path ./tokenizers/Cargo.toml --doc - name: Install cargo-audit run: cargo install cargo-audit - name: Run Audit uses: actions-rs/cargo@v1 with: command: audit args: -D warnings -f ./tokenizers/Cargo.lock --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2025-0014 --ignore RUSTSEC-2025-0119 # Verify that Readme.md is up to date. - name: Make sure, Readme generated from lib.rs matches actual Readme if: matrix.os == 'ubuntu-latest' shell: bash working-directory: ./tokenizers run: cargo readme > must_match_readme.md && diff must_match_readme.md README.md - name: Check semver if: matrix.os == 'ubuntu-latest' uses: obi1kenobi/cargo-semver-checks-action@v2 with: manifest-path: ./tokenizers/Cargo.toml ================================================ FILE: .github/workflows/stale.yml ================================================ name: 'Close stale issues and PRs' on: schedule: - cron: '30 1 * * *' jobs: stale: runs-on: ubuntu-latest steps: - uses: actions/stale@v9 with: stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.' days-before-stale: 30 days-before-close: 5 ================================================ FILE: .github/workflows/trufflehog.yml ================================================ on: push: name: Secret Leaks jobs: trufflehog: runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v4 with: fetch-depth: 0 - name: Secret Scanning uses: trufflesecurity/trufflehog@853e1e8d249fd1e29d0fcc7280d29b03df3d643d with: # exclude buggy postgres detector that is causing false positives and not relevant to our codebase extra_args: --results=verified,unknown --exclude-detectors=postgres ================================================ FILE: .github/workflows/upload_pr_documentation.yml ================================================ name: Upload PR Documentation on: workflow_run: workflows: ["Build PR Documentation"] types: - completed jobs: build: uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main with: package_name: tokenizers secrets: hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} ================================================ FILE: .gitignore ================================================ .DS_Store *~ .vim .env target .idea **/Cargo.lock /data tokenizers/data bindings/python/tests/data docs/build/ docs/make.bat __pycache__ pip-wheel-metadata *.egg-info *.so /bindings/python/examples/.ipynb_checkpoints /bindings/python/build /bindings/python/dist .vscode *.code-workspace ================================================ FILE: CITATION.cff ================================================ # This CITATION.cff file was generated with cffinit. # Visit https://bit.ly/cffinit to generate yours today! cff-version: 1.2.0 title: HuggingFace's Tokenizers message: >- Fast State-of-the-Art Tokenizers optimized for Research and Production. type: software authors: - given-names: Anthony family-names: Moi email: m.anthony.moi@gmail.com affiliation: HuggingFace - given-names: Nicolas family-names: Patry affiliation: HuggingFace repository-code: 'https://github.com/huggingface/tokenizers' url: 'https://github.com/huggingface/tokenizers' repository: 'https://huggingface.co' abstract: >- Fast State-of-the-Art Tokenizers optimized for Research and Production. keywords: - Rust - Tokenizer - NLP license: Apache-2.0 commit: 37372b6 version: 0.13.4 date-released: '2023-04-05' ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================



Build GitHub

Provides an implementation of today's most used tokenizers, with a focus on performance and versatility. ## Main features: - Train new vocabularies and tokenize, using today's most used tokenizers. - Extremely fast (both training and tokenization), thanks to the Rust implementation. Takes less than 20 seconds to tokenize a GB of text on a server's CPU. - Easy to use, but also extremely versatile. - Designed for research and production. - Normalization comes with alignments tracking. It's always possible to get the part of the original sentence that corresponds to a given token. - Does all the pre-processing: Truncate, Pad, add the special tokens your model needs. ## Performances Performances can vary depending on hardware, but running the [~/bindings/python/benches/test_tiktoken.py](bindings/python/benches/test_tiktoken.py) should give the following on a g6 aws instance: ![image](https://github.com/user-attachments/assets/2b913d4b-e488-4cbc-b542-f90a6c40643d) ## Bindings We provide bindings to the following languages (more to come!): - [Rust](https://github.com/huggingface/tokenizers/tree/main/tokenizers) (Original implementation) - [Python](https://github.com/huggingface/tokenizers/tree/main/bindings/python) - [Node.js](https://github.com/huggingface/tokenizers/tree/main/bindings/node) - [Ruby](https://github.com/ankane/tokenizers-ruby) (Contributed by @ankane, external repo) ## Installation You can install from source using: ```bash pip install git+https://github.com/huggingface/tokenizers.git#subdirectory=bindings/python ``` or install the released versions with ```bash pip install tokenizers ``` ## Quick example using Python: Choose your model between Byte-Pair Encoding, WordPiece or Unigram and instantiate a tokenizer: ```python from tokenizers import Tokenizer from tokenizers.models import BPE tokenizer = Tokenizer(BPE()) ``` You can customize how pre-tokenization (e.g., splitting into words) is done: ```python from tokenizers.pre_tokenizers import Whitespace tokenizer.pre_tokenizer = Whitespace() ``` Then training your tokenizer on a set of files just takes two lines of codes: ```python from tokenizers.trainers import BpeTrainer trainer = BpeTrainer(special_tokens=["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"]) tokenizer.train(files=["wiki.train.raw", "wiki.valid.raw", "wiki.test.raw"], trainer=trainer) ``` Once your tokenizer is trained, encode any text with just one line: ```python output = tokenizer.encode("Hello, y'all! How are you 😁 ?") print(output.tokens) # ["Hello", ",", "y", "'", "all", "!", "How", "are", "you", "[UNK]", "?"] ``` Check the [documentation](https://huggingface.co/docs/tokenizers/index) or the [quicktour](https://huggingface.co/docs/tokenizers/quicktour) to learn more! ================================================ FILE: RELEASE.md ================================================ ## How to release # Before the release Simple checklist on how to make releases for `tokenizers`. - Freeze `master` branch. - Run all tests (Check CI has properly run) - If any significant work, check benchmarks: - `cd tokenizers && cargo bench` (needs to be run on latest release tag to measure difference if it's your first time) - Run all `transformers` tests. (`transformers` is a big user of `tokenizers` we need to make sure we don't break it, testing is one way to make sure nothing unforeseen has been done.) - Run all fast tests at the VERY least (not just the tokenization tests). (`RUN_PIPELINE_TESTS=1 CUDA_VISIBLE_DEVICES=-1 pytest -sv tests/`) - When all *fast* tests work, then we can also (it's recommended) run the whole `transformers` test suite. - Rebase this [PR](https://github.com/huggingface/transformers/pull/16708). This will create new docker images ready to run the tests suites with `tokenizers` from the main branch. - Wait for actions to finish - Rebase this [PR](https://github.com/huggingface/transformers/pull/16712) This will run the actual full test suite. - Check the results. - **If any breaking change has been done**, make sure the version can safely be increased for transformers users (`tokenizers` version need to make sure users don't upgrade before `transformers` has). [link](https://github.com/huggingface/transformers/blob/main/setup.py#L154) For instance `tokenizers>=0.10,<0.11` so we can safely upgrade to `0.11` without impacting current users - Then start a new PR containing all desired code changes from the following steps. - You will `Create release` after the code modifications are on `master`. # Rust - `tokenizers` (rust, python & node) versions don't have to be in sync but it's very common to release for all versions at once for new features. - Edit `Cargo.toml` to reflect new version - Edit `CHANGELOG.md`: - Add relevant PRs that were added (python PRs do not belong for instance). - Add links at the end of the files. - Go to [Releases](https://github.com/huggingface/tokenizers/releases) - Create new Release: - Mark it as pre-release - Use new version name with a new tag (create on publish) `vX.X.X`. - Copy paste the new part of the `CHANGELOG.md` - ⚠️ Click on `Publish release`. This will start the whole process of building a uploading the new version on `crates.io`, there's no going back after this - Go to the [Actions](https://github.com/huggingface/tokenizers/actions) tab and check everything works smoothly. - If anything fails, you need to fix the CI/CD to make it work again. Since your package was not uploaded to the repository properly, you can try again. # Python - Edit `bindings/python/setup.py` to reflect new version. - Edit `bindings/python/py_src/tokenizers/__init__.py` to reflect new version. - Edit `CHANGELOG.md`: - Add relevant PRs that were added (node PRs do not belong for instance). - Add links at the end of the files. - Go to [Releases](https://github.com/huggingface/tokenizers/releases) - Create new Release: - Mark it as pre-release - Use new version name with a new tag (create on publish) `python-vX.X.X`. - Copy paste the new part of the `CHANGELOG.md` - ⚠️ Click on `Publish release`. This will start the whole process of building a uploading the new version on `pypi`, there's no going back after this - Go to the [Actions](https://github.com/huggingface/tokenizers/actions) tab and check everything works smoothly. - If anything fails, you need to fix the CI/CD to make it work again. Since your package was not uploaded to the repository properly, you can try again. - This CI/CD has 3 distinct builds, `Pypi`(normal), `conda` and `extra`. `Extra` is REALLY slow (~4h), this is normal since it has to rebuild many things, but enables the wheel to be available for old Linuxes # Node - Edit `bindings/node/package.json` to reflect new version. - Edit `CHANGELOG.md`: - Add relevant PRs that were added (python PRs do not belong for instance). - Add links at the end of the files. - Go to [Releases](https://github.com/huggingface/tokenizers/releases) - Create new Release: - Mark it as pre-release - Use new version name with a new tag (create on publish) `node-vX.X.X`. - Copy paste the new part of the `CHANGELOG.md` - ⚠️ Click on `Publish release`. This will start the whole process of building a uploading the new version on `npm`, there's no going back after this - Go to the [Actions](https://github.com/huggingface/tokenizers/actions) tab and check everything works smoothly. - If anything fails, you need to fix the CI/CD to make it work again. Since your package was not uploaded to the repository properly, you can try again. # Testing the CI/CD for release If you want to make modifications to the CI/CD of the release GH actions, you need to : - **Comment the part that uploads the artifacts** to `crates.io`, `PyPi` or `npm`. - Change the trigger mechanism so it can trigger every time you push to your branch. - Keep pushing your changes until the artifacts are properly created. ================================================ FILE: bindings/node/.cargo/config.toml ================================================ [target.aarch64-unknown-linux-musl] linker = "aarch64-linux-musl-gcc" rustflags = ["-C", "target-feature=-crt-static"] ================================================ FILE: bindings/node/.editorconfig ================================================ # EditorConfig helps developers define and maintain consistent # coding styles between different editors or IDEs # http://editorconfig.org root = true [*] indent_style = space indent_size = 2 end_of_line = lf charset = utf-8 trim_trailing_whitespace = true insert_final_newline = true [*.md] trim_trailing_whitespace = false ================================================ FILE: bindings/node/.eslintrc.yml ================================================ parser: '@typescript-eslint/parser' parserOptions: ecmaFeatures: jsx: true ecmaVersion: latest sourceType: module project: ./tsconfig.json env: browser: true es6: true node: true jest: true ignorePatterns: ['index.js', 'target/'] plugins: - import - '@typescript-eslint' extends: - eslint:recommended - plugin:prettier/recommended rules: # 0 = off, 1 = warn, 2 = error 'space-before-function-paren': 0 'no-useless-constructor': 0 'no-undef': 2 'no-console': [2, { allow: ['error', 'warn', 'info', 'assert'] }] 'comma-dangle': ['error', 'only-multiline'] 'no-unused-vars': 0 'no-var': 2 'one-var-declaration-per-line': 2 'prefer-const': 2 'no-const-assign': 2 'no-duplicate-imports': 2 'no-use-before-define': [2, { 'functions': false, 'classes': false }] 'eqeqeq': [2, 'always', { 'null': 'ignore' }] 'no-case-declarations': 0 'no-restricted-syntax': [ 2, { 'selector': 'BinaryExpression[operator=/(==|===|!=|!==)/][left.raw=true], BinaryExpression[operator=/(==|===|!=|!==)/][right.raw=true]', 'message': Don't compare for equality against boolean literals, }, ] # https://github.com/benmosher/eslint-plugin-import/pull/334 'import/no-duplicates': 2 'import/first': 2 'import/newline-after-import': 2 'import/order': [ 2, { 'newlines-between': 'always', 'alphabetize': { 'order': 'asc' }, 'groups': ['builtin', 'external', 'internal', 'parent', 'sibling', 'index'], }, ] overrides: - files: - ./**/*{.ts,.tsx} rules: 'no-unused-vars': [2, { varsIgnorePattern: '^_', argsIgnorePattern: '^_', ignoreRestSiblings: true }] 'no-undef': 0 # TypeScript declare merge 'no-redeclare': 0 'no-useless-constructor': 0 'no-dupe-class-members': 0 'no-case-declarations': 0 'no-duplicate-imports': 0 # TypeScript Interface and Type 'no-use-before-define': 0 '@typescript-eslint/adjacent-overload-signatures': 2 '@typescript-eslint/await-thenable': 2 '@typescript-eslint/consistent-type-assertions': 2 '@typescript-eslint/ban-types': [ 'error', { 'types': { 'String': { 'message': 'Use string instead', 'fixWith': 'string' }, 'Number': { 'message': 'Use number instead', 'fixWith': 'number' }, 'Boolean': { 'message': 'Use boolean instead', 'fixWith': 'boolean' }, 'Function': { 'message': 'Use explicit type instead' }, }, }, ] '@typescript-eslint/explicit-member-accessibility': [ 'error', { accessibility: 'explicit', overrides: { accessors: 'no-public', constructors: 'no-public', methods: 'no-public', properties: 'no-public', parameterProperties: 'explicit', }, }, ] '@typescript-eslint/method-signature-style': 2 '@typescript-eslint/no-floating-promises': 2 '@typescript-eslint/no-implied-eval': 2 '@typescript-eslint/no-for-in-array': 2 '@typescript-eslint/no-inferrable-types': 2 '@typescript-eslint/no-invalid-void-type': 2 '@typescript-eslint/no-misused-new': 2 '@typescript-eslint/no-misused-promises': 2 '@typescript-eslint/no-namespace': 2 '@typescript-eslint/no-non-null-asserted-optional-chain': 2 '@typescript-eslint/no-throw-literal': 2 '@typescript-eslint/no-unnecessary-boolean-literal-compare': 2 '@typescript-eslint/prefer-for-of': 2 '@typescript-eslint/prefer-nullish-coalescing': 2 '@typescript-eslint/switch-exhaustiveness-check': 2 '@typescript-eslint/prefer-optional-chain': 2 '@typescript-eslint/prefer-readonly': 2 '@typescript-eslint/prefer-string-starts-ends-with': 0 '@typescript-eslint/no-array-constructor': 2 '@typescript-eslint/require-await': 2 '@typescript-eslint/return-await': 2 '@typescript-eslint/ban-ts-comment': [2, { 'ts-expect-error': false, 'ts-ignore': true, 'ts-nocheck': true, 'ts-check': false }] '@typescript-eslint/naming-convention': [ 2, { selector: 'memberLike', format: ['camelCase', 'PascalCase'], modifiers: ['private'], leadingUnderscore: 'forbid', }, ] '@typescript-eslint/no-unused-vars': [2, { varsIgnorePattern: '^_', argsIgnorePattern: '^_', ignoreRestSiblings: true }] '@typescript-eslint/member-ordering': [ 2, { default: [ 'public-static-field', 'protected-static-field', 'private-static-field', 'public-static-method', 'protected-static-method', 'private-static-method', 'public-instance-field', 'protected-instance-field', 'private-instance-field', 'public-constructor', 'protected-constructor', 'private-constructor', 'public-instance-method', 'protected-instance-method', 'private-instance-method', ], }, ] ================================================ FILE: bindings/node/.gitattributes ================================================ # Auto detect text files and perform LF normalization * text=auto *.ts text eol=lf merge=union *.tsx text eol=lf merge=union *.rs text eol=lf merge=union *.js text eol=lf merge=union *.json text eol=lf merge=union *.debug text eol=lf merge=union # Generated codes index.js linguist-detectable=false index.d.ts linguist-detectable=false ================================================ FILE: bindings/node/.gitignore ================================================ # Created by https://www.toptal.com/developers/gitignore/api/node # Edit at https://www.toptal.com/developers/gitignore?templates=node ### Node ### # Logs logs *.log npm-debug.log* yarn-debug.log* yarn-error.log* lerna-debug.log* # Diagnostic reports (https://nodejs.org/api/report.html) report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json # Runtime data pids *.pid *.seed *.pid.lock # Directory for instrumented libs generated by jscoverage/JSCover lib-cov # Coverage directory used by tools like istanbul coverage *.lcov # nyc test coverage .nyc_output # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) .grunt # Bower dependency directory (https://bower.io/) bower_components # node-waf configuration .lock-wscript # Compiled binary addons (https://nodejs.org/api/addons.html) build/Release # Dependency directories node_modules/ jspm_packages/ # TypeScript v1 declaration files typings/ # TypeScript cache *.tsbuildinfo # Optional npm cache directory .npm # Optional eslint cache .eslintcache # Microbundle cache .rpt2_cache/ .rts2_cache_cjs/ .rts2_cache_es/ .rts2_cache_umd/ # Optional REPL history .node_repl_history # Output of 'npm pack' *.tgz # Yarn Integrity file .yarn-integrity # dotenv environment variables file .env .env.test # parcel-bundler cache (https://parceljs.org/) .cache # Next.js build output .next # Nuxt.js build / generate output .nuxt dist # Gatsby files .cache/ # Comment in the public line in if your project uses Gatsby and not Next.js # https://nextjs.org/blog/next-9-1#public-directory-support # public # vuepress build output .vuepress/dist # Serverless directories .serverless/ # FuseBox cache .fusebox/ # DynamoDB Local files .dynamodb/ # TernJS port file .tern-port # Stores VSCode versions used for testing VSCode extensions .vscode-test # End of https://www.toptal.com/developers/gitignore/api/node #Added by cargo /target Cargo.lock *.node .pnp.* .yarn/* !.yarn/patches !.yarn/plugins !.yarn/releases !.yarn/sdks !.yarn/versions ================================================ FILE: bindings/node/.prettierignore ================================================ target .yarn ================================================ FILE: bindings/node/.taplo.toml ================================================ exclude = ["node_modules/**/*.toml"] # https://taplo.tamasfe.dev/configuration/formatter-options.html [formatting] align_entries = true indent_tables = true reorder_keys = true ================================================ FILE: bindings/node/.yarn/releases/yarn-3.5.1.cjs ================================================ #!/usr/bin/env node /* eslint-disable */ //prettier-ignore (()=>{var Sge=Object.create;var lS=Object.defineProperty;var vge=Object.getOwnPropertyDescriptor;var xge=Object.getOwnPropertyNames;var Pge=Object.getPrototypeOf,Dge=Object.prototype.hasOwnProperty;var J=(r=>typeof require<"u"?require:typeof Proxy<"u"?new Proxy(r,{get:(e,t)=>(typeof require<"u"?require:e)[t]}):r)(function(r){if(typeof require<"u")return require.apply(this,arguments);throw new Error('Dynamic require of "'+r+'" is not supported')});var kge=(r,e)=>()=>(r&&(e=r(r=0)),e);var w=(r,e)=>()=>(e||r((e={exports:{}}).exports,e),e.exports),ut=(r,e)=>{for(var t in e)lS(r,t,{get:e[t],enumerable:!0})},Rge=(r,e,t,i)=>{if(e&&typeof e=="object"||typeof e=="function")for(let n of xge(e))!Dge.call(r,n)&&n!==t&&lS(r,n,{get:()=>e[n],enumerable:!(i=vge(e,n))||i.enumerable});return r};var Pe=(r,e,t)=>(t=r!=null?Sge(Pge(r)):{},Rge(e||!r||!r.__esModule?lS(t,"default",{value:r,enumerable:!0}):t,r));var vU=w((j7e,SU)=>{SU.exports=bU;bU.sync=$ge;var BU=J("fs");function _ge(r,e){var t=e.pathExt!==void 0?e.pathExt:process.env.PATHEXT;if(!t||(t=t.split(";"),t.indexOf("")!==-1))return!0;for(var i=0;i{kU.exports=PU;PU.sync=efe;var xU=J("fs");function PU(r,e,t){xU.stat(r,function(i,n){t(i,i?!1:DU(n,e))})}function efe(r,e){return DU(xU.statSync(r),e)}function DU(r,e){return r.isFile()&&tfe(r,e)}function tfe(r,e){var t=r.mode,i=r.uid,n=r.gid,s=e.uid!==void 0?e.uid:process.getuid&&process.getuid(),o=e.gid!==void 0?e.gid:process.getgid&&process.getgid(),a=parseInt("100",8),l=parseInt("010",8),c=parseInt("001",8),u=a|l,g=t&c||t&l&&n===o||t&a&&i===s||t&u&&s===0;return g}});var NU=w((W7e,FU)=>{var J7e=J("fs"),lI;process.platform==="win32"||global.TESTING_WINDOWS?lI=vU():lI=RU();FU.exports=SS;SS.sync=rfe;function SS(r,e,t){if(typeof e=="function"&&(t=e,e={}),!t){if(typeof Promise!="function")throw new TypeError("callback not provided");return new Promise(function(i,n){SS(r,e||{},function(s,o){s?n(s):i(o)})})}lI(r,e||{},function(i,n){i&&(i.code==="EACCES"||e&&e.ignoreErrors)&&(i=null,n=!1),t(i,n)})}function rfe(r,e){try{return lI.sync(r,e||{})}catch(t){if(e&&e.ignoreErrors||t.code==="EACCES")return!1;throw t}}});var HU=w((z7e,KU)=>{var Dg=process.platform==="win32"||process.env.OSTYPE==="cygwin"||process.env.OSTYPE==="msys",LU=J("path"),ife=Dg?";":":",TU=NU(),OU=r=>Object.assign(new Error(`not found: ${r}`),{code:"ENOENT"}),MU=(r,e)=>{let t=e.colon||ife,i=r.match(/\//)||Dg&&r.match(/\\/)?[""]:[...Dg?[process.cwd()]:[],...(e.path||process.env.PATH||"").split(t)],n=Dg?e.pathExt||process.env.PATHEXT||".EXE;.CMD;.BAT;.COM":"",s=Dg?n.split(t):[""];return Dg&&r.indexOf(".")!==-1&&s[0]!==""&&s.unshift(""),{pathEnv:i,pathExt:s,pathExtExe:n}},UU=(r,e,t)=>{typeof e=="function"&&(t=e,e={}),e||(e={});let{pathEnv:i,pathExt:n,pathExtExe:s}=MU(r,e),o=[],a=c=>new Promise((u,g)=>{if(c===i.length)return e.all&&o.length?u(o):g(OU(r));let f=i[c],h=/^".*"$/.test(f)?f.slice(1,-1):f,p=LU.join(h,r),C=!h&&/^\.[\\\/]/.test(r)?r.slice(0,2)+p:p;u(l(C,c,0))}),l=(c,u,g)=>new Promise((f,h)=>{if(g===n.length)return f(a(u+1));let p=n[g];TU(c+p,{pathExt:s},(C,y)=>{if(!C&&y)if(e.all)o.push(c+p);else return f(c+p);return f(l(c,u,g+1))})});return t?a(0).then(c=>t(null,c),t):a(0)},nfe=(r,e)=>{e=e||{};let{pathEnv:t,pathExt:i,pathExtExe:n}=MU(r,e),s=[];for(let o=0;o{"use strict";var GU=(r={})=>{let e=r.env||process.env;return(r.platform||process.platform)!=="win32"?"PATH":Object.keys(e).reverse().find(i=>i.toUpperCase()==="PATH")||"Path"};vS.exports=GU;vS.exports.default=GU});var WU=w((X7e,JU)=>{"use strict";var jU=J("path"),sfe=HU(),ofe=YU();function qU(r,e){let t=r.options.env||process.env,i=process.cwd(),n=r.options.cwd!=null,s=n&&process.chdir!==void 0&&!process.chdir.disabled;if(s)try{process.chdir(r.options.cwd)}catch{}let o;try{o=sfe.sync(r.command,{path:t[ofe({env:t})],pathExt:e?jU.delimiter:void 0})}catch{}finally{s&&process.chdir(i)}return o&&(o=jU.resolve(n?r.options.cwd:"",o)),o}function afe(r){return qU(r)||qU(r,!0)}JU.exports=afe});var zU=w((Z7e,PS)=>{"use strict";var xS=/([()\][%!^"`<>&|;, *?])/g;function Afe(r){return r=r.replace(xS,"^$1"),r}function lfe(r,e){return r=`${r}`,r=r.replace(/(\\*)"/g,'$1$1\\"'),r=r.replace(/(\\*)$/,"$1$1"),r=`"${r}"`,r=r.replace(xS,"^$1"),e&&(r=r.replace(xS,"^$1")),r}PS.exports.command=Afe;PS.exports.argument=lfe});var XU=w((_7e,VU)=>{"use strict";VU.exports=/^#!(.*)/});var _U=w(($7e,ZU)=>{"use strict";var cfe=XU();ZU.exports=(r="")=>{let e=r.match(cfe);if(!e)return null;let[t,i]=e[0].replace(/#! ?/,"").split(" "),n=t.split("/").pop();return n==="env"?i:i?`${n} ${i}`:n}});var eK=w((eZe,$U)=>{"use strict";var DS=J("fs"),ufe=_U();function gfe(r){let t=Buffer.alloc(150),i;try{i=DS.openSync(r,"r"),DS.readSync(i,t,0,150,0),DS.closeSync(i)}catch{}return ufe(t.toString())}$U.exports=gfe});var nK=w((tZe,iK)=>{"use strict";var ffe=J("path"),tK=WU(),rK=zU(),hfe=eK(),pfe=process.platform==="win32",dfe=/\.(?:com|exe)$/i,Cfe=/node_modules[\\/].bin[\\/][^\\/]+\.cmd$/i;function mfe(r){r.file=tK(r);let e=r.file&&hfe(r.file);return e?(r.args.unshift(r.file),r.command=e,tK(r)):r.file}function Efe(r){if(!pfe)return r;let e=mfe(r),t=!dfe.test(e);if(r.options.forceShell||t){let i=Cfe.test(e);r.command=ffe.normalize(r.command),r.command=rK.command(r.command),r.args=r.args.map(s=>rK.argument(s,i));let n=[r.command].concat(r.args).join(" ");r.args=["/d","/s","/c",`"${n}"`],r.command=process.env.comspec||"cmd.exe",r.options.windowsVerbatimArguments=!0}return r}function Ife(r,e,t){e&&!Array.isArray(e)&&(t=e,e=null),e=e?e.slice(0):[],t=Object.assign({},t);let i={command:r,args:e,options:t,file:void 0,original:{command:r,args:e}};return t.shell?i:Efe(i)}iK.exports=Ife});var aK=w((rZe,oK)=>{"use strict";var kS=process.platform==="win32";function RS(r,e){return Object.assign(new Error(`${e} ${r.command} ENOENT`),{code:"ENOENT",errno:"ENOENT",syscall:`${e} ${r.command}`,path:r.command,spawnargs:r.args})}function yfe(r,e){if(!kS)return;let t=r.emit;r.emit=function(i,n){if(i==="exit"){let s=sK(n,e,"spawn");if(s)return t.call(r,"error",s)}return t.apply(r,arguments)}}function sK(r,e){return kS&&r===1&&!e.file?RS(e.original,"spawn"):null}function wfe(r,e){return kS&&r===1&&!e.file?RS(e.original,"spawnSync"):null}oK.exports={hookChildProcess:yfe,verifyENOENT:sK,verifyENOENTSync:wfe,notFoundError:RS}});var LS=w((iZe,kg)=>{"use strict";var AK=J("child_process"),FS=nK(),NS=aK();function lK(r,e,t){let i=FS(r,e,t),n=AK.spawn(i.command,i.args,i.options);return NS.hookChildProcess(n,i),n}function Bfe(r,e,t){let i=FS(r,e,t),n=AK.spawnSync(i.command,i.args,i.options);return n.error=n.error||NS.verifyENOENTSync(n.status,i),n}kg.exports=lK;kg.exports.spawn=lK;kg.exports.sync=Bfe;kg.exports._parse=FS;kg.exports._enoent=NS});var uK=w((nZe,cK)=>{"use strict";function Qfe(r,e){function t(){this.constructor=r}t.prototype=e.prototype,r.prototype=new t}function Zl(r,e,t,i){this.message=r,this.expected=e,this.found=t,this.location=i,this.name="SyntaxError",typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,Zl)}Qfe(Zl,Error);Zl.buildMessage=function(r,e){var t={literal:function(c){return'"'+n(c.text)+'"'},class:function(c){var u="",g;for(g=0;g0){for(g=1,f=1;g>",ie=me(">>",!1),de=">&",_e=me(">&",!1),Pt=">",It=me(">",!1),Or="<<<",ii=me("<<<",!1),gi="<&",hr=me("<&",!1),fi="<",ni=me("<",!1),Us=function(m){return{type:"argument",segments:[].concat(...m)}},pr=function(m){return m},Ii="$'",rs=me("$'",!1),ga="'",dA=me("'",!1),cg=function(m){return[{type:"text",text:m}]},is='""',CA=me('""',!1),fa=function(){return{type:"text",text:""}},wp='"',mA=me('"',!1),EA=function(m){return m},wr=function(m){return{type:"arithmetic",arithmetic:m,quoted:!0}},Ll=function(m){return{type:"shell",shell:m,quoted:!0}},ug=function(m){return{type:"variable",...m,quoted:!0}},Io=function(m){return{type:"text",text:m}},gg=function(m){return{type:"arithmetic",arithmetic:m,quoted:!1}},Bp=function(m){return{type:"shell",shell:m,quoted:!1}},Qp=function(m){return{type:"variable",...m,quoted:!1}},vr=function(m){return{type:"glob",pattern:m}},se=/^[^']/,yo=Je(["'"],!0,!1),Rn=function(m){return m.join("")},fg=/^[^$"]/,Qt=Je(["$",'"'],!0,!1),Tl=`\\ `,Fn=me(`\\ `,!1),ns=function(){return""},ss="\\",gt=me("\\",!1),wo=/^[\\$"`]/,At=Je(["\\","$",'"',"`"],!1,!1),An=function(m){return m},S="\\a",Tt=me("\\a",!1),hg=function(){return"a"},Ol="\\b",bp=me("\\b",!1),Sp=function(){return"\b"},vp=/^[Ee]/,xp=Je(["E","e"],!1,!1),Pp=function(){return"\x1B"},G="\\f",yt=me("\\f",!1),IA=function(){return"\f"},Wi="\\n",Ml=me("\\n",!1),Xe=function(){return` `},ha="\\r",pg=me("\\r",!1),OE=function(){return"\r"},Dp="\\t",ME=me("\\t",!1),ar=function(){return" "},Nn="\\v",Ul=me("\\v",!1),kp=function(){return"\v"},Ks=/^[\\'"?]/,pa=Je(["\\","'",'"',"?"],!1,!1),ln=function(m){return String.fromCharCode(parseInt(m,16))},Te="\\x",dg=me("\\x",!1),Kl="\\u",Hs=me("\\u",!1),Hl="\\U",yA=me("\\U",!1),Cg=function(m){return String.fromCodePoint(parseInt(m,16))},mg=/^[0-7]/,da=Je([["0","7"]],!1,!1),Ca=/^[0-9a-fA-f]/,rt=Je([["0","9"],["a","f"],["A","f"]],!1,!1),Bo=nt(),wA="-",Gl=me("-",!1),Gs="+",Yl=me("+",!1),UE=".",Rp=me(".",!1),Eg=function(m,b,N){return{type:"number",value:(m==="-"?-1:1)*parseFloat(b.join("")+"."+N.join(""))}},Fp=function(m,b){return{type:"number",value:(m==="-"?-1:1)*parseInt(b.join(""))}},KE=function(m){return{type:"variable",...m}},jl=function(m){return{type:"variable",name:m}},HE=function(m){return m},Ig="*",BA=me("*",!1),Rr="/",GE=me("/",!1),Ys=function(m,b,N){return{type:b==="*"?"multiplication":"division",right:N}},js=function(m,b){return b.reduce((N,K)=>({left:N,...K}),m)},yg=function(m,b,N){return{type:b==="+"?"addition":"subtraction",right:N}},QA="$((",R=me("$((",!1),q="))",Ce=me("))",!1),Ue=function(m){return m},Re="$(",ze=me("$(",!1),dt=function(m){return m},Ft="${",Ln=me("${",!1),Jb=":-",P1=me(":-",!1),D1=function(m,b){return{name:m,defaultValue:b}},Wb=":-}",k1=me(":-}",!1),R1=function(m){return{name:m,defaultValue:[]}},zb=":+",F1=me(":+",!1),N1=function(m,b){return{name:m,alternativeValue:b}},Vb=":+}",L1=me(":+}",!1),T1=function(m){return{name:m,alternativeValue:[]}},Xb=function(m){return{name:m}},O1="$",M1=me("$",!1),U1=function(m){return e.isGlobPattern(m)},K1=function(m){return m},Zb=/^[a-zA-Z0-9_]/,_b=Je([["a","z"],["A","Z"],["0","9"],"_"],!1,!1),$b=function(){return T()},eS=/^[$@*?#a-zA-Z0-9_\-]/,tS=Je(["$","@","*","?","#",["a","z"],["A","Z"],["0","9"],"_","-"],!1,!1),H1=/^[(){}<>$|&; \t"']/,wg=Je(["(",")","{","}","<",">","$","|","&",";"," "," ",'"',"'"],!1,!1),rS=/^[<>&; \t"']/,iS=Je(["<",">","&",";"," "," ",'"',"'"],!1,!1),YE=/^[ \t]/,jE=Je([" "," "],!1,!1),Q=0,Me=0,bA=[{line:1,column:1}],d=0,E=[],I=0,k;if("startRule"in e){if(!(e.startRule in i))throw new Error(`Can't start parsing from rule "`+e.startRule+'".');n=i[e.startRule]}function T(){return r.substring(Me,Q)}function Z(){return Et(Me,Q)}function te(m,b){throw b=b!==void 0?b:Et(Me,Q),Ri([lt(m)],r.substring(Me,Q),b)}function we(m,b){throw b=b!==void 0?b:Et(Me,Q),Tn(m,b)}function me(m,b){return{type:"literal",text:m,ignoreCase:b}}function Je(m,b,N){return{type:"class",parts:m,inverted:b,ignoreCase:N}}function nt(){return{type:"any"}}function wt(){return{type:"end"}}function lt(m){return{type:"other",description:m}}function it(m){var b=bA[m],N;if(b)return b;for(N=m-1;!bA[N];)N--;for(b=bA[N],b={line:b.line,column:b.column};Nd&&(d=Q,E=[]),E.push(m))}function Tn(m,b){return new Zl(m,null,null,b)}function Ri(m,b,N){return new Zl(Zl.buildMessage(m,b),m,b,N)}function SA(){var m,b;return m=Q,b=Mr(),b===t&&(b=null),b!==t&&(Me=m,b=s(b)),m=b,m}function Mr(){var m,b,N,K,ce;if(m=Q,b=Ur(),b!==t){for(N=[],K=He();K!==t;)N.push(K),K=He();N!==t?(K=ma(),K!==t?(ce=os(),ce===t&&(ce=null),ce!==t?(Me=m,b=o(b,K,ce),m=b):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t)}else Q=m,m=t;if(m===t)if(m=Q,b=Ur(),b!==t){for(N=[],K=He();K!==t;)N.push(K),K=He();N!==t?(K=ma(),K===t&&(K=null),K!==t?(Me=m,b=a(b,K),m=b):(Q=m,m=t)):(Q=m,m=t)}else Q=m,m=t;return m}function os(){var m,b,N,K,ce;for(m=Q,b=[],N=He();N!==t;)b.push(N),N=He();if(b!==t)if(N=Mr(),N!==t){for(K=[],ce=He();ce!==t;)K.push(ce),ce=He();K!==t?(Me=m,b=l(N),m=b):(Q=m,m=t)}else Q=m,m=t;else Q=m,m=t;return m}function ma(){var m;return r.charCodeAt(Q)===59?(m=c,Q++):(m=t,I===0&&Qe(u)),m===t&&(r.charCodeAt(Q)===38?(m=g,Q++):(m=t,I===0&&Qe(f))),m}function Ur(){var m,b,N;return m=Q,b=G1(),b!==t?(N=lge(),N===t&&(N=null),N!==t?(Me=m,b=h(b,N),m=b):(Q=m,m=t)):(Q=m,m=t),m}function lge(){var m,b,N,K,ce,Se,ht;for(m=Q,b=[],N=He();N!==t;)b.push(N),N=He();if(b!==t)if(N=cge(),N!==t){for(K=[],ce=He();ce!==t;)K.push(ce),ce=He();if(K!==t)if(ce=Ur(),ce!==t){for(Se=[],ht=He();ht!==t;)Se.push(ht),ht=He();Se!==t?(Me=m,b=p(N,ce),m=b):(Q=m,m=t)}else Q=m,m=t;else Q=m,m=t}else Q=m,m=t;else Q=m,m=t;return m}function cge(){var m;return r.substr(Q,2)===C?(m=C,Q+=2):(m=t,I===0&&Qe(y)),m===t&&(r.substr(Q,2)===B?(m=B,Q+=2):(m=t,I===0&&Qe(v))),m}function G1(){var m,b,N;return m=Q,b=fge(),b!==t?(N=uge(),N===t&&(N=null),N!==t?(Me=m,b=D(b,N),m=b):(Q=m,m=t)):(Q=m,m=t),m}function uge(){var m,b,N,K,ce,Se,ht;for(m=Q,b=[],N=He();N!==t;)b.push(N),N=He();if(b!==t)if(N=gge(),N!==t){for(K=[],ce=He();ce!==t;)K.push(ce),ce=He();if(K!==t)if(ce=G1(),ce!==t){for(Se=[],ht=He();ht!==t;)Se.push(ht),ht=He();Se!==t?(Me=m,b=L(N,ce),m=b):(Q=m,m=t)}else Q=m,m=t;else Q=m,m=t}else Q=m,m=t;else Q=m,m=t;return m}function gge(){var m;return r.substr(Q,2)===H?(m=H,Q+=2):(m=t,I===0&&Qe(j)),m===t&&(r.charCodeAt(Q)===124?(m=$,Q++):(m=t,I===0&&Qe(V))),m}function qE(){var m,b,N,K,ce,Se;if(m=Q,b=eU(),b!==t)if(r.charCodeAt(Q)===61?(N=W,Q++):(N=t,I===0&&Qe(_)),N!==t)if(K=q1(),K!==t){for(ce=[],Se=He();Se!==t;)ce.push(Se),Se=He();ce!==t?(Me=m,b=A(b,K),m=b):(Q=m,m=t)}else Q=m,m=t;else Q=m,m=t;else Q=m,m=t;if(m===t)if(m=Q,b=eU(),b!==t)if(r.charCodeAt(Q)===61?(N=W,Q++):(N=t,I===0&&Qe(_)),N!==t){for(K=[],ce=He();ce!==t;)K.push(ce),ce=He();K!==t?(Me=m,b=Ae(b),m=b):(Q=m,m=t)}else Q=m,m=t;else Q=m,m=t;return m}function fge(){var m,b,N,K,ce,Se,ht,Bt,Jr,hi,as;for(m=Q,b=[],N=He();N!==t;)b.push(N),N=He();if(b!==t)if(r.charCodeAt(Q)===40?(N=ge,Q++):(N=t,I===0&&Qe(re)),N!==t){for(K=[],ce=He();ce!==t;)K.push(ce),ce=He();if(K!==t)if(ce=Mr(),ce!==t){for(Se=[],ht=He();ht!==t;)Se.push(ht),ht=He();if(Se!==t)if(r.charCodeAt(Q)===41?(ht=O,Q++):(ht=t,I===0&&Qe(F)),ht!==t){for(Bt=[],Jr=He();Jr!==t;)Bt.push(Jr),Jr=He();if(Bt!==t){for(Jr=[],hi=Np();hi!==t;)Jr.push(hi),hi=Np();if(Jr!==t){for(hi=[],as=He();as!==t;)hi.push(as),as=He();hi!==t?(Me=m,b=ue(ce,Jr),m=b):(Q=m,m=t)}else Q=m,m=t}else Q=m,m=t}else Q=m,m=t;else Q=m,m=t}else Q=m,m=t;else Q=m,m=t}else Q=m,m=t;else Q=m,m=t;if(m===t){for(m=Q,b=[],N=He();N!==t;)b.push(N),N=He();if(b!==t)if(r.charCodeAt(Q)===123?(N=pe,Q++):(N=t,I===0&&Qe(ke)),N!==t){for(K=[],ce=He();ce!==t;)K.push(ce),ce=He();if(K!==t)if(ce=Mr(),ce!==t){for(Se=[],ht=He();ht!==t;)Se.push(ht),ht=He();if(Se!==t)if(r.charCodeAt(Q)===125?(ht=Fe,Q++):(ht=t,I===0&&Qe(Ne)),ht!==t){for(Bt=[],Jr=He();Jr!==t;)Bt.push(Jr),Jr=He();if(Bt!==t){for(Jr=[],hi=Np();hi!==t;)Jr.push(hi),hi=Np();if(Jr!==t){for(hi=[],as=He();as!==t;)hi.push(as),as=He();hi!==t?(Me=m,b=oe(ce,Jr),m=b):(Q=m,m=t)}else Q=m,m=t}else Q=m,m=t}else Q=m,m=t;else Q=m,m=t}else Q=m,m=t;else Q=m,m=t}else Q=m,m=t;else Q=m,m=t;if(m===t){for(m=Q,b=[],N=He();N!==t;)b.push(N),N=He();if(b!==t){for(N=[],K=qE();K!==t;)N.push(K),K=qE();if(N!==t){for(K=[],ce=He();ce!==t;)K.push(ce),ce=He();if(K!==t){if(ce=[],Se=j1(),Se!==t)for(;Se!==t;)ce.push(Se),Se=j1();else ce=t;if(ce!==t){for(Se=[],ht=He();ht!==t;)Se.push(ht),ht=He();Se!==t?(Me=m,b=le(N,ce),m=b):(Q=m,m=t)}else Q=m,m=t}else Q=m,m=t}else Q=m,m=t}else Q=m,m=t;if(m===t){for(m=Q,b=[],N=He();N!==t;)b.push(N),N=He();if(b!==t){if(N=[],K=qE(),K!==t)for(;K!==t;)N.push(K),K=qE();else N=t;if(N!==t){for(K=[],ce=He();ce!==t;)K.push(ce),ce=He();K!==t?(Me=m,b=Be(N),m=b):(Q=m,m=t)}else Q=m,m=t}else Q=m,m=t}}}return m}function Y1(){var m,b,N,K,ce;for(m=Q,b=[],N=He();N!==t;)b.push(N),N=He();if(b!==t){if(N=[],K=JE(),K!==t)for(;K!==t;)N.push(K),K=JE();else N=t;if(N!==t){for(K=[],ce=He();ce!==t;)K.push(ce),ce=He();K!==t?(Me=m,b=fe(N),m=b):(Q=m,m=t)}else Q=m,m=t}else Q=m,m=t;return m}function j1(){var m,b,N;for(m=Q,b=[],N=He();N!==t;)b.push(N),N=He();if(b!==t?(N=Np(),N!==t?(Me=m,b=ae(N),m=b):(Q=m,m=t)):(Q=m,m=t),m===t){for(m=Q,b=[],N=He();N!==t;)b.push(N),N=He();b!==t?(N=JE(),N!==t?(Me=m,b=ae(N),m=b):(Q=m,m=t)):(Q=m,m=t)}return m}function Np(){var m,b,N,K,ce;for(m=Q,b=[],N=He();N!==t;)b.push(N),N=He();return b!==t?(qe.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(ne)),N===t&&(N=null),N!==t?(K=hge(),K!==t?(ce=JE(),ce!==t?(Me=m,b=Y(N,K,ce),m=b):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t),m}function hge(){var m;return r.substr(Q,2)===he?(m=he,Q+=2):(m=t,I===0&&Qe(ie)),m===t&&(r.substr(Q,2)===de?(m=de,Q+=2):(m=t,I===0&&Qe(_e)),m===t&&(r.charCodeAt(Q)===62?(m=Pt,Q++):(m=t,I===0&&Qe(It)),m===t&&(r.substr(Q,3)===Or?(m=Or,Q+=3):(m=t,I===0&&Qe(ii)),m===t&&(r.substr(Q,2)===gi?(m=gi,Q+=2):(m=t,I===0&&Qe(hr)),m===t&&(r.charCodeAt(Q)===60?(m=fi,Q++):(m=t,I===0&&Qe(ni))))))),m}function JE(){var m,b,N;for(m=Q,b=[],N=He();N!==t;)b.push(N),N=He();return b!==t?(N=q1(),N!==t?(Me=m,b=ae(N),m=b):(Q=m,m=t)):(Q=m,m=t),m}function q1(){var m,b,N;if(m=Q,b=[],N=J1(),N!==t)for(;N!==t;)b.push(N),N=J1();else b=t;return b!==t&&(Me=m,b=Us(b)),m=b,m}function J1(){var m,b;return m=Q,b=pge(),b!==t&&(Me=m,b=pr(b)),m=b,m===t&&(m=Q,b=dge(),b!==t&&(Me=m,b=pr(b)),m=b,m===t&&(m=Q,b=Cge(),b!==t&&(Me=m,b=pr(b)),m=b,m===t&&(m=Q,b=mge(),b!==t&&(Me=m,b=pr(b)),m=b))),m}function pge(){var m,b,N,K;return m=Q,r.substr(Q,2)===Ii?(b=Ii,Q+=2):(b=t,I===0&&Qe(rs)),b!==t?(N=yge(),N!==t?(r.charCodeAt(Q)===39?(K=ga,Q++):(K=t,I===0&&Qe(dA)),K!==t?(Me=m,b=cg(N),m=b):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t),m}function dge(){var m,b,N,K;return m=Q,r.charCodeAt(Q)===39?(b=ga,Q++):(b=t,I===0&&Qe(dA)),b!==t?(N=Ege(),N!==t?(r.charCodeAt(Q)===39?(K=ga,Q++):(K=t,I===0&&Qe(dA)),K!==t?(Me=m,b=cg(N),m=b):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t),m}function Cge(){var m,b,N,K;if(m=Q,r.substr(Q,2)===is?(b=is,Q+=2):(b=t,I===0&&Qe(CA)),b!==t&&(Me=m,b=fa()),m=b,m===t)if(m=Q,r.charCodeAt(Q)===34?(b=wp,Q++):(b=t,I===0&&Qe(mA)),b!==t){for(N=[],K=W1();K!==t;)N.push(K),K=W1();N!==t?(r.charCodeAt(Q)===34?(K=wp,Q++):(K=t,I===0&&Qe(mA)),K!==t?(Me=m,b=EA(N),m=b):(Q=m,m=t)):(Q=m,m=t)}else Q=m,m=t;return m}function mge(){var m,b,N;if(m=Q,b=[],N=z1(),N!==t)for(;N!==t;)b.push(N),N=z1();else b=t;return b!==t&&(Me=m,b=EA(b)),m=b,m}function W1(){var m,b;return m=Q,b=_1(),b!==t&&(Me=m,b=wr(b)),m=b,m===t&&(m=Q,b=$1(),b!==t&&(Me=m,b=Ll(b)),m=b,m===t&&(m=Q,b=aS(),b!==t&&(Me=m,b=ug(b)),m=b,m===t&&(m=Q,b=Ige(),b!==t&&(Me=m,b=Io(b)),m=b))),m}function z1(){var m,b;return m=Q,b=_1(),b!==t&&(Me=m,b=gg(b)),m=b,m===t&&(m=Q,b=$1(),b!==t&&(Me=m,b=Bp(b)),m=b,m===t&&(m=Q,b=aS(),b!==t&&(Me=m,b=Qp(b)),m=b,m===t&&(m=Q,b=Qge(),b!==t&&(Me=m,b=vr(b)),m=b,m===t&&(m=Q,b=Bge(),b!==t&&(Me=m,b=Io(b)),m=b)))),m}function Ege(){var m,b,N;for(m=Q,b=[],se.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(yo));N!==t;)b.push(N),se.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(yo));return b!==t&&(Me=m,b=Rn(b)),m=b,m}function Ige(){var m,b,N;if(m=Q,b=[],N=V1(),N===t&&(fg.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(Qt))),N!==t)for(;N!==t;)b.push(N),N=V1(),N===t&&(fg.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(Qt)));else b=t;return b!==t&&(Me=m,b=Rn(b)),m=b,m}function V1(){var m,b,N;return m=Q,r.substr(Q,2)===Tl?(b=Tl,Q+=2):(b=t,I===0&&Qe(Fn)),b!==t&&(Me=m,b=ns()),m=b,m===t&&(m=Q,r.charCodeAt(Q)===92?(b=ss,Q++):(b=t,I===0&&Qe(gt)),b!==t?(wo.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(At)),N!==t?(Me=m,b=An(N),m=b):(Q=m,m=t)):(Q=m,m=t)),m}function yge(){var m,b,N;for(m=Q,b=[],N=X1(),N===t&&(se.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(yo)));N!==t;)b.push(N),N=X1(),N===t&&(se.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(yo)));return b!==t&&(Me=m,b=Rn(b)),m=b,m}function X1(){var m,b,N;return m=Q,r.substr(Q,2)===S?(b=S,Q+=2):(b=t,I===0&&Qe(Tt)),b!==t&&(Me=m,b=hg()),m=b,m===t&&(m=Q,r.substr(Q,2)===Ol?(b=Ol,Q+=2):(b=t,I===0&&Qe(bp)),b!==t&&(Me=m,b=Sp()),m=b,m===t&&(m=Q,r.charCodeAt(Q)===92?(b=ss,Q++):(b=t,I===0&&Qe(gt)),b!==t?(vp.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(xp)),N!==t?(Me=m,b=Pp(),m=b):(Q=m,m=t)):(Q=m,m=t),m===t&&(m=Q,r.substr(Q,2)===G?(b=G,Q+=2):(b=t,I===0&&Qe(yt)),b!==t&&(Me=m,b=IA()),m=b,m===t&&(m=Q,r.substr(Q,2)===Wi?(b=Wi,Q+=2):(b=t,I===0&&Qe(Ml)),b!==t&&(Me=m,b=Xe()),m=b,m===t&&(m=Q,r.substr(Q,2)===ha?(b=ha,Q+=2):(b=t,I===0&&Qe(pg)),b!==t&&(Me=m,b=OE()),m=b,m===t&&(m=Q,r.substr(Q,2)===Dp?(b=Dp,Q+=2):(b=t,I===0&&Qe(ME)),b!==t&&(Me=m,b=ar()),m=b,m===t&&(m=Q,r.substr(Q,2)===Nn?(b=Nn,Q+=2):(b=t,I===0&&Qe(Ul)),b!==t&&(Me=m,b=kp()),m=b,m===t&&(m=Q,r.charCodeAt(Q)===92?(b=ss,Q++):(b=t,I===0&&Qe(gt)),b!==t?(Ks.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(pa)),N!==t?(Me=m,b=An(N),m=b):(Q=m,m=t)):(Q=m,m=t),m===t&&(m=wge()))))))))),m}function wge(){var m,b,N,K,ce,Se,ht,Bt,Jr,hi,as,AS;return m=Q,r.charCodeAt(Q)===92?(b=ss,Q++):(b=t,I===0&&Qe(gt)),b!==t?(N=nS(),N!==t?(Me=m,b=ln(N),m=b):(Q=m,m=t)):(Q=m,m=t),m===t&&(m=Q,r.substr(Q,2)===Te?(b=Te,Q+=2):(b=t,I===0&&Qe(dg)),b!==t?(N=Q,K=Q,ce=nS(),ce!==t?(Se=On(),Se!==t?(ce=[ce,Se],K=ce):(Q=K,K=t)):(Q=K,K=t),K===t&&(K=nS()),K!==t?N=r.substring(N,Q):N=K,N!==t?(Me=m,b=ln(N),m=b):(Q=m,m=t)):(Q=m,m=t),m===t&&(m=Q,r.substr(Q,2)===Kl?(b=Kl,Q+=2):(b=t,I===0&&Qe(Hs)),b!==t?(N=Q,K=Q,ce=On(),ce!==t?(Se=On(),Se!==t?(ht=On(),ht!==t?(Bt=On(),Bt!==t?(ce=[ce,Se,ht,Bt],K=ce):(Q=K,K=t)):(Q=K,K=t)):(Q=K,K=t)):(Q=K,K=t),K!==t?N=r.substring(N,Q):N=K,N!==t?(Me=m,b=ln(N),m=b):(Q=m,m=t)):(Q=m,m=t),m===t&&(m=Q,r.substr(Q,2)===Hl?(b=Hl,Q+=2):(b=t,I===0&&Qe(yA)),b!==t?(N=Q,K=Q,ce=On(),ce!==t?(Se=On(),Se!==t?(ht=On(),ht!==t?(Bt=On(),Bt!==t?(Jr=On(),Jr!==t?(hi=On(),hi!==t?(as=On(),as!==t?(AS=On(),AS!==t?(ce=[ce,Se,ht,Bt,Jr,hi,as,AS],K=ce):(Q=K,K=t)):(Q=K,K=t)):(Q=K,K=t)):(Q=K,K=t)):(Q=K,K=t)):(Q=K,K=t)):(Q=K,K=t)):(Q=K,K=t),K!==t?N=r.substring(N,Q):N=K,N!==t?(Me=m,b=Cg(N),m=b):(Q=m,m=t)):(Q=m,m=t)))),m}function nS(){var m;return mg.test(r.charAt(Q))?(m=r.charAt(Q),Q++):(m=t,I===0&&Qe(da)),m}function On(){var m;return Ca.test(r.charAt(Q))?(m=r.charAt(Q),Q++):(m=t,I===0&&Qe(rt)),m}function Bge(){var m,b,N,K,ce;if(m=Q,b=[],N=Q,r.charCodeAt(Q)===92?(K=ss,Q++):(K=t,I===0&&Qe(gt)),K!==t?(r.length>Q?(ce=r.charAt(Q),Q++):(ce=t,I===0&&Qe(Bo)),ce!==t?(Me=N,K=An(ce),N=K):(Q=N,N=t)):(Q=N,N=t),N===t&&(N=Q,K=Q,I++,ce=tU(),I--,ce===t?K=void 0:(Q=K,K=t),K!==t?(r.length>Q?(ce=r.charAt(Q),Q++):(ce=t,I===0&&Qe(Bo)),ce!==t?(Me=N,K=An(ce),N=K):(Q=N,N=t)):(Q=N,N=t)),N!==t)for(;N!==t;)b.push(N),N=Q,r.charCodeAt(Q)===92?(K=ss,Q++):(K=t,I===0&&Qe(gt)),K!==t?(r.length>Q?(ce=r.charAt(Q),Q++):(ce=t,I===0&&Qe(Bo)),ce!==t?(Me=N,K=An(ce),N=K):(Q=N,N=t)):(Q=N,N=t),N===t&&(N=Q,K=Q,I++,ce=tU(),I--,ce===t?K=void 0:(Q=K,K=t),K!==t?(r.length>Q?(ce=r.charAt(Q),Q++):(ce=t,I===0&&Qe(Bo)),ce!==t?(Me=N,K=An(ce),N=K):(Q=N,N=t)):(Q=N,N=t));else b=t;return b!==t&&(Me=m,b=Rn(b)),m=b,m}function sS(){var m,b,N,K,ce,Se;if(m=Q,r.charCodeAt(Q)===45?(b=wA,Q++):(b=t,I===0&&Qe(Gl)),b===t&&(r.charCodeAt(Q)===43?(b=Gs,Q++):(b=t,I===0&&Qe(Yl))),b===t&&(b=null),b!==t){if(N=[],qe.test(r.charAt(Q))?(K=r.charAt(Q),Q++):(K=t,I===0&&Qe(ne)),K!==t)for(;K!==t;)N.push(K),qe.test(r.charAt(Q))?(K=r.charAt(Q),Q++):(K=t,I===0&&Qe(ne));else N=t;if(N!==t)if(r.charCodeAt(Q)===46?(K=UE,Q++):(K=t,I===0&&Qe(Rp)),K!==t){if(ce=[],qe.test(r.charAt(Q))?(Se=r.charAt(Q),Q++):(Se=t,I===0&&Qe(ne)),Se!==t)for(;Se!==t;)ce.push(Se),qe.test(r.charAt(Q))?(Se=r.charAt(Q),Q++):(Se=t,I===0&&Qe(ne));else ce=t;ce!==t?(Me=m,b=Eg(b,N,ce),m=b):(Q=m,m=t)}else Q=m,m=t;else Q=m,m=t}else Q=m,m=t;if(m===t){if(m=Q,r.charCodeAt(Q)===45?(b=wA,Q++):(b=t,I===0&&Qe(Gl)),b===t&&(r.charCodeAt(Q)===43?(b=Gs,Q++):(b=t,I===0&&Qe(Yl))),b===t&&(b=null),b!==t){if(N=[],qe.test(r.charAt(Q))?(K=r.charAt(Q),Q++):(K=t,I===0&&Qe(ne)),K!==t)for(;K!==t;)N.push(K),qe.test(r.charAt(Q))?(K=r.charAt(Q),Q++):(K=t,I===0&&Qe(ne));else N=t;N!==t?(Me=m,b=Fp(b,N),m=b):(Q=m,m=t)}else Q=m,m=t;if(m===t&&(m=Q,b=aS(),b!==t&&(Me=m,b=KE(b)),m=b,m===t&&(m=Q,b=ql(),b!==t&&(Me=m,b=jl(b)),m=b,m===t)))if(m=Q,r.charCodeAt(Q)===40?(b=ge,Q++):(b=t,I===0&&Qe(re)),b!==t){for(N=[],K=He();K!==t;)N.push(K),K=He();if(N!==t)if(K=Z1(),K!==t){for(ce=[],Se=He();Se!==t;)ce.push(Se),Se=He();ce!==t?(r.charCodeAt(Q)===41?(Se=O,Q++):(Se=t,I===0&&Qe(F)),Se!==t?(Me=m,b=HE(K),m=b):(Q=m,m=t)):(Q=m,m=t)}else Q=m,m=t;else Q=m,m=t}else Q=m,m=t}return m}function oS(){var m,b,N,K,ce,Se,ht,Bt;if(m=Q,b=sS(),b!==t){for(N=[],K=Q,ce=[],Se=He();Se!==t;)ce.push(Se),Se=He();if(ce!==t)if(r.charCodeAt(Q)===42?(Se=Ig,Q++):(Se=t,I===0&&Qe(BA)),Se===t&&(r.charCodeAt(Q)===47?(Se=Rr,Q++):(Se=t,I===0&&Qe(GE))),Se!==t){for(ht=[],Bt=He();Bt!==t;)ht.push(Bt),Bt=He();ht!==t?(Bt=sS(),Bt!==t?(Me=K,ce=Ys(b,Se,Bt),K=ce):(Q=K,K=t)):(Q=K,K=t)}else Q=K,K=t;else Q=K,K=t;for(;K!==t;){for(N.push(K),K=Q,ce=[],Se=He();Se!==t;)ce.push(Se),Se=He();if(ce!==t)if(r.charCodeAt(Q)===42?(Se=Ig,Q++):(Se=t,I===0&&Qe(BA)),Se===t&&(r.charCodeAt(Q)===47?(Se=Rr,Q++):(Se=t,I===0&&Qe(GE))),Se!==t){for(ht=[],Bt=He();Bt!==t;)ht.push(Bt),Bt=He();ht!==t?(Bt=sS(),Bt!==t?(Me=K,ce=Ys(b,Se,Bt),K=ce):(Q=K,K=t)):(Q=K,K=t)}else Q=K,K=t;else Q=K,K=t}N!==t?(Me=m,b=js(b,N),m=b):(Q=m,m=t)}else Q=m,m=t;return m}function Z1(){var m,b,N,K,ce,Se,ht,Bt;if(m=Q,b=oS(),b!==t){for(N=[],K=Q,ce=[],Se=He();Se!==t;)ce.push(Se),Se=He();if(ce!==t)if(r.charCodeAt(Q)===43?(Se=Gs,Q++):(Se=t,I===0&&Qe(Yl)),Se===t&&(r.charCodeAt(Q)===45?(Se=wA,Q++):(Se=t,I===0&&Qe(Gl))),Se!==t){for(ht=[],Bt=He();Bt!==t;)ht.push(Bt),Bt=He();ht!==t?(Bt=oS(),Bt!==t?(Me=K,ce=yg(b,Se,Bt),K=ce):(Q=K,K=t)):(Q=K,K=t)}else Q=K,K=t;else Q=K,K=t;for(;K!==t;){for(N.push(K),K=Q,ce=[],Se=He();Se!==t;)ce.push(Se),Se=He();if(ce!==t)if(r.charCodeAt(Q)===43?(Se=Gs,Q++):(Se=t,I===0&&Qe(Yl)),Se===t&&(r.charCodeAt(Q)===45?(Se=wA,Q++):(Se=t,I===0&&Qe(Gl))),Se!==t){for(ht=[],Bt=He();Bt!==t;)ht.push(Bt),Bt=He();ht!==t?(Bt=oS(),Bt!==t?(Me=K,ce=yg(b,Se,Bt),K=ce):(Q=K,K=t)):(Q=K,K=t)}else Q=K,K=t;else Q=K,K=t}N!==t?(Me=m,b=js(b,N),m=b):(Q=m,m=t)}else Q=m,m=t;return m}function _1(){var m,b,N,K,ce,Se;if(m=Q,r.substr(Q,3)===QA?(b=QA,Q+=3):(b=t,I===0&&Qe(R)),b!==t){for(N=[],K=He();K!==t;)N.push(K),K=He();if(N!==t)if(K=Z1(),K!==t){for(ce=[],Se=He();Se!==t;)ce.push(Se),Se=He();ce!==t?(r.substr(Q,2)===q?(Se=q,Q+=2):(Se=t,I===0&&Qe(Ce)),Se!==t?(Me=m,b=Ue(K),m=b):(Q=m,m=t)):(Q=m,m=t)}else Q=m,m=t;else Q=m,m=t}else Q=m,m=t;return m}function $1(){var m,b,N,K;return m=Q,r.substr(Q,2)===Re?(b=Re,Q+=2):(b=t,I===0&&Qe(ze)),b!==t?(N=Mr(),N!==t?(r.charCodeAt(Q)===41?(K=O,Q++):(K=t,I===0&&Qe(F)),K!==t?(Me=m,b=dt(N),m=b):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t),m}function aS(){var m,b,N,K,ce,Se;return m=Q,r.substr(Q,2)===Ft?(b=Ft,Q+=2):(b=t,I===0&&Qe(Ln)),b!==t?(N=ql(),N!==t?(r.substr(Q,2)===Jb?(K=Jb,Q+=2):(K=t,I===0&&Qe(P1)),K!==t?(ce=Y1(),ce!==t?(r.charCodeAt(Q)===125?(Se=Fe,Q++):(Se=t,I===0&&Qe(Ne)),Se!==t?(Me=m,b=D1(N,ce),m=b):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t),m===t&&(m=Q,r.substr(Q,2)===Ft?(b=Ft,Q+=2):(b=t,I===0&&Qe(Ln)),b!==t?(N=ql(),N!==t?(r.substr(Q,3)===Wb?(K=Wb,Q+=3):(K=t,I===0&&Qe(k1)),K!==t?(Me=m,b=R1(N),m=b):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t),m===t&&(m=Q,r.substr(Q,2)===Ft?(b=Ft,Q+=2):(b=t,I===0&&Qe(Ln)),b!==t?(N=ql(),N!==t?(r.substr(Q,2)===zb?(K=zb,Q+=2):(K=t,I===0&&Qe(F1)),K!==t?(ce=Y1(),ce!==t?(r.charCodeAt(Q)===125?(Se=Fe,Q++):(Se=t,I===0&&Qe(Ne)),Se!==t?(Me=m,b=N1(N,ce),m=b):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t),m===t&&(m=Q,r.substr(Q,2)===Ft?(b=Ft,Q+=2):(b=t,I===0&&Qe(Ln)),b!==t?(N=ql(),N!==t?(r.substr(Q,3)===Vb?(K=Vb,Q+=3):(K=t,I===0&&Qe(L1)),K!==t?(Me=m,b=T1(N),m=b):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t),m===t&&(m=Q,r.substr(Q,2)===Ft?(b=Ft,Q+=2):(b=t,I===0&&Qe(Ln)),b!==t?(N=ql(),N!==t?(r.charCodeAt(Q)===125?(K=Fe,Q++):(K=t,I===0&&Qe(Ne)),K!==t?(Me=m,b=Xb(N),m=b):(Q=m,m=t)):(Q=m,m=t)):(Q=m,m=t),m===t&&(m=Q,r.charCodeAt(Q)===36?(b=O1,Q++):(b=t,I===0&&Qe(M1)),b!==t?(N=ql(),N!==t?(Me=m,b=Xb(N),m=b):(Q=m,m=t)):(Q=m,m=t)))))),m}function Qge(){var m,b,N;return m=Q,b=bge(),b!==t?(Me=Q,N=U1(b),N?N=void 0:N=t,N!==t?(Me=m,b=K1(b),m=b):(Q=m,m=t)):(Q=m,m=t),m}function bge(){var m,b,N,K,ce;if(m=Q,b=[],N=Q,K=Q,I++,ce=rU(),I--,ce===t?K=void 0:(Q=K,K=t),K!==t?(r.length>Q?(ce=r.charAt(Q),Q++):(ce=t,I===0&&Qe(Bo)),ce!==t?(Me=N,K=An(ce),N=K):(Q=N,N=t)):(Q=N,N=t),N!==t)for(;N!==t;)b.push(N),N=Q,K=Q,I++,ce=rU(),I--,ce===t?K=void 0:(Q=K,K=t),K!==t?(r.length>Q?(ce=r.charAt(Q),Q++):(ce=t,I===0&&Qe(Bo)),ce!==t?(Me=N,K=An(ce),N=K):(Q=N,N=t)):(Q=N,N=t);else b=t;return b!==t&&(Me=m,b=Rn(b)),m=b,m}function eU(){var m,b,N;if(m=Q,b=[],Zb.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(_b)),N!==t)for(;N!==t;)b.push(N),Zb.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(_b));else b=t;return b!==t&&(Me=m,b=$b()),m=b,m}function ql(){var m,b,N;if(m=Q,b=[],eS.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(tS)),N!==t)for(;N!==t;)b.push(N),eS.test(r.charAt(Q))?(N=r.charAt(Q),Q++):(N=t,I===0&&Qe(tS));else b=t;return b!==t&&(Me=m,b=$b()),m=b,m}function tU(){var m;return H1.test(r.charAt(Q))?(m=r.charAt(Q),Q++):(m=t,I===0&&Qe(wg)),m}function rU(){var m;return rS.test(r.charAt(Q))?(m=r.charAt(Q),Q++):(m=t,I===0&&Qe(iS)),m}function He(){var m,b;if(m=[],YE.test(r.charAt(Q))?(b=r.charAt(Q),Q++):(b=t,I===0&&Qe(jE)),b!==t)for(;b!==t;)m.push(b),YE.test(r.charAt(Q))?(b=r.charAt(Q),Q++):(b=t,I===0&&Qe(jE));else m=t;return m}if(k=n(),k!==t&&Q===r.length)return k;throw k!==t&&Q{"use strict";function Sfe(r,e){function t(){this.constructor=r}t.prototype=e.prototype,r.prototype=new t}function $l(r,e,t,i){this.message=r,this.expected=e,this.found=t,this.location=i,this.name="SyntaxError",typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,$l)}Sfe($l,Error);$l.buildMessage=function(r,e){var t={literal:function(c){return'"'+n(c.text)+'"'},class:function(c){var u="",g;for(g=0;g0){for(g=1,f=1;gH&&(H=v,j=[]),j.push(ne))}function Ne(ne,Y){return new $l(ne,null,null,Y)}function oe(ne,Y,he){return new $l($l.buildMessage(ne,Y),ne,Y,he)}function le(){var ne,Y,he,ie;return ne=v,Y=Be(),Y!==t?(r.charCodeAt(v)===47?(he=s,v++):(he=t,$===0&&Fe(o)),he!==t?(ie=Be(),ie!==t?(D=ne,Y=a(Y,ie),ne=Y):(v=ne,ne=t)):(v=ne,ne=t)):(v=ne,ne=t),ne===t&&(ne=v,Y=Be(),Y!==t&&(D=ne,Y=l(Y)),ne=Y),ne}function Be(){var ne,Y,he,ie;return ne=v,Y=fe(),Y!==t?(r.charCodeAt(v)===64?(he=c,v++):(he=t,$===0&&Fe(u)),he!==t?(ie=qe(),ie!==t?(D=ne,Y=g(Y,ie),ne=Y):(v=ne,ne=t)):(v=ne,ne=t)):(v=ne,ne=t),ne===t&&(ne=v,Y=fe(),Y!==t&&(D=ne,Y=f(Y)),ne=Y),ne}function fe(){var ne,Y,he,ie,de;return ne=v,r.charCodeAt(v)===64?(Y=c,v++):(Y=t,$===0&&Fe(u)),Y!==t?(he=ae(),he!==t?(r.charCodeAt(v)===47?(ie=s,v++):(ie=t,$===0&&Fe(o)),ie!==t?(de=ae(),de!==t?(D=ne,Y=h(),ne=Y):(v=ne,ne=t)):(v=ne,ne=t)):(v=ne,ne=t)):(v=ne,ne=t),ne===t&&(ne=v,Y=ae(),Y!==t&&(D=ne,Y=h()),ne=Y),ne}function ae(){var ne,Y,he;if(ne=v,Y=[],p.test(r.charAt(v))?(he=r.charAt(v),v++):(he=t,$===0&&Fe(C)),he!==t)for(;he!==t;)Y.push(he),p.test(r.charAt(v))?(he=r.charAt(v),v++):(he=t,$===0&&Fe(C));else Y=t;return Y!==t&&(D=ne,Y=h()),ne=Y,ne}function qe(){var ne,Y,he;if(ne=v,Y=[],y.test(r.charAt(v))?(he=r.charAt(v),v++):(he=t,$===0&&Fe(B)),he!==t)for(;he!==t;)Y.push(he),y.test(r.charAt(v))?(he=r.charAt(v),v++):(he=t,$===0&&Fe(B));else Y=t;return Y!==t&&(D=ne,Y=h()),ne=Y,ne}if(V=n(),V!==t&&v===r.length)return V;throw V!==t&&v{"use strict";function dK(r){return typeof r>"u"||r===null}function xfe(r){return typeof r=="object"&&r!==null}function Pfe(r){return Array.isArray(r)?r:dK(r)?[]:[r]}function Dfe(r,e){var t,i,n,s;if(e)for(s=Object.keys(e),t=0,i=s.length;t{"use strict";function Vp(r,e){Error.call(this),this.name="YAMLException",this.reason=r,this.mark=e,this.message=(this.reason||"(unknown reason)")+(this.mark?" "+this.mark.toString():""),Error.captureStackTrace?Error.captureStackTrace(this,this.constructor):this.stack=new Error().stack||""}Vp.prototype=Object.create(Error.prototype);Vp.prototype.constructor=Vp;Vp.prototype.toString=function(e){var t=this.name+": ";return t+=this.reason||"(unknown reason)",!e&&this.mark&&(t+=" "+this.mark.toString()),t};CK.exports=Vp});var IK=w((wZe,EK)=>{"use strict";var mK=tc();function HS(r,e,t,i,n){this.name=r,this.buffer=e,this.position=t,this.line=i,this.column=n}HS.prototype.getSnippet=function(e,t){var i,n,s,o,a;if(!this.buffer)return null;for(e=e||4,t=t||75,i="",n=this.position;n>0&&`\0\r \x85\u2028\u2029`.indexOf(this.buffer.charAt(n-1))===-1;)if(n-=1,this.position-n>t/2-1){i=" ... ",n+=5;break}for(s="",o=this.position;ot/2-1){s=" ... ",o-=5;break}return a=this.buffer.slice(n,o),mK.repeat(" ",e)+i+a+s+` `+mK.repeat(" ",e+this.position-n+i.length)+"^"};HS.prototype.toString=function(e){var t,i="";return this.name&&(i+='in "'+this.name+'" '),i+="at line "+(this.line+1)+", column "+(this.column+1),e||(t=this.getSnippet(),t&&(i+=`: `+t)),i};EK.exports=HS});var si=w((BZe,wK)=>{"use strict";var yK=Ng(),Ffe=["kind","resolve","construct","instanceOf","predicate","represent","defaultStyle","styleAliases"],Nfe=["scalar","sequence","mapping"];function Lfe(r){var e={};return r!==null&&Object.keys(r).forEach(function(t){r[t].forEach(function(i){e[String(i)]=t})}),e}function Tfe(r,e){if(e=e||{},Object.keys(e).forEach(function(t){if(Ffe.indexOf(t)===-1)throw new yK('Unknown option "'+t+'" is met in definition of "'+r+'" YAML type.')}),this.tag=r,this.kind=e.kind||null,this.resolve=e.resolve||function(){return!0},this.construct=e.construct||function(t){return t},this.instanceOf=e.instanceOf||null,this.predicate=e.predicate||null,this.represent=e.represent||null,this.defaultStyle=e.defaultStyle||null,this.styleAliases=Lfe(e.styleAliases||null),Nfe.indexOf(this.kind)===-1)throw new yK('Unknown kind "'+this.kind+'" is specified for "'+r+'" YAML type.')}wK.exports=Tfe});var rc=w((QZe,QK)=>{"use strict";var BK=tc(),dI=Ng(),Ofe=si();function GS(r,e,t){var i=[];return r.include.forEach(function(n){t=GS(n,e,t)}),r[e].forEach(function(n){t.forEach(function(s,o){s.tag===n.tag&&s.kind===n.kind&&i.push(o)}),t.push(n)}),t.filter(function(n,s){return i.indexOf(s)===-1})}function Mfe(){var r={scalar:{},sequence:{},mapping:{},fallback:{}},e,t;function i(n){r[n.kind][n.tag]=r.fallback[n.tag]=n}for(e=0,t=arguments.length;e{"use strict";var Ufe=si();bK.exports=new Ufe("tag:yaml.org,2002:str",{kind:"scalar",construct:function(r){return r!==null?r:""}})});var xK=w((SZe,vK)=>{"use strict";var Kfe=si();vK.exports=new Kfe("tag:yaml.org,2002:seq",{kind:"sequence",construct:function(r){return r!==null?r:[]}})});var DK=w((vZe,PK)=>{"use strict";var Hfe=si();PK.exports=new Hfe("tag:yaml.org,2002:map",{kind:"mapping",construct:function(r){return r!==null?r:{}}})});var CI=w((xZe,kK)=>{"use strict";var Gfe=rc();kK.exports=new Gfe({explicit:[SK(),xK(),DK()]})});var FK=w((PZe,RK)=>{"use strict";var Yfe=si();function jfe(r){if(r===null)return!0;var e=r.length;return e===1&&r==="~"||e===4&&(r==="null"||r==="Null"||r==="NULL")}function qfe(){return null}function Jfe(r){return r===null}RK.exports=new Yfe("tag:yaml.org,2002:null",{kind:"scalar",resolve:jfe,construct:qfe,predicate:Jfe,represent:{canonical:function(){return"~"},lowercase:function(){return"null"},uppercase:function(){return"NULL"},camelcase:function(){return"Null"}},defaultStyle:"lowercase"})});var LK=w((DZe,NK)=>{"use strict";var Wfe=si();function zfe(r){if(r===null)return!1;var e=r.length;return e===4&&(r==="true"||r==="True"||r==="TRUE")||e===5&&(r==="false"||r==="False"||r==="FALSE")}function Vfe(r){return r==="true"||r==="True"||r==="TRUE"}function Xfe(r){return Object.prototype.toString.call(r)==="[object Boolean]"}NK.exports=new Wfe("tag:yaml.org,2002:bool",{kind:"scalar",resolve:zfe,construct:Vfe,predicate:Xfe,represent:{lowercase:function(r){return r?"true":"false"},uppercase:function(r){return r?"TRUE":"FALSE"},camelcase:function(r){return r?"True":"False"}},defaultStyle:"lowercase"})});var OK=w((kZe,TK)=>{"use strict";var Zfe=tc(),_fe=si();function $fe(r){return 48<=r&&r<=57||65<=r&&r<=70||97<=r&&r<=102}function ehe(r){return 48<=r&&r<=55}function the(r){return 48<=r&&r<=57}function rhe(r){if(r===null)return!1;var e=r.length,t=0,i=!1,n;if(!e)return!1;if(n=r[t],(n==="-"||n==="+")&&(n=r[++t]),n==="0"){if(t+1===e)return!0;if(n=r[++t],n==="b"){for(t++;t=0?"0b"+r.toString(2):"-0b"+r.toString(2).slice(1)},octal:function(r){return r>=0?"0"+r.toString(8):"-0"+r.toString(8).slice(1)},decimal:function(r){return r.toString(10)},hexadecimal:function(r){return r>=0?"0x"+r.toString(16).toUpperCase():"-0x"+r.toString(16).toUpperCase().slice(1)}},defaultStyle:"decimal",styleAliases:{binary:[2,"bin"],octal:[8,"oct"],decimal:[10,"dec"],hexadecimal:[16,"hex"]}})});var KK=w((RZe,UK)=>{"use strict";var MK=tc(),she=si(),ohe=new RegExp("^(?:[-+]?(?:0|[1-9][0-9_]*)(?:\\.[0-9_]*)?(?:[eE][-+]?[0-9]+)?|\\.[0-9_]+(?:[eE][-+]?[0-9]+)?|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*|[-+]?\\.(?:inf|Inf|INF)|\\.(?:nan|NaN|NAN))$");function ahe(r){return!(r===null||!ohe.test(r)||r[r.length-1]==="_")}function Ahe(r){var e,t,i,n;return e=r.replace(/_/g,"").toLowerCase(),t=e[0]==="-"?-1:1,n=[],"+-".indexOf(e[0])>=0&&(e=e.slice(1)),e===".inf"?t===1?Number.POSITIVE_INFINITY:Number.NEGATIVE_INFINITY:e===".nan"?NaN:e.indexOf(":")>=0?(e.split(":").forEach(function(s){n.unshift(parseFloat(s,10))}),e=0,i=1,n.forEach(function(s){e+=s*i,i*=60}),t*e):t*parseFloat(e,10)}var lhe=/^[-+]?[0-9]+e/;function che(r,e){var t;if(isNaN(r))switch(e){case"lowercase":return".nan";case"uppercase":return".NAN";case"camelcase":return".NaN"}else if(Number.POSITIVE_INFINITY===r)switch(e){case"lowercase":return".inf";case"uppercase":return".INF";case"camelcase":return".Inf"}else if(Number.NEGATIVE_INFINITY===r)switch(e){case"lowercase":return"-.inf";case"uppercase":return"-.INF";case"camelcase":return"-.Inf"}else if(MK.isNegativeZero(r))return"-0.0";return t=r.toString(10),lhe.test(t)?t.replace("e",".e"):t}function uhe(r){return Object.prototype.toString.call(r)==="[object Number]"&&(r%1!==0||MK.isNegativeZero(r))}UK.exports=new she("tag:yaml.org,2002:float",{kind:"scalar",resolve:ahe,construct:Ahe,predicate:uhe,represent:che,defaultStyle:"lowercase"})});var YS=w((FZe,HK)=>{"use strict";var ghe=rc();HK.exports=new ghe({include:[CI()],implicit:[FK(),LK(),OK(),KK()]})});var jS=w((NZe,GK)=>{"use strict";var fhe=rc();GK.exports=new fhe({include:[YS()]})});var JK=w((LZe,qK)=>{"use strict";var hhe=si(),YK=new RegExp("^([0-9][0-9][0-9][0-9])-([0-9][0-9])-([0-9][0-9])$"),jK=new RegExp("^([0-9][0-9][0-9][0-9])-([0-9][0-9]?)-([0-9][0-9]?)(?:[Tt]|[ \\t]+)([0-9][0-9]?):([0-9][0-9]):([0-9][0-9])(?:\\.([0-9]*))?(?:[ \\t]*(Z|([-+])([0-9][0-9]?)(?::([0-9][0-9]))?))?$");function phe(r){return r===null?!1:YK.exec(r)!==null||jK.exec(r)!==null}function dhe(r){var e,t,i,n,s,o,a,l=0,c=null,u,g,f;if(e=YK.exec(r),e===null&&(e=jK.exec(r)),e===null)throw new Error("Date resolve error");if(t=+e[1],i=+e[2]-1,n=+e[3],!e[4])return new Date(Date.UTC(t,i,n));if(s=+e[4],o=+e[5],a=+e[6],e[7]){for(l=e[7].slice(0,3);l.length<3;)l+="0";l=+l}return e[9]&&(u=+e[10],g=+(e[11]||0),c=(u*60+g)*6e4,e[9]==="-"&&(c=-c)),f=new Date(Date.UTC(t,i,n,s,o,a,l)),c&&f.setTime(f.getTime()-c),f}function Che(r){return r.toISOString()}qK.exports=new hhe("tag:yaml.org,2002:timestamp",{kind:"scalar",resolve:phe,construct:dhe,instanceOf:Date,represent:Che})});var zK=w((TZe,WK)=>{"use strict";var mhe=si();function Ehe(r){return r==="<<"||r===null}WK.exports=new mhe("tag:yaml.org,2002:merge",{kind:"scalar",resolve:Ehe})});var ZK=w((OZe,XK)=>{"use strict";var ic;try{VK=J,ic=VK("buffer").Buffer}catch{}var VK,Ihe=si(),qS=`ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/= \r`;function yhe(r){if(r===null)return!1;var e,t,i=0,n=r.length,s=qS;for(t=0;t64)){if(e<0)return!1;i+=6}return i%8===0}function whe(r){var e,t,i=r.replace(/[\r\n=]/g,""),n=i.length,s=qS,o=0,a=[];for(e=0;e>16&255),a.push(o>>8&255),a.push(o&255)),o=o<<6|s.indexOf(i.charAt(e));return t=n%4*6,t===0?(a.push(o>>16&255),a.push(o>>8&255),a.push(o&255)):t===18?(a.push(o>>10&255),a.push(o>>2&255)):t===12&&a.push(o>>4&255),ic?ic.from?ic.from(a):new ic(a):a}function Bhe(r){var e="",t=0,i,n,s=r.length,o=qS;for(i=0;i>18&63],e+=o[t>>12&63],e+=o[t>>6&63],e+=o[t&63]),t=(t<<8)+r[i];return n=s%3,n===0?(e+=o[t>>18&63],e+=o[t>>12&63],e+=o[t>>6&63],e+=o[t&63]):n===2?(e+=o[t>>10&63],e+=o[t>>4&63],e+=o[t<<2&63],e+=o[64]):n===1&&(e+=o[t>>2&63],e+=o[t<<4&63],e+=o[64],e+=o[64]),e}function Qhe(r){return ic&&ic.isBuffer(r)}XK.exports=new Ihe("tag:yaml.org,2002:binary",{kind:"scalar",resolve:yhe,construct:whe,predicate:Qhe,represent:Bhe})});var $K=w((UZe,_K)=>{"use strict";var bhe=si(),She=Object.prototype.hasOwnProperty,vhe=Object.prototype.toString;function xhe(r){if(r===null)return!0;var e=[],t,i,n,s,o,a=r;for(t=0,i=a.length;t{"use strict";var Dhe=si(),khe=Object.prototype.toString;function Rhe(r){if(r===null)return!0;var e,t,i,n,s,o=r;for(s=new Array(o.length),e=0,t=o.length;e{"use strict";var Nhe=si(),Lhe=Object.prototype.hasOwnProperty;function The(r){if(r===null)return!0;var e,t=r;for(e in t)if(Lhe.call(t,e)&&t[e]!==null)return!1;return!0}function Ohe(r){return r!==null?r:{}}r2.exports=new Nhe("tag:yaml.org,2002:set",{kind:"mapping",resolve:The,construct:Ohe})});var Tg=w((GZe,n2)=>{"use strict";var Mhe=rc();n2.exports=new Mhe({include:[jS()],implicit:[JK(),zK()],explicit:[ZK(),$K(),t2(),i2()]})});var o2=w((YZe,s2)=>{"use strict";var Uhe=si();function Khe(){return!0}function Hhe(){}function Ghe(){return""}function Yhe(r){return typeof r>"u"}s2.exports=new Uhe("tag:yaml.org,2002:js/undefined",{kind:"scalar",resolve:Khe,construct:Hhe,predicate:Yhe,represent:Ghe})});var A2=w((jZe,a2)=>{"use strict";var jhe=si();function qhe(r){if(r===null||r.length===0)return!1;var e=r,t=/\/([gim]*)$/.exec(r),i="";return!(e[0]==="/"&&(t&&(i=t[1]),i.length>3||e[e.length-i.length-1]!=="/"))}function Jhe(r){var e=r,t=/\/([gim]*)$/.exec(r),i="";return e[0]==="/"&&(t&&(i=t[1]),e=e.slice(1,e.length-i.length-1)),new RegExp(e,i)}function Whe(r){var e="/"+r.source+"/";return r.global&&(e+="g"),r.multiline&&(e+="m"),r.ignoreCase&&(e+="i"),e}function zhe(r){return Object.prototype.toString.call(r)==="[object RegExp]"}a2.exports=new jhe("tag:yaml.org,2002:js/regexp",{kind:"scalar",resolve:qhe,construct:Jhe,predicate:zhe,represent:Whe})});var u2=w((qZe,c2)=>{"use strict";var mI;try{l2=J,mI=l2("esprima")}catch{typeof window<"u"&&(mI=window.esprima)}var l2,Vhe=si();function Xhe(r){if(r===null)return!1;try{var e="("+r+")",t=mI.parse(e,{range:!0});return!(t.type!=="Program"||t.body.length!==1||t.body[0].type!=="ExpressionStatement"||t.body[0].expression.type!=="ArrowFunctionExpression"&&t.body[0].expression.type!=="FunctionExpression")}catch{return!1}}function Zhe(r){var e="("+r+")",t=mI.parse(e,{range:!0}),i=[],n;if(t.type!=="Program"||t.body.length!==1||t.body[0].type!=="ExpressionStatement"||t.body[0].expression.type!=="ArrowFunctionExpression"&&t.body[0].expression.type!=="FunctionExpression")throw new Error("Failed to resolve function");return t.body[0].expression.params.forEach(function(s){i.push(s.name)}),n=t.body[0].expression.body.range,t.body[0].expression.body.type==="BlockStatement"?new Function(i,e.slice(n[0]+1,n[1]-1)):new Function(i,"return "+e.slice(n[0],n[1]))}function _he(r){return r.toString()}function $he(r){return Object.prototype.toString.call(r)==="[object Function]"}c2.exports=new Vhe("tag:yaml.org,2002:js/function",{kind:"scalar",resolve:Xhe,construct:Zhe,predicate:$he,represent:_he})});var Xp=w((WZe,f2)=>{"use strict";var g2=rc();f2.exports=g2.DEFAULT=new g2({include:[Tg()],explicit:[o2(),A2(),u2()]})});var R2=w((zZe,Zp)=>{"use strict";var wa=tc(),I2=Ng(),epe=IK(),y2=Tg(),tpe=Xp(),kA=Object.prototype.hasOwnProperty,EI=1,w2=2,B2=3,II=4,JS=1,rpe=2,h2=3,ipe=/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x84\x86-\x9F\uFFFE\uFFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/,npe=/[\x85\u2028\u2029]/,spe=/[,\[\]\{\}]/,Q2=/^(?:!|!!|![a-z\-]+!)$/i,b2=/^(?:!|[^,\[\]\{\}])(?:%[0-9a-f]{2}|[0-9a-z\-#;\/\?:@&=\+\$,_\.!~\*'\(\)\[\]])*$/i;function p2(r){return Object.prototype.toString.call(r)}function vo(r){return r===10||r===13}function sc(r){return r===9||r===32}function gn(r){return r===9||r===32||r===10||r===13}function Og(r){return r===44||r===91||r===93||r===123||r===125}function ope(r){var e;return 48<=r&&r<=57?r-48:(e=r|32,97<=e&&e<=102?e-97+10:-1)}function ape(r){return r===120?2:r===117?4:r===85?8:0}function Ape(r){return 48<=r&&r<=57?r-48:-1}function d2(r){return r===48?"\0":r===97?"\x07":r===98?"\b":r===116||r===9?" ":r===110?` `:r===118?"\v":r===102?"\f":r===114?"\r":r===101?"\x1B":r===32?" ":r===34?'"':r===47?"/":r===92?"\\":r===78?"\x85":r===95?"\xA0":r===76?"\u2028":r===80?"\u2029":""}function lpe(r){return r<=65535?String.fromCharCode(r):String.fromCharCode((r-65536>>10)+55296,(r-65536&1023)+56320)}var S2=new Array(256),v2=new Array(256);for(nc=0;nc<256;nc++)S2[nc]=d2(nc)?1:0,v2[nc]=d2(nc);var nc;function cpe(r,e){this.input=r,this.filename=e.filename||null,this.schema=e.schema||tpe,this.onWarning=e.onWarning||null,this.legacy=e.legacy||!1,this.json=e.json||!1,this.listener=e.listener||null,this.implicitTypes=this.schema.compiledImplicit,this.typeMap=this.schema.compiledTypeMap,this.length=r.length,this.position=0,this.line=0,this.lineStart=0,this.lineIndent=0,this.documents=[]}function x2(r,e){return new I2(e,new epe(r.filename,r.input,r.position,r.line,r.position-r.lineStart))}function ft(r,e){throw x2(r,e)}function yI(r,e){r.onWarning&&r.onWarning.call(null,x2(r,e))}var C2={YAML:function(e,t,i){var n,s,o;e.version!==null&&ft(e,"duplication of %YAML directive"),i.length!==1&&ft(e,"YAML directive accepts exactly one argument"),n=/^([0-9]+)\.([0-9]+)$/.exec(i[0]),n===null&&ft(e,"ill-formed argument of the YAML directive"),s=parseInt(n[1],10),o=parseInt(n[2],10),s!==1&&ft(e,"unacceptable YAML version of the document"),e.version=i[0],e.checkLineBreaks=o<2,o!==1&&o!==2&&yI(e,"unsupported YAML version of the document")},TAG:function(e,t,i){var n,s;i.length!==2&&ft(e,"TAG directive accepts exactly two arguments"),n=i[0],s=i[1],Q2.test(n)||ft(e,"ill-formed tag handle (first argument) of the TAG directive"),kA.call(e.tagMap,n)&&ft(e,'there is a previously declared suffix for "'+n+'" tag handle'),b2.test(s)||ft(e,"ill-formed tag prefix (second argument) of the TAG directive"),e.tagMap[n]=s}};function DA(r,e,t,i){var n,s,o,a;if(e1&&(r.result+=wa.repeat(` `,e-1))}function upe(r,e,t){var i,n,s,o,a,l,c,u,g=r.kind,f=r.result,h;if(h=r.input.charCodeAt(r.position),gn(h)||Og(h)||h===35||h===38||h===42||h===33||h===124||h===62||h===39||h===34||h===37||h===64||h===96||(h===63||h===45)&&(n=r.input.charCodeAt(r.position+1),gn(n)||t&&Og(n)))return!1;for(r.kind="scalar",r.result="",s=o=r.position,a=!1;h!==0;){if(h===58){if(n=r.input.charCodeAt(r.position+1),gn(n)||t&&Og(n))break}else if(h===35){if(i=r.input.charCodeAt(r.position-1),gn(i))break}else{if(r.position===r.lineStart&&wI(r)||t&&Og(h))break;if(vo(h))if(l=r.line,c=r.lineStart,u=r.lineIndent,zr(r,!1,-1),r.lineIndent>=e){a=!0,h=r.input.charCodeAt(r.position);continue}else{r.position=o,r.line=l,r.lineStart=c,r.lineIndent=u;break}}a&&(DA(r,s,o,!1),zS(r,r.line-l),s=o=r.position,a=!1),sc(h)||(o=r.position+1),h=r.input.charCodeAt(++r.position)}return DA(r,s,o,!1),r.result?!0:(r.kind=g,r.result=f,!1)}function gpe(r,e){var t,i,n;if(t=r.input.charCodeAt(r.position),t!==39)return!1;for(r.kind="scalar",r.result="",r.position++,i=n=r.position;(t=r.input.charCodeAt(r.position))!==0;)if(t===39)if(DA(r,i,r.position,!0),t=r.input.charCodeAt(++r.position),t===39)i=r.position,r.position++,n=r.position;else return!0;else vo(t)?(DA(r,i,n,!0),zS(r,zr(r,!1,e)),i=n=r.position):r.position===r.lineStart&&wI(r)?ft(r,"unexpected end of the document within a single quoted scalar"):(r.position++,n=r.position);ft(r,"unexpected end of the stream within a single quoted scalar")}function fpe(r,e){var t,i,n,s,o,a;if(a=r.input.charCodeAt(r.position),a!==34)return!1;for(r.kind="scalar",r.result="",r.position++,t=i=r.position;(a=r.input.charCodeAt(r.position))!==0;){if(a===34)return DA(r,t,r.position,!0),r.position++,!0;if(a===92){if(DA(r,t,r.position,!0),a=r.input.charCodeAt(++r.position),vo(a))zr(r,!1,e);else if(a<256&&S2[a])r.result+=v2[a],r.position++;else if((o=ape(a))>0){for(n=o,s=0;n>0;n--)a=r.input.charCodeAt(++r.position),(o=ope(a))>=0?s=(s<<4)+o:ft(r,"expected hexadecimal character");r.result+=lpe(s),r.position++}else ft(r,"unknown escape sequence");t=i=r.position}else vo(a)?(DA(r,t,i,!0),zS(r,zr(r,!1,e)),t=i=r.position):r.position===r.lineStart&&wI(r)?ft(r,"unexpected end of the document within a double quoted scalar"):(r.position++,i=r.position)}ft(r,"unexpected end of the stream within a double quoted scalar")}function hpe(r,e){var t=!0,i,n=r.tag,s,o=r.anchor,a,l,c,u,g,f={},h,p,C,y;if(y=r.input.charCodeAt(r.position),y===91)l=93,g=!1,s=[];else if(y===123)l=125,g=!0,s={};else return!1;for(r.anchor!==null&&(r.anchorMap[r.anchor]=s),y=r.input.charCodeAt(++r.position);y!==0;){if(zr(r,!0,e),y=r.input.charCodeAt(r.position),y===l)return r.position++,r.tag=n,r.anchor=o,r.kind=g?"mapping":"sequence",r.result=s,!0;t||ft(r,"missed comma between flow collection entries"),p=h=C=null,c=u=!1,y===63&&(a=r.input.charCodeAt(r.position+1),gn(a)&&(c=u=!0,r.position++,zr(r,!0,e))),i=r.line,Ug(r,e,EI,!1,!0),p=r.tag,h=r.result,zr(r,!0,e),y=r.input.charCodeAt(r.position),(u||r.line===i)&&y===58&&(c=!0,y=r.input.charCodeAt(++r.position),zr(r,!0,e),Ug(r,e,EI,!1,!0),C=r.result),g?Mg(r,s,f,p,h,C):c?s.push(Mg(r,null,f,p,h,C)):s.push(h),zr(r,!0,e),y=r.input.charCodeAt(r.position),y===44?(t=!0,y=r.input.charCodeAt(++r.position)):t=!1}ft(r,"unexpected end of the stream within a flow collection")}function ppe(r,e){var t,i,n=JS,s=!1,o=!1,a=e,l=0,c=!1,u,g;if(g=r.input.charCodeAt(r.position),g===124)i=!1;else if(g===62)i=!0;else return!1;for(r.kind="scalar",r.result="";g!==0;)if(g=r.input.charCodeAt(++r.position),g===43||g===45)JS===n?n=g===43?h2:rpe:ft(r,"repeat of a chomping mode identifier");else if((u=Ape(g))>=0)u===0?ft(r,"bad explicit indentation width of a block scalar; it cannot be less than one"):o?ft(r,"repeat of an indentation width identifier"):(a=e+u-1,o=!0);else break;if(sc(g)){do g=r.input.charCodeAt(++r.position);while(sc(g));if(g===35)do g=r.input.charCodeAt(++r.position);while(!vo(g)&&g!==0)}for(;g!==0;){for(WS(r),r.lineIndent=0,g=r.input.charCodeAt(r.position);(!o||r.lineIndenta&&(a=r.lineIndent),vo(g)){l++;continue}if(r.lineIndente)&&l!==0)ft(r,"bad indentation of a sequence entry");else if(r.lineIndente)&&(Ug(r,e,II,!0,n)&&(p?f=r.result:h=r.result),p||(Mg(r,c,u,g,f,h,s,o),g=f=h=null),zr(r,!0,-1),y=r.input.charCodeAt(r.position)),r.lineIndent>e&&y!==0)ft(r,"bad indentation of a mapping entry");else if(r.lineIndente?l=1:r.lineIndent===e?l=0:r.lineIndente?l=1:r.lineIndent===e?l=0:r.lineIndent tag; it should be "scalar", not "'+r.kind+'"'),g=0,f=r.implicitTypes.length;g tag; it should be "'+h.kind+'", not "'+r.kind+'"'),h.resolve(r.result)?(r.result=h.construct(r.result),r.anchor!==null&&(r.anchorMap[r.anchor]=r.result)):ft(r,"cannot resolve a node with !<"+r.tag+"> explicit tag")):ft(r,"unknown tag !<"+r.tag+">");return r.listener!==null&&r.listener("close",r),r.tag!==null||r.anchor!==null||u}function Ipe(r){var e=r.position,t,i,n,s=!1,o;for(r.version=null,r.checkLineBreaks=r.legacy,r.tagMap={},r.anchorMap={};(o=r.input.charCodeAt(r.position))!==0&&(zr(r,!0,-1),o=r.input.charCodeAt(r.position),!(r.lineIndent>0||o!==37));){for(s=!0,o=r.input.charCodeAt(++r.position),t=r.position;o!==0&&!gn(o);)o=r.input.charCodeAt(++r.position);for(i=r.input.slice(t,r.position),n=[],i.length<1&&ft(r,"directive name must not be less than one character in length");o!==0;){for(;sc(o);)o=r.input.charCodeAt(++r.position);if(o===35){do o=r.input.charCodeAt(++r.position);while(o!==0&&!vo(o));break}if(vo(o))break;for(t=r.position;o!==0&&!gn(o);)o=r.input.charCodeAt(++r.position);n.push(r.input.slice(t,r.position))}o!==0&&WS(r),kA.call(C2,i)?C2[i](r,i,n):yI(r,'unknown document directive "'+i+'"')}if(zr(r,!0,-1),r.lineIndent===0&&r.input.charCodeAt(r.position)===45&&r.input.charCodeAt(r.position+1)===45&&r.input.charCodeAt(r.position+2)===45?(r.position+=3,zr(r,!0,-1)):s&&ft(r,"directives end mark is expected"),Ug(r,r.lineIndent-1,II,!1,!0),zr(r,!0,-1),r.checkLineBreaks&&npe.test(r.input.slice(e,r.position))&&yI(r,"non-ASCII line breaks are interpreted as content"),r.documents.push(r.result),r.position===r.lineStart&&wI(r)){r.input.charCodeAt(r.position)===46&&(r.position+=3,zr(r,!0,-1));return}if(r.position"u"&&(t=e,e=null);var i=P2(r,t);if(typeof e!="function")return i;for(var n=0,s=i.length;n"u"&&(t=e,e=null),D2(r,e,wa.extend({schema:y2},t))}function wpe(r,e){return k2(r,wa.extend({schema:y2},e))}Zp.exports.loadAll=D2;Zp.exports.load=k2;Zp.exports.safeLoadAll=ype;Zp.exports.safeLoad=wpe});var tH=w((VZe,_S)=>{"use strict";var $p=tc(),ed=Ng(),Bpe=Xp(),Qpe=Tg(),K2=Object.prototype.toString,H2=Object.prototype.hasOwnProperty,bpe=9,_p=10,Spe=13,vpe=32,xpe=33,Ppe=34,G2=35,Dpe=37,kpe=38,Rpe=39,Fpe=42,Y2=44,Npe=45,j2=58,Lpe=61,Tpe=62,Ope=63,Mpe=64,q2=91,J2=93,Upe=96,W2=123,Kpe=124,z2=125,Ni={};Ni[0]="\\0";Ni[7]="\\a";Ni[8]="\\b";Ni[9]="\\t";Ni[10]="\\n";Ni[11]="\\v";Ni[12]="\\f";Ni[13]="\\r";Ni[27]="\\e";Ni[34]='\\"';Ni[92]="\\\\";Ni[133]="\\N";Ni[160]="\\_";Ni[8232]="\\L";Ni[8233]="\\P";var Hpe=["y","Y","yes","Yes","YES","on","On","ON","n","N","no","No","NO","off","Off","OFF"];function Gpe(r,e){var t,i,n,s,o,a,l;if(e===null)return{};for(t={},i=Object.keys(e),n=0,s=i.length;n0?r.charCodeAt(s-1):null,f=f&&L2(o,a)}else{for(s=0;si&&r[g+1]!==" ",g=s);else if(!Kg(o))return BI;a=s>0?r.charCodeAt(s-1):null,f=f&&L2(o,a)}c=c||u&&s-g-1>i&&r[g+1]!==" "}return!l&&!c?f&&!n(r)?X2:Z2:t>9&&V2(r)?BI:c?$2:_2}function zpe(r,e,t,i){r.dump=function(){if(e.length===0)return"''";if(!r.noCompatMode&&Hpe.indexOf(e)!==-1)return"'"+e+"'";var n=r.indent*Math.max(1,t),s=r.lineWidth===-1?-1:Math.max(Math.min(r.lineWidth,40),r.lineWidth-n),o=i||r.flowLevel>-1&&t>=r.flowLevel;function a(l){return jpe(r,l)}switch(Wpe(e,o,r.indent,s,a)){case X2:return e;case Z2:return"'"+e.replace(/'/g,"''")+"'";case _2:return"|"+T2(e,r.indent)+O2(N2(e,n));case $2:return">"+T2(e,r.indent)+O2(N2(Vpe(e,s),n));case BI:return'"'+Xpe(e,s)+'"';default:throw new ed("impossible error: invalid scalar style")}}()}function T2(r,e){var t=V2(r)?String(e):"",i=r[r.length-1]===` `,n=i&&(r[r.length-2]===` `||r===` `),s=n?"+":i?"":"-";return t+s+` `}function O2(r){return r[r.length-1]===` `?r.slice(0,-1):r}function Vpe(r,e){for(var t=/(\n+)([^\n]*)/g,i=function(){var c=r.indexOf(` `);return c=c!==-1?c:r.length,t.lastIndex=c,M2(r.slice(0,c),e)}(),n=r[0]===` `||r[0]===" ",s,o;o=t.exec(r);){var a=o[1],l=o[2];s=l[0]===" ",i+=a+(!n&&!s&&l!==""?` `:"")+M2(l,e),n=s}return i}function M2(r,e){if(r===""||r[0]===" ")return r;for(var t=/ [^ ]/g,i,n=0,s,o=0,a=0,l="";i=t.exec(r);)a=i.index,a-n>e&&(s=o>n?o:a,l+=` `+r.slice(n,s),n=s+1),o=a;return l+=` `,r.length-n>e&&o>n?l+=r.slice(n,o)+` `+r.slice(o+1):l+=r.slice(n),l.slice(1)}function Xpe(r){for(var e="",t,i,n,s=0;s=55296&&t<=56319&&(i=r.charCodeAt(s+1),i>=56320&&i<=57343)){e+=F2((t-55296)*1024+i-56320+65536),s++;continue}n=Ni[t],e+=!n&&Kg(t)?r[s]:n||F2(t)}return e}function Zpe(r,e,t){var i="",n=r.tag,s,o;for(s=0,o=t.length;s1024&&(u+="? "),u+=r.dump+(r.condenseFlow?'"':"")+":"+(r.condenseFlow?"":" "),oc(r,e,c,!1,!1)&&(u+=r.dump,i+=u));r.tag=n,r.dump="{"+i+"}"}function ede(r,e,t,i){var n="",s=r.tag,o=Object.keys(t),a,l,c,u,g,f;if(r.sortKeys===!0)o.sort();else if(typeof r.sortKeys=="function")o.sort(r.sortKeys);else if(r.sortKeys)throw new ed("sortKeys must be a boolean or a function");for(a=0,l=o.length;a1024,g&&(r.dump&&_p===r.dump.charCodeAt(0)?f+="?":f+="? "),f+=r.dump,g&&(f+=VS(r,e)),oc(r,e+1,u,!0,g)&&(r.dump&&_p===r.dump.charCodeAt(0)?f+=":":f+=": ",f+=r.dump,n+=f));r.tag=s,r.dump=n||"{}"}function U2(r,e,t){var i,n,s,o,a,l;for(n=t?r.explicitTypes:r.implicitTypes,s=0,o=n.length;s tag resolver accepts not "'+l+'" style');r.dump=i}return!0}return!1}function oc(r,e,t,i,n,s){r.tag=null,r.dump=t,U2(r,t,!1)||U2(r,t,!0);var o=K2.call(r.dump);i&&(i=r.flowLevel<0||r.flowLevel>e);var a=o==="[object Object]"||o==="[object Array]",l,c;if(a&&(l=r.duplicates.indexOf(t),c=l!==-1),(r.tag!==null&&r.tag!=="?"||c||r.indent!==2&&e>0)&&(n=!1),c&&r.usedDuplicates[l])r.dump="*ref_"+l;else{if(a&&c&&!r.usedDuplicates[l]&&(r.usedDuplicates[l]=!0),o==="[object Object]")i&&Object.keys(r.dump).length!==0?(ede(r,e,r.dump,n),c&&(r.dump="&ref_"+l+r.dump)):($pe(r,e,r.dump),c&&(r.dump="&ref_"+l+" "+r.dump));else if(o==="[object Array]"){var u=r.noArrayIndent&&e>0?e-1:e;i&&r.dump.length!==0?(_pe(r,u,r.dump,n),c&&(r.dump="&ref_"+l+r.dump)):(Zpe(r,u,r.dump),c&&(r.dump="&ref_"+l+" "+r.dump))}else if(o==="[object String]")r.tag!=="?"&&zpe(r,r.dump,e,s);else{if(r.skipInvalid)return!1;throw new ed("unacceptable kind of an object to dump "+o)}r.tag!==null&&r.tag!=="?"&&(r.dump="!<"+r.tag+"> "+r.dump)}return!0}function tde(r,e){var t=[],i=[],n,s;for(XS(r,t,i),n=0,s=i.length;n{"use strict";var QI=R2(),rH=tH();function bI(r){return function(){throw new Error("Function "+r+" is deprecated and cannot be used.")}}Fr.exports.Type=si();Fr.exports.Schema=rc();Fr.exports.FAILSAFE_SCHEMA=CI();Fr.exports.JSON_SCHEMA=YS();Fr.exports.CORE_SCHEMA=jS();Fr.exports.DEFAULT_SAFE_SCHEMA=Tg();Fr.exports.DEFAULT_FULL_SCHEMA=Xp();Fr.exports.load=QI.load;Fr.exports.loadAll=QI.loadAll;Fr.exports.safeLoad=QI.safeLoad;Fr.exports.safeLoadAll=QI.safeLoadAll;Fr.exports.dump=rH.dump;Fr.exports.safeDump=rH.safeDump;Fr.exports.YAMLException=Ng();Fr.exports.MINIMAL_SCHEMA=CI();Fr.exports.SAFE_SCHEMA=Tg();Fr.exports.DEFAULT_SCHEMA=Xp();Fr.exports.scan=bI("scan");Fr.exports.parse=bI("parse");Fr.exports.compose=bI("compose");Fr.exports.addConstructor=bI("addConstructor")});var sH=w((ZZe,nH)=>{"use strict";var ide=iH();nH.exports=ide});var aH=w((_Ze,oH)=>{"use strict";function nde(r,e){function t(){this.constructor=r}t.prototype=e.prototype,r.prototype=new t}function ac(r,e,t,i){this.message=r,this.expected=e,this.found=t,this.location=i,this.name="SyntaxError",typeof Error.captureStackTrace=="function"&&Error.captureStackTrace(this,ac)}nde(ac,Error);ac.buildMessage=function(r,e){var t={literal:function(c){return'"'+n(c.text)+'"'},class:function(c){var u="",g;for(g=0;g0){for(g=1,f=1;g({[Ue]:Ce})))},H=function(R){return R},j=function(R){return R},$=Ks("correct indentation"),V=" ",W=ar(" ",!1),_=function(R){return R.length===QA*yg},A=function(R){return R.length===(QA+1)*yg},Ae=function(){return QA++,!0},ge=function(){return QA--,!0},re=function(){return pg()},O=Ks("pseudostring"),F=/^[^\r\n\t ?:,\][{}#&*!|>'"%@`\-]/,ue=Nn(["\r",` `," "," ","?",":",",","]","[","{","}","#","&","*","!","|",">","'",'"',"%","@","`","-"],!0,!1),pe=/^[^\r\n\t ,\][{}:#"']/,ke=Nn(["\r",` `," "," ",",","]","[","{","}",":","#",'"',"'"],!0,!1),Fe=function(){return pg().replace(/^ *| *$/g,"")},Ne="--",oe=ar("--",!1),le=/^[a-zA-Z\/0-9]/,Be=Nn([["a","z"],["A","Z"],"/",["0","9"]],!1,!1),fe=/^[^\r\n\t :,]/,ae=Nn(["\r",` `," "," ",":",","],!0,!1),qe="null",ne=ar("null",!1),Y=function(){return null},he="true",ie=ar("true",!1),de=function(){return!0},_e="false",Pt=ar("false",!1),It=function(){return!1},Or=Ks("string"),ii='"',gi=ar('"',!1),hr=function(){return""},fi=function(R){return R},ni=function(R){return R.join("")},Us=/^[^"\\\0-\x1F\x7F]/,pr=Nn(['"',"\\",["\0",""],"\x7F"],!0,!1),Ii='\\"',rs=ar('\\"',!1),ga=function(){return'"'},dA="\\\\",cg=ar("\\\\",!1),is=function(){return"\\"},CA="\\/",fa=ar("\\/",!1),wp=function(){return"/"},mA="\\b",EA=ar("\\b",!1),wr=function(){return"\b"},Ll="\\f",ug=ar("\\f",!1),Io=function(){return"\f"},gg="\\n",Bp=ar("\\n",!1),Qp=function(){return` `},vr="\\r",se=ar("\\r",!1),yo=function(){return"\r"},Rn="\\t",fg=ar("\\t",!1),Qt=function(){return" "},Tl="\\u",Fn=ar("\\u",!1),ns=function(R,q,Ce,Ue){return String.fromCharCode(parseInt(`0x${R}${q}${Ce}${Ue}`))},ss=/^[0-9a-fA-F]/,gt=Nn([["0","9"],["a","f"],["A","F"]],!1,!1),wo=Ks("blank space"),At=/^[ \t]/,An=Nn([" "," "],!1,!1),S=Ks("white space"),Tt=/^[ \t\n\r]/,hg=Nn([" "," ",` `,"\r"],!1,!1),Ol=`\r `,bp=ar(`\r `,!1),Sp=` `,vp=ar(` `,!1),xp="\r",Pp=ar("\r",!1),G=0,yt=0,IA=[{line:1,column:1}],Wi=0,Ml=[],Xe=0,ha;if("startRule"in e){if(!(e.startRule in i))throw new Error(`Can't start parsing from rule "`+e.startRule+'".');n=i[e.startRule]}function pg(){return r.substring(yt,G)}function OE(){return ln(yt,G)}function Dp(R,q){throw q=q!==void 0?q:ln(yt,G),Kl([Ks(R)],r.substring(yt,G),q)}function ME(R,q){throw q=q!==void 0?q:ln(yt,G),dg(R,q)}function ar(R,q){return{type:"literal",text:R,ignoreCase:q}}function Nn(R,q,Ce){return{type:"class",parts:R,inverted:q,ignoreCase:Ce}}function Ul(){return{type:"any"}}function kp(){return{type:"end"}}function Ks(R){return{type:"other",description:R}}function pa(R){var q=IA[R],Ce;if(q)return q;for(Ce=R-1;!IA[Ce];)Ce--;for(q=IA[Ce],q={line:q.line,column:q.column};CeWi&&(Wi=G,Ml=[]),Ml.push(R))}function dg(R,q){return new ac(R,null,null,q)}function Kl(R,q,Ce){return new ac(ac.buildMessage(R,q),R,q,Ce)}function Hs(){var R;return R=Cg(),R}function Hl(){var R,q,Ce;for(R=G,q=[],Ce=yA();Ce!==t;)q.push(Ce),Ce=yA();return q!==t&&(yt=R,q=s(q)),R=q,R}function yA(){var R,q,Ce,Ue,Re;return R=G,q=Ca(),q!==t?(r.charCodeAt(G)===45?(Ce=o,G++):(Ce=t,Xe===0&&Te(a)),Ce!==t?(Ue=Rr(),Ue!==t?(Re=da(),Re!==t?(yt=R,q=l(Re),R=q):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t),R}function Cg(){var R,q,Ce;for(R=G,q=[],Ce=mg();Ce!==t;)q.push(Ce),Ce=mg();return q!==t&&(yt=R,q=c(q)),R=q,R}function mg(){var R,q,Ce,Ue,Re,ze,dt,Ft,Ln;if(R=G,q=Rr(),q===t&&(q=null),q!==t){if(Ce=G,r.charCodeAt(G)===35?(Ue=u,G++):(Ue=t,Xe===0&&Te(g)),Ue!==t){if(Re=[],ze=G,dt=G,Xe++,Ft=js(),Xe--,Ft===t?dt=void 0:(G=dt,dt=t),dt!==t?(r.length>G?(Ft=r.charAt(G),G++):(Ft=t,Xe===0&&Te(f)),Ft!==t?(dt=[dt,Ft],ze=dt):(G=ze,ze=t)):(G=ze,ze=t),ze!==t)for(;ze!==t;)Re.push(ze),ze=G,dt=G,Xe++,Ft=js(),Xe--,Ft===t?dt=void 0:(G=dt,dt=t),dt!==t?(r.length>G?(Ft=r.charAt(G),G++):(Ft=t,Xe===0&&Te(f)),Ft!==t?(dt=[dt,Ft],ze=dt):(G=ze,ze=t)):(G=ze,ze=t);else Re=t;Re!==t?(Ue=[Ue,Re],Ce=Ue):(G=Ce,Ce=t)}else G=Ce,Ce=t;if(Ce===t&&(Ce=null),Ce!==t){if(Ue=[],Re=Ys(),Re!==t)for(;Re!==t;)Ue.push(Re),Re=Ys();else Ue=t;Ue!==t?(yt=R,q=h(),R=q):(G=R,R=t)}else G=R,R=t}else G=R,R=t;if(R===t&&(R=G,q=Ca(),q!==t?(Ce=Gl(),Ce!==t?(Ue=Rr(),Ue===t&&(Ue=null),Ue!==t?(r.charCodeAt(G)===58?(Re=p,G++):(Re=t,Xe===0&&Te(C)),Re!==t?(ze=Rr(),ze===t&&(ze=null),ze!==t?(dt=da(),dt!==t?(yt=R,q=y(Ce,dt),R=q):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t),R===t&&(R=G,q=Ca(),q!==t?(Ce=Gs(),Ce!==t?(Ue=Rr(),Ue===t&&(Ue=null),Ue!==t?(r.charCodeAt(G)===58?(Re=p,G++):(Re=t,Xe===0&&Te(C)),Re!==t?(ze=Rr(),ze===t&&(ze=null),ze!==t?(dt=da(),dt!==t?(yt=R,q=y(Ce,dt),R=q):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t),R===t))){if(R=G,q=Ca(),q!==t)if(Ce=Gs(),Ce!==t)if(Ue=Rr(),Ue!==t)if(Re=UE(),Re!==t){if(ze=[],dt=Ys(),dt!==t)for(;dt!==t;)ze.push(dt),dt=Ys();else ze=t;ze!==t?(yt=R,q=y(Ce,Re),R=q):(G=R,R=t)}else G=R,R=t;else G=R,R=t;else G=R,R=t;else G=R,R=t;if(R===t)if(R=G,q=Ca(),q!==t)if(Ce=Gs(),Ce!==t){if(Ue=[],Re=G,ze=Rr(),ze===t&&(ze=null),ze!==t?(r.charCodeAt(G)===44?(dt=B,G++):(dt=t,Xe===0&&Te(v)),dt!==t?(Ft=Rr(),Ft===t&&(Ft=null),Ft!==t?(Ln=Gs(),Ln!==t?(yt=Re,ze=D(Ce,Ln),Re=ze):(G=Re,Re=t)):(G=Re,Re=t)):(G=Re,Re=t)):(G=Re,Re=t),Re!==t)for(;Re!==t;)Ue.push(Re),Re=G,ze=Rr(),ze===t&&(ze=null),ze!==t?(r.charCodeAt(G)===44?(dt=B,G++):(dt=t,Xe===0&&Te(v)),dt!==t?(Ft=Rr(),Ft===t&&(Ft=null),Ft!==t?(Ln=Gs(),Ln!==t?(yt=Re,ze=D(Ce,Ln),Re=ze):(G=Re,Re=t)):(G=Re,Re=t)):(G=Re,Re=t)):(G=Re,Re=t);else Ue=t;Ue!==t?(Re=Rr(),Re===t&&(Re=null),Re!==t?(r.charCodeAt(G)===58?(ze=p,G++):(ze=t,Xe===0&&Te(C)),ze!==t?(dt=Rr(),dt===t&&(dt=null),dt!==t?(Ft=da(),Ft!==t?(yt=R,q=L(Ce,Ue,Ft),R=q):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)}else G=R,R=t;else G=R,R=t}return R}function da(){var R,q,Ce,Ue,Re,ze,dt;if(R=G,q=G,Xe++,Ce=G,Ue=js(),Ue!==t?(Re=rt(),Re!==t?(r.charCodeAt(G)===45?(ze=o,G++):(ze=t,Xe===0&&Te(a)),ze!==t?(dt=Rr(),dt!==t?(Ue=[Ue,Re,ze,dt],Ce=Ue):(G=Ce,Ce=t)):(G=Ce,Ce=t)):(G=Ce,Ce=t)):(G=Ce,Ce=t),Xe--,Ce!==t?(G=q,q=void 0):q=t,q!==t?(Ce=Ys(),Ce!==t?(Ue=Bo(),Ue!==t?(Re=Hl(),Re!==t?(ze=wA(),ze!==t?(yt=R,q=H(Re),R=q):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t),R===t&&(R=G,q=js(),q!==t?(Ce=Bo(),Ce!==t?(Ue=Cg(),Ue!==t?(Re=wA(),Re!==t?(yt=R,q=H(Ue),R=q):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t),R===t))if(R=G,q=Yl(),q!==t){if(Ce=[],Ue=Ys(),Ue!==t)for(;Ue!==t;)Ce.push(Ue),Ue=Ys();else Ce=t;Ce!==t?(yt=R,q=j(q),R=q):(G=R,R=t)}else G=R,R=t;return R}function Ca(){var R,q,Ce;for(Xe++,R=G,q=[],r.charCodeAt(G)===32?(Ce=V,G++):(Ce=t,Xe===0&&Te(W));Ce!==t;)q.push(Ce),r.charCodeAt(G)===32?(Ce=V,G++):(Ce=t,Xe===0&&Te(W));return q!==t?(yt=G,Ce=_(q),Ce?Ce=void 0:Ce=t,Ce!==t?(q=[q,Ce],R=q):(G=R,R=t)):(G=R,R=t),Xe--,R===t&&(q=t,Xe===0&&Te($)),R}function rt(){var R,q,Ce;for(R=G,q=[],r.charCodeAt(G)===32?(Ce=V,G++):(Ce=t,Xe===0&&Te(W));Ce!==t;)q.push(Ce),r.charCodeAt(G)===32?(Ce=V,G++):(Ce=t,Xe===0&&Te(W));return q!==t?(yt=G,Ce=A(q),Ce?Ce=void 0:Ce=t,Ce!==t?(q=[q,Ce],R=q):(G=R,R=t)):(G=R,R=t),R}function Bo(){var R;return yt=G,R=Ae(),R?R=void 0:R=t,R}function wA(){var R;return yt=G,R=ge(),R?R=void 0:R=t,R}function Gl(){var R;return R=jl(),R===t&&(R=Rp()),R}function Gs(){var R,q,Ce;if(R=jl(),R===t){if(R=G,q=[],Ce=Eg(),Ce!==t)for(;Ce!==t;)q.push(Ce),Ce=Eg();else q=t;q!==t&&(yt=R,q=re()),R=q}return R}function Yl(){var R;return R=Fp(),R===t&&(R=KE(),R===t&&(R=jl(),R===t&&(R=Rp()))),R}function UE(){var R;return R=Fp(),R===t&&(R=jl(),R===t&&(R=Eg())),R}function Rp(){var R,q,Ce,Ue,Re,ze;if(Xe++,R=G,F.test(r.charAt(G))?(q=r.charAt(G),G++):(q=t,Xe===0&&Te(ue)),q!==t){for(Ce=[],Ue=G,Re=Rr(),Re===t&&(Re=null),Re!==t?(pe.test(r.charAt(G))?(ze=r.charAt(G),G++):(ze=t,Xe===0&&Te(ke)),ze!==t?(Re=[Re,ze],Ue=Re):(G=Ue,Ue=t)):(G=Ue,Ue=t);Ue!==t;)Ce.push(Ue),Ue=G,Re=Rr(),Re===t&&(Re=null),Re!==t?(pe.test(r.charAt(G))?(ze=r.charAt(G),G++):(ze=t,Xe===0&&Te(ke)),ze!==t?(Re=[Re,ze],Ue=Re):(G=Ue,Ue=t)):(G=Ue,Ue=t);Ce!==t?(yt=R,q=Fe(),R=q):(G=R,R=t)}else G=R,R=t;return Xe--,R===t&&(q=t,Xe===0&&Te(O)),R}function Eg(){var R,q,Ce,Ue,Re;if(R=G,r.substr(G,2)===Ne?(q=Ne,G+=2):(q=t,Xe===0&&Te(oe)),q===t&&(q=null),q!==t)if(le.test(r.charAt(G))?(Ce=r.charAt(G),G++):(Ce=t,Xe===0&&Te(Be)),Ce!==t){for(Ue=[],fe.test(r.charAt(G))?(Re=r.charAt(G),G++):(Re=t,Xe===0&&Te(ae));Re!==t;)Ue.push(Re),fe.test(r.charAt(G))?(Re=r.charAt(G),G++):(Re=t,Xe===0&&Te(ae));Ue!==t?(yt=R,q=Fe(),R=q):(G=R,R=t)}else G=R,R=t;else G=R,R=t;return R}function Fp(){var R,q;return R=G,r.substr(G,4)===qe?(q=qe,G+=4):(q=t,Xe===0&&Te(ne)),q!==t&&(yt=R,q=Y()),R=q,R}function KE(){var R,q;return R=G,r.substr(G,4)===he?(q=he,G+=4):(q=t,Xe===0&&Te(ie)),q!==t&&(yt=R,q=de()),R=q,R===t&&(R=G,r.substr(G,5)===_e?(q=_e,G+=5):(q=t,Xe===0&&Te(Pt)),q!==t&&(yt=R,q=It()),R=q),R}function jl(){var R,q,Ce,Ue;return Xe++,R=G,r.charCodeAt(G)===34?(q=ii,G++):(q=t,Xe===0&&Te(gi)),q!==t?(r.charCodeAt(G)===34?(Ce=ii,G++):(Ce=t,Xe===0&&Te(gi)),Ce!==t?(yt=R,q=hr(),R=q):(G=R,R=t)):(G=R,R=t),R===t&&(R=G,r.charCodeAt(G)===34?(q=ii,G++):(q=t,Xe===0&&Te(gi)),q!==t?(Ce=HE(),Ce!==t?(r.charCodeAt(G)===34?(Ue=ii,G++):(Ue=t,Xe===0&&Te(gi)),Ue!==t?(yt=R,q=fi(Ce),R=q):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)),Xe--,R===t&&(q=t,Xe===0&&Te(Or)),R}function HE(){var R,q,Ce;if(R=G,q=[],Ce=Ig(),Ce!==t)for(;Ce!==t;)q.push(Ce),Ce=Ig();else q=t;return q!==t&&(yt=R,q=ni(q)),R=q,R}function Ig(){var R,q,Ce,Ue,Re,ze;return Us.test(r.charAt(G))?(R=r.charAt(G),G++):(R=t,Xe===0&&Te(pr)),R===t&&(R=G,r.substr(G,2)===Ii?(q=Ii,G+=2):(q=t,Xe===0&&Te(rs)),q!==t&&(yt=R,q=ga()),R=q,R===t&&(R=G,r.substr(G,2)===dA?(q=dA,G+=2):(q=t,Xe===0&&Te(cg)),q!==t&&(yt=R,q=is()),R=q,R===t&&(R=G,r.substr(G,2)===CA?(q=CA,G+=2):(q=t,Xe===0&&Te(fa)),q!==t&&(yt=R,q=wp()),R=q,R===t&&(R=G,r.substr(G,2)===mA?(q=mA,G+=2):(q=t,Xe===0&&Te(EA)),q!==t&&(yt=R,q=wr()),R=q,R===t&&(R=G,r.substr(G,2)===Ll?(q=Ll,G+=2):(q=t,Xe===0&&Te(ug)),q!==t&&(yt=R,q=Io()),R=q,R===t&&(R=G,r.substr(G,2)===gg?(q=gg,G+=2):(q=t,Xe===0&&Te(Bp)),q!==t&&(yt=R,q=Qp()),R=q,R===t&&(R=G,r.substr(G,2)===vr?(q=vr,G+=2):(q=t,Xe===0&&Te(se)),q!==t&&(yt=R,q=yo()),R=q,R===t&&(R=G,r.substr(G,2)===Rn?(q=Rn,G+=2):(q=t,Xe===0&&Te(fg)),q!==t&&(yt=R,q=Qt()),R=q,R===t&&(R=G,r.substr(G,2)===Tl?(q=Tl,G+=2):(q=t,Xe===0&&Te(Fn)),q!==t?(Ce=BA(),Ce!==t?(Ue=BA(),Ue!==t?(Re=BA(),Re!==t?(ze=BA(),ze!==t?(yt=R,q=ns(Ce,Ue,Re,ze),R=q):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)):(G=R,R=t)))))))))),R}function BA(){var R;return ss.test(r.charAt(G))?(R=r.charAt(G),G++):(R=t,Xe===0&&Te(gt)),R}function Rr(){var R,q;if(Xe++,R=[],At.test(r.charAt(G))?(q=r.charAt(G),G++):(q=t,Xe===0&&Te(An)),q!==t)for(;q!==t;)R.push(q),At.test(r.charAt(G))?(q=r.charAt(G),G++):(q=t,Xe===0&&Te(An));else R=t;return Xe--,R===t&&(q=t,Xe===0&&Te(wo)),R}function GE(){var R,q;if(Xe++,R=[],Tt.test(r.charAt(G))?(q=r.charAt(G),G++):(q=t,Xe===0&&Te(hg)),q!==t)for(;q!==t;)R.push(q),Tt.test(r.charAt(G))?(q=r.charAt(G),G++):(q=t,Xe===0&&Te(hg));else R=t;return Xe--,R===t&&(q=t,Xe===0&&Te(S)),R}function Ys(){var R,q,Ce,Ue,Re,ze;if(R=G,q=js(),q!==t){for(Ce=[],Ue=G,Re=Rr(),Re===t&&(Re=null),Re!==t?(ze=js(),ze!==t?(Re=[Re,ze],Ue=Re):(G=Ue,Ue=t)):(G=Ue,Ue=t);Ue!==t;)Ce.push(Ue),Ue=G,Re=Rr(),Re===t&&(Re=null),Re!==t?(ze=js(),ze!==t?(Re=[Re,ze],Ue=Re):(G=Ue,Ue=t)):(G=Ue,Ue=t);Ce!==t?(q=[q,Ce],R=q):(G=R,R=t)}else G=R,R=t;return R}function js(){var R;return r.substr(G,2)===Ol?(R=Ol,G+=2):(R=t,Xe===0&&Te(bp)),R===t&&(r.charCodeAt(G)===10?(R=Sp,G++):(R=t,Xe===0&&Te(vp)),R===t&&(r.charCodeAt(G)===13?(R=xp,G++):(R=t,Xe===0&&Te(Pp)))),R}let yg=2,QA=0;if(ha=n(),ha!==t&&G===r.length)return ha;throw ha!==t&&G{"use strict";var cde=r=>{let e=!1,t=!1,i=!1;for(let n=0;n{if(!(typeof r=="string"||Array.isArray(r)))throw new TypeError("Expected the input to be `string | string[]`");e=Object.assign({pascalCase:!1},e);let t=n=>e.pascalCase?n.charAt(0).toUpperCase()+n.slice(1):n;return Array.isArray(r)?r=r.map(n=>n.trim()).filter(n=>n.length).join("-"):r=r.trim(),r.length===0?"":r.length===1?e.pascalCase?r.toUpperCase():r.toLowerCase():(r!==r.toLowerCase()&&(r=cde(r)),r=r.replace(/^[_.\- ]+/,"").toLowerCase().replace(/[_.\- ]+(\w|$)/g,(n,s)=>s.toUpperCase()).replace(/\d+(\w|$)/g,n=>n.toUpperCase()),t(r))};ev.exports=gH;ev.exports.default=gH});var hH=w((n_e,ude)=>{ude.exports=[{name:"AppVeyor",constant:"APPVEYOR",env:"APPVEYOR",pr:"APPVEYOR_PULL_REQUEST_NUMBER"},{name:"Azure Pipelines",constant:"AZURE_PIPELINES",env:"SYSTEM_TEAMFOUNDATIONCOLLECTIONURI",pr:"SYSTEM_PULLREQUEST_PULLREQUESTID"},{name:"Appcircle",constant:"APPCIRCLE",env:"AC_APPCIRCLE"},{name:"Bamboo",constant:"BAMBOO",env:"bamboo_planKey"},{name:"Bitbucket Pipelines",constant:"BITBUCKET",env:"BITBUCKET_COMMIT",pr:"BITBUCKET_PR_ID"},{name:"Bitrise",constant:"BITRISE",env:"BITRISE_IO",pr:"BITRISE_PULL_REQUEST"},{name:"Buddy",constant:"BUDDY",env:"BUDDY_WORKSPACE_ID",pr:"BUDDY_EXECUTION_PULL_REQUEST_ID"},{name:"Buildkite",constant:"BUILDKITE",env:"BUILDKITE",pr:{env:"BUILDKITE_PULL_REQUEST",ne:"false"}},{name:"CircleCI",constant:"CIRCLE",env:"CIRCLECI",pr:"CIRCLE_PULL_REQUEST"},{name:"Cirrus CI",constant:"CIRRUS",env:"CIRRUS_CI",pr:"CIRRUS_PR"},{name:"AWS CodeBuild",constant:"CODEBUILD",env:"CODEBUILD_BUILD_ARN"},{name:"Codefresh",constant:"CODEFRESH",env:"CF_BUILD_ID",pr:{any:["CF_PULL_REQUEST_NUMBER","CF_PULL_REQUEST_ID"]}},{name:"Codeship",constant:"CODESHIP",env:{CI_NAME:"codeship"}},{name:"Drone",constant:"DRONE",env:"DRONE",pr:{DRONE_BUILD_EVENT:"pull_request"}},{name:"dsari",constant:"DSARI",env:"DSARI"},{name:"GitHub Actions",constant:"GITHUB_ACTIONS",env:"GITHUB_ACTIONS",pr:{GITHUB_EVENT_NAME:"pull_request"}},{name:"GitLab CI",constant:"GITLAB",env:"GITLAB_CI",pr:"CI_MERGE_REQUEST_ID"},{name:"GoCD",constant:"GOCD",env:"GO_PIPELINE_LABEL"},{name:"LayerCI",constant:"LAYERCI",env:"LAYERCI",pr:"LAYERCI_PULL_REQUEST"},{name:"Hudson",constant:"HUDSON",env:"HUDSON_URL"},{name:"Jenkins",constant:"JENKINS",env:["JENKINS_URL","BUILD_ID"],pr:{any:["ghprbPullId","CHANGE_ID"]}},{name:"Magnum CI",constant:"MAGNUM",env:"MAGNUM"},{name:"Netlify CI",constant:"NETLIFY",env:"NETLIFY",pr:{env:"PULL_REQUEST",ne:"false"}},{name:"Nevercode",constant:"NEVERCODE",env:"NEVERCODE",pr:{env:"NEVERCODE_PULL_REQUEST",ne:"false"}},{name:"Render",constant:"RENDER",env:"RENDER",pr:{IS_PULL_REQUEST:"true"}},{name:"Sail CI",constant:"SAIL",env:"SAILCI",pr:"SAIL_PULL_REQUEST_NUMBER"},{name:"Semaphore",constant:"SEMAPHORE",env:"SEMAPHORE",pr:"PULL_REQUEST_NUMBER"},{name:"Screwdriver",constant:"SCREWDRIVER",env:"SCREWDRIVER",pr:{env:"SD_PULL_REQUEST",ne:"false"}},{name:"Shippable",constant:"SHIPPABLE",env:"SHIPPABLE",pr:{IS_PULL_REQUEST:"true"}},{name:"Solano CI",constant:"SOLANO",env:"TDDIUM",pr:"TDDIUM_PR_ID"},{name:"Strider CD",constant:"STRIDER",env:"STRIDER"},{name:"TaskCluster",constant:"TASKCLUSTER",env:["TASK_ID","RUN_ID"]},{name:"TeamCity",constant:"TEAMCITY",env:"TEAMCITY_VERSION"},{name:"Travis CI",constant:"TRAVIS",env:"TRAVIS",pr:{env:"TRAVIS_PULL_REQUEST",ne:"false"}},{name:"Vercel",constant:"VERCEL",env:"NOW_BUILDER"},{name:"Visual Studio App Center",constant:"APPCENTER",env:"APPCENTER_BUILD_ID"}]});var Ac=w(Un=>{"use strict";var dH=hH(),xo=process.env;Object.defineProperty(Un,"_vendors",{value:dH.map(function(r){return r.constant})});Un.name=null;Un.isPR=null;dH.forEach(function(r){let t=(Array.isArray(r.env)?r.env:[r.env]).every(function(i){return pH(i)});if(Un[r.constant]=t,t)switch(Un.name=r.name,typeof r.pr){case"string":Un.isPR=!!xo[r.pr];break;case"object":"env"in r.pr?Un.isPR=r.pr.env in xo&&xo[r.pr.env]!==r.pr.ne:"any"in r.pr?Un.isPR=r.pr.any.some(function(i){return!!xo[i]}):Un.isPR=pH(r.pr);break;default:Un.isPR=null}});Un.isCI=!!(xo.CI||xo.CONTINUOUS_INTEGRATION||xo.BUILD_NUMBER||xo.RUN_ID||Un.name);function pH(r){return typeof r=="string"?!!xo[r]:Object.keys(r).every(function(e){return xo[e]===r[e]})}});var fn={};ut(fn,{KeyRelationship:()=>lc,applyCascade:()=>od,base64RegExp:()=>yH,colorStringAlphaRegExp:()=>IH,colorStringRegExp:()=>EH,computeKey:()=>RA,getPrintable:()=>Vr,hasExactLength:()=>SH,hasForbiddenKeys:()=>Yde,hasKeyRelationship:()=>av,hasMaxLength:()=>Sde,hasMinLength:()=>bde,hasMutuallyExclusiveKeys:()=>jde,hasRequiredKeys:()=>Gde,hasUniqueItems:()=>vde,isArray:()=>Cde,isAtLeast:()=>Dde,isAtMost:()=>kde,isBase64:()=>Kde,isBoolean:()=>hde,isDate:()=>dde,isDict:()=>Ede,isEnum:()=>Xi,isHexColor:()=>Ude,isISO8601:()=>Mde,isInExclusiveRange:()=>Fde,isInInclusiveRange:()=>Rde,isInstanceOf:()=>yde,isInteger:()=>Nde,isJSON:()=>Hde,isLiteral:()=>gde,isLowerCase:()=>Lde,isNegative:()=>xde,isNullable:()=>Qde,isNumber:()=>pde,isObject:()=>Ide,isOneOf:()=>wde,isOptional:()=>Bde,isPositive:()=>Pde,isString:()=>sd,isTuple:()=>mde,isUUID4:()=>Ode,isUnknown:()=>bH,isUpperCase:()=>Tde,iso8601RegExp:()=>ov,makeCoercionFn:()=>cc,makeSetter:()=>QH,makeTrait:()=>BH,makeValidator:()=>bt,matchesRegExp:()=>ad,plural:()=>kI,pushError:()=>pt,simpleKeyRegExp:()=>mH,uuid4RegExp:()=>wH});function bt({test:r}){return BH(r)()}function Vr(r){return r===null?"null":r===void 0?"undefined":r===""?"an empty string":JSON.stringify(r)}function RA(r,e){var t,i,n;return typeof e=="number"?`${(t=r==null?void 0:r.p)!==null&&t!==void 0?t:"."}[${e}]`:mH.test(e)?`${(i=r==null?void 0:r.p)!==null&&i!==void 0?i:""}.${e}`:`${(n=r==null?void 0:r.p)!==null&&n!==void 0?n:"."}[${JSON.stringify(e)}]`}function cc(r,e){return t=>{let i=r[e];return r[e]=t,cc(r,e).bind(null,i)}}function QH(r,e){return t=>{r[e]=t}}function kI(r,e,t){return r===1?e:t}function pt({errors:r,p:e}={},t){return r==null||r.push(`${e!=null?e:"."}: ${t}`),!1}function gde(r){return bt({test:(e,t)=>e!==r?pt(t,`Expected a literal (got ${Vr(r)})`):!0})}function Xi(r){let e=Array.isArray(r)?r:Object.values(r),t=new Set(e);return bt({test:(i,n)=>t.has(i)?!0:pt(n,`Expected a valid enumeration value (got ${Vr(i)})`)})}var mH,EH,IH,yH,wH,ov,BH,bH,sd,fde,hde,pde,dde,Cde,mde,Ede,Ide,yde,wde,od,Bde,Qde,bde,Sde,SH,vde,xde,Pde,Dde,kde,Rde,Fde,Nde,ad,Lde,Tde,Ode,Mde,Ude,Kde,Hde,Gde,Yde,jde,lc,qde,av,ls=kge(()=>{mH=/^[a-zA-Z_][a-zA-Z0-9_]*$/,EH=/^#[0-9a-f]{6}$/i,IH=/^#[0-9a-f]{6}([0-9a-f]{2})?$/i,yH=/^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$/,wH=/^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89aAbB][a-f0-9]{3}-[a-f0-9]{12}$/i,ov=/^(?:[1-9]\d{3}(-?)(?:(?:0[1-9]|1[0-2])\1(?:0[1-9]|1\d|2[0-8])|(?:0[13-9]|1[0-2])\1(?:29|30)|(?:0[13578]|1[02])(?:\1)31|00[1-9]|0[1-9]\d|[12]\d{2}|3(?:[0-5]\d|6[0-5]))|(?:[1-9]\d(?:0[48]|[2468][048]|[13579][26])|(?:[2468][048]|[13579][26])00)(?:(-?)02(?:\2)29|-?366))T(?:[01]\d|2[0-3])(:?)[0-5]\d(?:\3[0-5]\d)?(?:Z|[+-][01]\d(?:\3[0-5]\d)?)$/,BH=r=>()=>r;bH=()=>bt({test:(r,e)=>!0});sd=()=>bt({test:(r,e)=>typeof r!="string"?pt(e,`Expected a string (got ${Vr(r)})`):!0});fde=new Map([["true",!0],["True",!0],["1",!0],[1,!0],["false",!1],["False",!1],["0",!1],[0,!1]]),hde=()=>bt({test:(r,e)=>{var t;if(typeof r!="boolean"){if(typeof(e==null?void 0:e.coercions)<"u"){if(typeof(e==null?void 0:e.coercion)>"u")return pt(e,"Unbound coercion result");let i=fde.get(r);if(typeof i<"u")return e.coercions.push([(t=e.p)!==null&&t!==void 0?t:".",e.coercion.bind(null,i)]),!0}return pt(e,`Expected a boolean (got ${Vr(r)})`)}return!0}}),pde=()=>bt({test:(r,e)=>{var t;if(typeof r!="number"){if(typeof(e==null?void 0:e.coercions)<"u"){if(typeof(e==null?void 0:e.coercion)>"u")return pt(e,"Unbound coercion result");let i;if(typeof r=="string"){let n;try{n=JSON.parse(r)}catch{}if(typeof n=="number")if(JSON.stringify(n)===r)i=n;else return pt(e,`Received a number that can't be safely represented by the runtime (${r})`)}if(typeof i<"u")return e.coercions.push([(t=e.p)!==null&&t!==void 0?t:".",e.coercion.bind(null,i)]),!0}return pt(e,`Expected a number (got ${Vr(r)})`)}return!0}}),dde=()=>bt({test:(r,e)=>{var t;if(!(r instanceof Date)){if(typeof(e==null?void 0:e.coercions)<"u"){if(typeof(e==null?void 0:e.coercion)>"u")return pt(e,"Unbound coercion result");let i;if(typeof r=="string"&&ov.test(r))i=new Date(r);else{let n;if(typeof r=="string"){let s;try{s=JSON.parse(r)}catch{}typeof s=="number"&&(n=s)}else typeof r=="number"&&(n=r);if(typeof n<"u")if(Number.isSafeInteger(n)||!Number.isSafeInteger(n*1e3))i=new Date(n*1e3);else return pt(e,`Received a timestamp that can't be safely represented by the runtime (${r})`)}if(typeof i<"u")return e.coercions.push([(t=e.p)!==null&&t!==void 0?t:".",e.coercion.bind(null,i)]),!0}return pt(e,`Expected a date (got ${Vr(r)})`)}return!0}}),Cde=(r,{delimiter:e}={})=>bt({test:(t,i)=>{var n;if(typeof t=="string"&&typeof e<"u"&&typeof(i==null?void 0:i.coercions)<"u"){if(typeof(i==null?void 0:i.coercion)>"u")return pt(i,"Unbound coercion result");t=t.split(e),i.coercions.push([(n=i.p)!==null&&n!==void 0?n:".",i.coercion.bind(null,t)])}if(!Array.isArray(t))return pt(i,`Expected an array (got ${Vr(t)})`);let s=!0;for(let o=0,a=t.length;o{let t=SH(r.length);return bt({test:(i,n)=>{var s;if(typeof i=="string"&&typeof e<"u"&&typeof(n==null?void 0:n.coercions)<"u"){if(typeof(n==null?void 0:n.coercion)>"u")return pt(n,"Unbound coercion result");i=i.split(e),n.coercions.push([(s=n.p)!==null&&s!==void 0?s:".",n.coercion.bind(null,i)])}if(!Array.isArray(i))return pt(n,`Expected a tuple (got ${Vr(i)})`);let o=t(i,Object.assign({},n));for(let a=0,l=i.length;abt({test:(t,i)=>{if(typeof t!="object"||t===null)return pt(i,`Expected an object (got ${Vr(t)})`);let n=Object.keys(t),s=!0;for(let o=0,a=n.length;o{let t=Object.keys(r);return bt({test:(i,n)=>{if(typeof i!="object"||i===null)return pt(n,`Expected an object (got ${Vr(i)})`);let s=new Set([...t,...Object.keys(i)]),o={},a=!0;for(let l of s){if(l==="constructor"||l==="__proto__")a=pt(Object.assign(Object.assign({},n),{p:RA(n,l)}),"Unsafe property name");else{let c=Object.prototype.hasOwnProperty.call(r,l)?r[l]:void 0,u=Object.prototype.hasOwnProperty.call(i,l)?i[l]:void 0;typeof c<"u"?a=c(u,Object.assign(Object.assign({},n),{p:RA(n,l),coercion:cc(i,l)}))&&a:e===null?a=pt(Object.assign(Object.assign({},n),{p:RA(n,l)}),`Extraneous property (got ${Vr(u)})`):Object.defineProperty(o,l,{enumerable:!0,get:()=>u,set:QH(i,l)})}if(!a&&(n==null?void 0:n.errors)==null)break}return e!==null&&(a||(n==null?void 0:n.errors)!=null)&&(a=e(o,n)&&a),a}})},yde=r=>bt({test:(e,t)=>e instanceof r?!0:pt(t,`Expected an instance of ${r.name} (got ${Vr(e)})`)}),wde=(r,{exclusive:e=!1}={})=>bt({test:(t,i)=>{var n,s,o;let a=[],l=typeof(i==null?void 0:i.errors)<"u"?[]:void 0;for(let c=0,u=r.length;c1?pt(i,`Expected to match exactly a single predicate (matched ${a.join(", ")})`):(o=i==null?void 0:i.errors)===null||o===void 0||o.push(...l),!1}}),od=(r,e)=>bt({test:(t,i)=>{var n,s;let o={value:t},a=typeof(i==null?void 0:i.coercions)<"u"?cc(o,"value"):void 0,l=typeof(i==null?void 0:i.coercions)<"u"?[]:void 0;if(!r(t,Object.assign(Object.assign({},i),{coercion:a,coercions:l})))return!1;let c=[];if(typeof l<"u")for(let[,u]of l)c.push(u());try{if(typeof(i==null?void 0:i.coercions)<"u"){if(o.value!==t){if(typeof(i==null?void 0:i.coercion)>"u")return pt(i,"Unbound coercion result");i.coercions.push([(n=i.p)!==null&&n!==void 0?n:".",i.coercion.bind(null,o.value)])}(s=i==null?void 0:i.coercions)===null||s===void 0||s.push(...l)}return e.every(u=>u(o.value,i))}finally{for(let u of c)u()}}}),Bde=r=>bt({test:(e,t)=>typeof e>"u"?!0:r(e,t)}),Qde=r=>bt({test:(e,t)=>e===null?!0:r(e,t)}),bde=r=>bt({test:(e,t)=>e.length>=r?!0:pt(t,`Expected to have a length of at least ${r} elements (got ${e.length})`)}),Sde=r=>bt({test:(e,t)=>e.length<=r?!0:pt(t,`Expected to have a length of at most ${r} elements (got ${e.length})`)}),SH=r=>bt({test:(e,t)=>e.length!==r?pt(t,`Expected to have a length of exactly ${r} elements (got ${e.length})`):!0}),vde=({map:r}={})=>bt({test:(e,t)=>{let i=new Set,n=new Set;for(let s=0,o=e.length;sbt({test:(r,e)=>r<=0?!0:pt(e,`Expected to be negative (got ${r})`)}),Pde=()=>bt({test:(r,e)=>r>=0?!0:pt(e,`Expected to be positive (got ${r})`)}),Dde=r=>bt({test:(e,t)=>e>=r?!0:pt(t,`Expected to be at least ${r} (got ${e})`)}),kde=r=>bt({test:(e,t)=>e<=r?!0:pt(t,`Expected to be at most ${r} (got ${e})`)}),Rde=(r,e)=>bt({test:(t,i)=>t>=r&&t<=e?!0:pt(i,`Expected to be in the [${r}; ${e}] range (got ${t})`)}),Fde=(r,e)=>bt({test:(t,i)=>t>=r&&tbt({test:(e,t)=>e!==Math.round(e)?pt(t,`Expected to be an integer (got ${e})`):Number.isSafeInteger(e)?!0:pt(t,`Expected to be a safe integer (got ${e})`)}),ad=r=>bt({test:(e,t)=>r.test(e)?!0:pt(t,`Expected to match the pattern ${r.toString()} (got ${Vr(e)})`)}),Lde=()=>bt({test:(r,e)=>r!==r.toLowerCase()?pt(e,`Expected to be all-lowercase (got ${r})`):!0}),Tde=()=>bt({test:(r,e)=>r!==r.toUpperCase()?pt(e,`Expected to be all-uppercase (got ${r})`):!0}),Ode=()=>bt({test:(r,e)=>wH.test(r)?!0:pt(e,`Expected to be a valid UUID v4 (got ${Vr(r)})`)}),Mde=()=>bt({test:(r,e)=>ov.test(r)?!1:pt(e,`Expected to be a valid ISO 8601 date string (got ${Vr(r)})`)}),Ude=({alpha:r=!1})=>bt({test:(e,t)=>(r?EH.test(e):IH.test(e))?!0:pt(t,`Expected to be a valid hexadecimal color string (got ${Vr(e)})`)}),Kde=()=>bt({test:(r,e)=>yH.test(r)?!0:pt(e,`Expected to be a valid base 64 string (got ${Vr(r)})`)}),Hde=(r=bH())=>bt({test:(e,t)=>{let i;try{i=JSON.parse(e)}catch{return pt(t,`Expected to be a valid JSON string (got ${Vr(e)})`)}return r(i,t)}}),Gde=r=>{let e=new Set(r);return bt({test:(t,i)=>{let n=new Set(Object.keys(t)),s=[];for(let o of e)n.has(o)||s.push(o);return s.length>0?pt(i,`Missing required ${kI(s.length,"property","properties")} ${s.map(o=>`"${o}"`).join(", ")}`):!0}})},Yde=r=>{let e=new Set(r);return bt({test:(t,i)=>{let n=new Set(Object.keys(t)),s=[];for(let o of e)n.has(o)&&s.push(o);return s.length>0?pt(i,`Forbidden ${kI(s.length,"property","properties")} ${s.map(o=>`"${o}"`).join(", ")}`):!0}})},jde=r=>{let e=new Set(r);return bt({test:(t,i)=>{let n=new Set(Object.keys(t)),s=[];for(let o of e)n.has(o)&&s.push(o);return s.length>1?pt(i,`Mutually exclusive properties ${s.map(o=>`"${o}"`).join(", ")}`):!0}})};(function(r){r.Forbids="Forbids",r.Requires="Requires"})(lc||(lc={}));qde={[lc.Forbids]:{expect:!1,message:"forbids using"},[lc.Requires]:{expect:!0,message:"requires using"}},av=(r,e,t,{ignore:i=[]}={})=>{let n=new Set(i),s=new Set(t),o=qde[e];return bt({test:(a,l)=>{let c=new Set(Object.keys(a));if(!c.has(r)||n.has(a[r]))return!0;let u=[];for(let g of s)(c.has(g)&&!n.has(a[g]))!==o.expect&&u.push(g);return u.length>=1?pt(l,`Property "${r}" ${o.message} ${kI(u.length,"property","properties")} ${u.map(g=>`"${g}"`).join(", ")}`):!0}})}});var YH=w((n$e,GH)=>{"use strict";GH.exports=(r,...e)=>new Promise(t=>{t(r(...e))})});var Jg=w((s$e,pv)=>{"use strict";var ACe=YH(),jH=r=>{if(r<1)throw new TypeError("Expected `concurrency` to be a number from 1 and up");let e=[],t=0,i=()=>{t--,e.length>0&&e.shift()()},n=(a,l,...c)=>{t++;let u=ACe(a,...c);l(u),u.then(i,i)},s=(a,l,...c)=>{tnew Promise(c=>s(a,c,...l));return Object.defineProperties(o,{activeCount:{get:()=>t},pendingCount:{get:()=>e.length}}),o};pv.exports=jH;pv.exports.default=jH});var gd=w((a$e,qH)=>{var lCe="2.0.0",cCe=Number.MAX_SAFE_INTEGER||9007199254740991,uCe=16;qH.exports={SEMVER_SPEC_VERSION:lCe,MAX_LENGTH:256,MAX_SAFE_INTEGER:cCe,MAX_SAFE_COMPONENT_LENGTH:uCe}});var fd=w((A$e,JH)=>{var gCe=typeof process=="object"&&process.env&&process.env.NODE_DEBUG&&/\bsemver\b/i.test(process.env.NODE_DEBUG)?(...r)=>console.error("SEMVER",...r):()=>{};JH.exports=gCe});var uc=w((NA,WH)=>{var{MAX_SAFE_COMPONENT_LENGTH:dv}=gd(),fCe=fd();NA=WH.exports={};var hCe=NA.re=[],et=NA.src=[],tt=NA.t={},pCe=0,St=(r,e,t)=>{let i=pCe++;fCe(i,e),tt[r]=i,et[i]=e,hCe[i]=new RegExp(e,t?"g":void 0)};St("NUMERICIDENTIFIER","0|[1-9]\\d*");St("NUMERICIDENTIFIERLOOSE","[0-9]+");St("NONNUMERICIDENTIFIER","\\d*[a-zA-Z-][a-zA-Z0-9-]*");St("MAINVERSION",`(${et[tt.NUMERICIDENTIFIER]})\\.(${et[tt.NUMERICIDENTIFIER]})\\.(${et[tt.NUMERICIDENTIFIER]})`);St("MAINVERSIONLOOSE",`(${et[tt.NUMERICIDENTIFIERLOOSE]})\\.(${et[tt.NUMERICIDENTIFIERLOOSE]})\\.(${et[tt.NUMERICIDENTIFIERLOOSE]})`);St("PRERELEASEIDENTIFIER",`(?:${et[tt.NUMERICIDENTIFIER]}|${et[tt.NONNUMERICIDENTIFIER]})`);St("PRERELEASEIDENTIFIERLOOSE",`(?:${et[tt.NUMERICIDENTIFIERLOOSE]}|${et[tt.NONNUMERICIDENTIFIER]})`);St("PRERELEASE",`(?:-(${et[tt.PRERELEASEIDENTIFIER]}(?:\\.${et[tt.PRERELEASEIDENTIFIER]})*))`);St("PRERELEASELOOSE",`(?:-?(${et[tt.PRERELEASEIDENTIFIERLOOSE]}(?:\\.${et[tt.PRERELEASEIDENTIFIERLOOSE]})*))`);St("BUILDIDENTIFIER","[0-9A-Za-z-]+");St("BUILD",`(?:\\+(${et[tt.BUILDIDENTIFIER]}(?:\\.${et[tt.BUILDIDENTIFIER]})*))`);St("FULLPLAIN",`v?${et[tt.MAINVERSION]}${et[tt.PRERELEASE]}?${et[tt.BUILD]}?`);St("FULL",`^${et[tt.FULLPLAIN]}$`);St("LOOSEPLAIN",`[v=\\s]*${et[tt.MAINVERSIONLOOSE]}${et[tt.PRERELEASELOOSE]}?${et[tt.BUILD]}?`);St("LOOSE",`^${et[tt.LOOSEPLAIN]}$`);St("GTLT","((?:<|>)?=?)");St("XRANGEIDENTIFIERLOOSE",`${et[tt.NUMERICIDENTIFIERLOOSE]}|x|X|\\*`);St("XRANGEIDENTIFIER",`${et[tt.NUMERICIDENTIFIER]}|x|X|\\*`);St("XRANGEPLAIN",`[v=\\s]*(${et[tt.XRANGEIDENTIFIER]})(?:\\.(${et[tt.XRANGEIDENTIFIER]})(?:\\.(${et[tt.XRANGEIDENTIFIER]})(?:${et[tt.PRERELEASE]})?${et[tt.BUILD]}?)?)?`);St("XRANGEPLAINLOOSE",`[v=\\s]*(${et[tt.XRANGEIDENTIFIERLOOSE]})(?:\\.(${et[tt.XRANGEIDENTIFIERLOOSE]})(?:\\.(${et[tt.XRANGEIDENTIFIERLOOSE]})(?:${et[tt.PRERELEASELOOSE]})?${et[tt.BUILD]}?)?)?`);St("XRANGE",`^${et[tt.GTLT]}\\s*${et[tt.XRANGEPLAIN]}$`);St("XRANGELOOSE",`^${et[tt.GTLT]}\\s*${et[tt.XRANGEPLAINLOOSE]}$`);St("COERCE",`(^|[^\\d])(\\d{1,${dv}})(?:\\.(\\d{1,${dv}}))?(?:\\.(\\d{1,${dv}}))?(?:$|[^\\d])`);St("COERCERTL",et[tt.COERCE],!0);St("LONETILDE","(?:~>?)");St("TILDETRIM",`(\\s*)${et[tt.LONETILDE]}\\s+`,!0);NA.tildeTrimReplace="$1~";St("TILDE",`^${et[tt.LONETILDE]}${et[tt.XRANGEPLAIN]}$`);St("TILDELOOSE",`^${et[tt.LONETILDE]}${et[tt.XRANGEPLAINLOOSE]}$`);St("LONECARET","(?:\\^)");St("CARETTRIM",`(\\s*)${et[tt.LONECARET]}\\s+`,!0);NA.caretTrimReplace="$1^";St("CARET",`^${et[tt.LONECARET]}${et[tt.XRANGEPLAIN]}$`);St("CARETLOOSE",`^${et[tt.LONECARET]}${et[tt.XRANGEPLAINLOOSE]}$`);St("COMPARATORLOOSE",`^${et[tt.GTLT]}\\s*(${et[tt.LOOSEPLAIN]})$|^$`);St("COMPARATOR",`^${et[tt.GTLT]}\\s*(${et[tt.FULLPLAIN]})$|^$`);St("COMPARATORTRIM",`(\\s*)${et[tt.GTLT]}\\s*(${et[tt.LOOSEPLAIN]}|${et[tt.XRANGEPLAIN]})`,!0);NA.comparatorTrimReplace="$1$2$3";St("HYPHENRANGE",`^\\s*(${et[tt.XRANGEPLAIN]})\\s+-\\s+(${et[tt.XRANGEPLAIN]})\\s*$`);St("HYPHENRANGELOOSE",`^\\s*(${et[tt.XRANGEPLAINLOOSE]})\\s+-\\s+(${et[tt.XRANGEPLAINLOOSE]})\\s*$`);St("STAR","(<|>)?=?\\s*\\*");St("GTE0","^\\s*>=\\s*0.0.0\\s*$");St("GTE0PRE","^\\s*>=\\s*0.0.0-0\\s*$")});var hd=w((l$e,zH)=>{var dCe=["includePrerelease","loose","rtl"],CCe=r=>r?typeof r!="object"?{loose:!0}:dCe.filter(e=>r[e]).reduce((e,t)=>(e[t]=!0,e),{}):{};zH.exports=CCe});var OI=w((c$e,ZH)=>{var VH=/^[0-9]+$/,XH=(r,e)=>{let t=VH.test(r),i=VH.test(e);return t&&i&&(r=+r,e=+e),r===e?0:t&&!i?-1:i&&!t?1:rXH(e,r);ZH.exports={compareIdentifiers:XH,rcompareIdentifiers:mCe}});var Ti=w((u$e,tG)=>{var MI=fd(),{MAX_LENGTH:_H,MAX_SAFE_INTEGER:UI}=gd(),{re:$H,t:eG}=uc(),ECe=hd(),{compareIdentifiers:pd}=OI(),Gn=class{constructor(e,t){if(t=ECe(t),e instanceof Gn){if(e.loose===!!t.loose&&e.includePrerelease===!!t.includePrerelease)return e;e=e.version}else if(typeof e!="string")throw new TypeError(`Invalid Version: ${e}`);if(e.length>_H)throw new TypeError(`version is longer than ${_H} characters`);MI("SemVer",e,t),this.options=t,this.loose=!!t.loose,this.includePrerelease=!!t.includePrerelease;let i=e.trim().match(t.loose?$H[eG.LOOSE]:$H[eG.FULL]);if(!i)throw new TypeError(`Invalid Version: ${e}`);if(this.raw=e,this.major=+i[1],this.minor=+i[2],this.patch=+i[3],this.major>UI||this.major<0)throw new TypeError("Invalid major version");if(this.minor>UI||this.minor<0)throw new TypeError("Invalid minor version");if(this.patch>UI||this.patch<0)throw new TypeError("Invalid patch version");i[4]?this.prerelease=i[4].split(".").map(n=>{if(/^[0-9]+$/.test(n)){let s=+n;if(s>=0&&s=0;)typeof this.prerelease[i]=="number"&&(this.prerelease[i]++,i=-2);i===-1&&this.prerelease.push(0)}t&&(this.prerelease[0]===t?isNaN(this.prerelease[1])&&(this.prerelease=[t,0]):this.prerelease=[t,0]);break;default:throw new Error(`invalid increment argument: ${e}`)}return this.format(),this.raw=this.version,this}};tG.exports=Gn});var gc=w((g$e,sG)=>{var{MAX_LENGTH:ICe}=gd(),{re:rG,t:iG}=uc(),nG=Ti(),yCe=hd(),wCe=(r,e)=>{if(e=yCe(e),r instanceof nG)return r;if(typeof r!="string"||r.length>ICe||!(e.loose?rG[iG.LOOSE]:rG[iG.FULL]).test(r))return null;try{return new nG(r,e)}catch{return null}};sG.exports=wCe});var aG=w((f$e,oG)=>{var BCe=gc(),QCe=(r,e)=>{let t=BCe(r,e);return t?t.version:null};oG.exports=QCe});var lG=w((h$e,AG)=>{var bCe=gc(),SCe=(r,e)=>{let t=bCe(r.trim().replace(/^[=v]+/,""),e);return t?t.version:null};AG.exports=SCe});var uG=w((p$e,cG)=>{var vCe=Ti(),xCe=(r,e,t,i)=>{typeof t=="string"&&(i=t,t=void 0);try{return new vCe(r,t).inc(e,i).version}catch{return null}};cG.exports=xCe});var cs=w((d$e,fG)=>{var gG=Ti(),PCe=(r,e,t)=>new gG(r,t).compare(new gG(e,t));fG.exports=PCe});var KI=w((C$e,hG)=>{var DCe=cs(),kCe=(r,e,t)=>DCe(r,e,t)===0;hG.exports=kCe});var CG=w((m$e,dG)=>{var pG=gc(),RCe=KI(),FCe=(r,e)=>{if(RCe(r,e))return null;{let t=pG(r),i=pG(e),n=t.prerelease.length||i.prerelease.length,s=n?"pre":"",o=n?"prerelease":"";for(let a in t)if((a==="major"||a==="minor"||a==="patch")&&t[a]!==i[a])return s+a;return o}};dG.exports=FCe});var EG=w((E$e,mG)=>{var NCe=Ti(),LCe=(r,e)=>new NCe(r,e).major;mG.exports=LCe});var yG=w((I$e,IG)=>{var TCe=Ti(),OCe=(r,e)=>new TCe(r,e).minor;IG.exports=OCe});var BG=w((y$e,wG)=>{var MCe=Ti(),UCe=(r,e)=>new MCe(r,e).patch;wG.exports=UCe});var bG=w((w$e,QG)=>{var KCe=gc(),HCe=(r,e)=>{let t=KCe(r,e);return t&&t.prerelease.length?t.prerelease:null};QG.exports=HCe});var vG=w((B$e,SG)=>{var GCe=cs(),YCe=(r,e,t)=>GCe(e,r,t);SG.exports=YCe});var PG=w((Q$e,xG)=>{var jCe=cs(),qCe=(r,e)=>jCe(r,e,!0);xG.exports=qCe});var HI=w((b$e,kG)=>{var DG=Ti(),JCe=(r,e,t)=>{let i=new DG(r,t),n=new DG(e,t);return i.compare(n)||i.compareBuild(n)};kG.exports=JCe});var FG=w((S$e,RG)=>{var WCe=HI(),zCe=(r,e)=>r.sort((t,i)=>WCe(t,i,e));RG.exports=zCe});var LG=w((v$e,NG)=>{var VCe=HI(),XCe=(r,e)=>r.sort((t,i)=>VCe(i,t,e));NG.exports=XCe});var dd=w((x$e,TG)=>{var ZCe=cs(),_Ce=(r,e,t)=>ZCe(r,e,t)>0;TG.exports=_Ce});var GI=w((P$e,OG)=>{var $Ce=cs(),eme=(r,e,t)=>$Ce(r,e,t)<0;OG.exports=eme});var Cv=w((D$e,MG)=>{var tme=cs(),rme=(r,e,t)=>tme(r,e,t)!==0;MG.exports=rme});var YI=w((k$e,UG)=>{var ime=cs(),nme=(r,e,t)=>ime(r,e,t)>=0;UG.exports=nme});var jI=w((R$e,KG)=>{var sme=cs(),ome=(r,e,t)=>sme(r,e,t)<=0;KG.exports=ome});var mv=w((F$e,HG)=>{var ame=KI(),Ame=Cv(),lme=dd(),cme=YI(),ume=GI(),gme=jI(),fme=(r,e,t,i)=>{switch(e){case"===":return typeof r=="object"&&(r=r.version),typeof t=="object"&&(t=t.version),r===t;case"!==":return typeof r=="object"&&(r=r.version),typeof t=="object"&&(t=t.version),r!==t;case"":case"=":case"==":return ame(r,t,i);case"!=":return Ame(r,t,i);case">":return lme(r,t,i);case">=":return cme(r,t,i);case"<":return ume(r,t,i);case"<=":return gme(r,t,i);default:throw new TypeError(`Invalid operator: ${e}`)}};HG.exports=fme});var YG=w((N$e,GG)=>{var hme=Ti(),pme=gc(),{re:qI,t:JI}=uc(),dme=(r,e)=>{if(r instanceof hme)return r;if(typeof r=="number"&&(r=String(r)),typeof r!="string")return null;e=e||{};let t=null;if(!e.rtl)t=r.match(qI[JI.COERCE]);else{let i;for(;(i=qI[JI.COERCERTL].exec(r))&&(!t||t.index+t[0].length!==r.length);)(!t||i.index+i[0].length!==t.index+t[0].length)&&(t=i),qI[JI.COERCERTL].lastIndex=i.index+i[1].length+i[2].length;qI[JI.COERCERTL].lastIndex=-1}return t===null?null:pme(`${t[2]}.${t[3]||"0"}.${t[4]||"0"}`,e)};GG.exports=dme});var qG=w((L$e,jG)=>{"use strict";jG.exports=function(r){r.prototype[Symbol.iterator]=function*(){for(let e=this.head;e;e=e.next)yield e.value}}});var WI=w((T$e,JG)=>{"use strict";JG.exports=Ht;Ht.Node=fc;Ht.create=Ht;function Ht(r){var e=this;if(e instanceof Ht||(e=new Ht),e.tail=null,e.head=null,e.length=0,r&&typeof r.forEach=="function")r.forEach(function(n){e.push(n)});else if(arguments.length>0)for(var t=0,i=arguments.length;t1)t=e;else if(this.head)i=this.head.next,t=this.head.value;else throw new TypeError("Reduce of empty list with no initial value");for(var n=0;i!==null;n++)t=r(t,i.value,n),i=i.next;return t};Ht.prototype.reduceReverse=function(r,e){var t,i=this.tail;if(arguments.length>1)t=e;else if(this.tail)i=this.tail.prev,t=this.tail.value;else throw new TypeError("Reduce of empty list with no initial value");for(var n=this.length-1;i!==null;n--)t=r(t,i.value,n),i=i.prev;return t};Ht.prototype.toArray=function(){for(var r=new Array(this.length),e=0,t=this.head;t!==null;e++)r[e]=t.value,t=t.next;return r};Ht.prototype.toArrayReverse=function(){for(var r=new Array(this.length),e=0,t=this.tail;t!==null;e++)r[e]=t.value,t=t.prev;return r};Ht.prototype.slice=function(r,e){e=e||this.length,e<0&&(e+=this.length),r=r||0,r<0&&(r+=this.length);var t=new Ht;if(ethis.length&&(e=this.length);for(var i=0,n=this.head;n!==null&&ithis.length&&(e=this.length);for(var i=this.length,n=this.tail;n!==null&&i>e;i--)n=n.prev;for(;n!==null&&i>r;i--,n=n.prev)t.push(n.value);return t};Ht.prototype.splice=function(r,e,...t){r>this.length&&(r=this.length-1),r<0&&(r=this.length+r);for(var i=0,n=this.head;n!==null&&i{"use strict";var Ime=WI(),hc=Symbol("max"),Sa=Symbol("length"),Wg=Symbol("lengthCalculator"),md=Symbol("allowStale"),pc=Symbol("maxAge"),ba=Symbol("dispose"),WG=Symbol("noDisposeOnSet"),di=Symbol("lruList"),Zs=Symbol("cache"),VG=Symbol("updateAgeOnGet"),Ev=()=>1,yv=class{constructor(e){if(typeof e=="number"&&(e={max:e}),e||(e={}),e.max&&(typeof e.max!="number"||e.max<0))throw new TypeError("max must be a non-negative number");let t=this[hc]=e.max||1/0,i=e.length||Ev;if(this[Wg]=typeof i!="function"?Ev:i,this[md]=e.stale||!1,e.maxAge&&typeof e.maxAge!="number")throw new TypeError("maxAge must be a number");this[pc]=e.maxAge||0,this[ba]=e.dispose,this[WG]=e.noDisposeOnSet||!1,this[VG]=e.updateAgeOnGet||!1,this.reset()}set max(e){if(typeof e!="number"||e<0)throw new TypeError("max must be a non-negative number");this[hc]=e||1/0,Cd(this)}get max(){return this[hc]}set allowStale(e){this[md]=!!e}get allowStale(){return this[md]}set maxAge(e){if(typeof e!="number")throw new TypeError("maxAge must be a non-negative number");this[pc]=e,Cd(this)}get maxAge(){return this[pc]}set lengthCalculator(e){typeof e!="function"&&(e=Ev),e!==this[Wg]&&(this[Wg]=e,this[Sa]=0,this[di].forEach(t=>{t.length=this[Wg](t.value,t.key),this[Sa]+=t.length})),Cd(this)}get lengthCalculator(){return this[Wg]}get length(){return this[Sa]}get itemCount(){return this[di].length}rforEach(e,t){t=t||this;for(let i=this[di].tail;i!==null;){let n=i.prev;zG(this,e,i,t),i=n}}forEach(e,t){t=t||this;for(let i=this[di].head;i!==null;){let n=i.next;zG(this,e,i,t),i=n}}keys(){return this[di].toArray().map(e=>e.key)}values(){return this[di].toArray().map(e=>e.value)}reset(){this[ba]&&this[di]&&this[di].length&&this[di].forEach(e=>this[ba](e.key,e.value)),this[Zs]=new Map,this[di]=new Ime,this[Sa]=0}dump(){return this[di].map(e=>zI(this,e)?!1:{k:e.key,v:e.value,e:e.now+(e.maxAge||0)}).toArray().filter(e=>e)}dumpLru(){return this[di]}set(e,t,i){if(i=i||this[pc],i&&typeof i!="number")throw new TypeError("maxAge must be a number");let n=i?Date.now():0,s=this[Wg](t,e);if(this[Zs].has(e)){if(s>this[hc])return zg(this,this[Zs].get(e)),!1;let l=this[Zs].get(e).value;return this[ba]&&(this[WG]||this[ba](e,l.value)),l.now=n,l.maxAge=i,l.value=t,this[Sa]+=s-l.length,l.length=s,this.get(e),Cd(this),!0}let o=new wv(e,t,s,n,i);return o.length>this[hc]?(this[ba]&&this[ba](e,t),!1):(this[Sa]+=o.length,this[di].unshift(o),this[Zs].set(e,this[di].head),Cd(this),!0)}has(e){if(!this[Zs].has(e))return!1;let t=this[Zs].get(e).value;return!zI(this,t)}get(e){return Iv(this,e,!0)}peek(e){return Iv(this,e,!1)}pop(){let e=this[di].tail;return e?(zg(this,e),e.value):null}del(e){zg(this,this[Zs].get(e))}load(e){this.reset();let t=Date.now();for(let i=e.length-1;i>=0;i--){let n=e[i],s=n.e||0;if(s===0)this.set(n.k,n.v);else{let o=s-t;o>0&&this.set(n.k,n.v,o)}}}prune(){this[Zs].forEach((e,t)=>Iv(this,t,!1))}},Iv=(r,e,t)=>{let i=r[Zs].get(e);if(i){let n=i.value;if(zI(r,n)){if(zg(r,i),!r[md])return}else t&&(r[VG]&&(i.value.now=Date.now()),r[di].unshiftNode(i));return n.value}},zI=(r,e)=>{if(!e||!e.maxAge&&!r[pc])return!1;let t=Date.now()-e.now;return e.maxAge?t>e.maxAge:r[pc]&&t>r[pc]},Cd=r=>{if(r[Sa]>r[hc])for(let e=r[di].tail;r[Sa]>r[hc]&&e!==null;){let t=e.prev;zg(r,e),e=t}},zg=(r,e)=>{if(e){let t=e.value;r[ba]&&r[ba](t.key,t.value),r[Sa]-=t.length,r[Zs].delete(t.key),r[di].removeNode(e)}},wv=class{constructor(e,t,i,n,s){this.key=e,this.value=t,this.length=i,this.now=n,this.maxAge=s||0}},zG=(r,e,t,i)=>{let n=t.value;zI(r,n)&&(zg(r,t),r[md]||(n=void 0)),n&&e.call(i,n.value,n.key,r)};XG.exports=yv});var us=w((M$e,tY)=>{var dc=class{constructor(e,t){if(t=wme(t),e instanceof dc)return e.loose===!!t.loose&&e.includePrerelease===!!t.includePrerelease?e:new dc(e.raw,t);if(e instanceof Bv)return this.raw=e.value,this.set=[[e]],this.format(),this;if(this.options=t,this.loose=!!t.loose,this.includePrerelease=!!t.includePrerelease,this.raw=e,this.set=e.split(/\s*\|\|\s*/).map(i=>this.parseRange(i.trim())).filter(i=>i.length),!this.set.length)throw new TypeError(`Invalid SemVer Range: ${e}`);if(this.set.length>1){let i=this.set[0];if(this.set=this.set.filter(n=>!$G(n[0])),this.set.length===0)this.set=[i];else if(this.set.length>1){for(let n of this.set)if(n.length===1&&vme(n[0])){this.set=[n];break}}}this.format()}format(){return this.range=this.set.map(e=>e.join(" ").trim()).join("||").trim(),this.range}toString(){return this.range}parseRange(e){e=e.trim();let i=`parseRange:${Object.keys(this.options).join(",")}:${e}`,n=_G.get(i);if(n)return n;let s=this.options.loose,o=s?Oi[Qi.HYPHENRANGELOOSE]:Oi[Qi.HYPHENRANGE];e=e.replace(o,Ome(this.options.includePrerelease)),Gr("hyphen replace",e),e=e.replace(Oi[Qi.COMPARATORTRIM],Qme),Gr("comparator trim",e,Oi[Qi.COMPARATORTRIM]),e=e.replace(Oi[Qi.TILDETRIM],bme),e=e.replace(Oi[Qi.CARETTRIM],Sme),e=e.split(/\s+/).join(" ");let a=s?Oi[Qi.COMPARATORLOOSE]:Oi[Qi.COMPARATOR],l=e.split(" ").map(f=>xme(f,this.options)).join(" ").split(/\s+/).map(f=>Tme(f,this.options)).filter(this.options.loose?f=>!!f.match(a):()=>!0).map(f=>new Bv(f,this.options)),c=l.length,u=new Map;for(let f of l){if($G(f))return[f];u.set(f.value,f)}u.size>1&&u.has("")&&u.delete("");let g=[...u.values()];return _G.set(i,g),g}intersects(e,t){if(!(e instanceof dc))throw new TypeError("a Range is required");return this.set.some(i=>eY(i,t)&&e.set.some(n=>eY(n,t)&&i.every(s=>n.every(o=>s.intersects(o,t)))))}test(e){if(!e)return!1;if(typeof e=="string")try{e=new Bme(e,this.options)}catch{return!1}for(let t=0;tr.value==="<0.0.0-0",vme=r=>r.value==="",eY=(r,e)=>{let t=!0,i=r.slice(),n=i.pop();for(;t&&i.length;)t=i.every(s=>n.intersects(s,e)),n=i.pop();return t},xme=(r,e)=>(Gr("comp",r,e),r=kme(r,e),Gr("caret",r),r=Pme(r,e),Gr("tildes",r),r=Fme(r,e),Gr("xrange",r),r=Lme(r,e),Gr("stars",r),r),_i=r=>!r||r.toLowerCase()==="x"||r==="*",Pme=(r,e)=>r.trim().split(/\s+/).map(t=>Dme(t,e)).join(" "),Dme=(r,e)=>{let t=e.loose?Oi[Qi.TILDELOOSE]:Oi[Qi.TILDE];return r.replace(t,(i,n,s,o,a)=>{Gr("tilde",r,i,n,s,o,a);let l;return _i(n)?l="":_i(s)?l=`>=${n}.0.0 <${+n+1}.0.0-0`:_i(o)?l=`>=${n}.${s}.0 <${n}.${+s+1}.0-0`:a?(Gr("replaceTilde pr",a),l=`>=${n}.${s}.${o}-${a} <${n}.${+s+1}.0-0`):l=`>=${n}.${s}.${o} <${n}.${+s+1}.0-0`,Gr("tilde return",l),l})},kme=(r,e)=>r.trim().split(/\s+/).map(t=>Rme(t,e)).join(" "),Rme=(r,e)=>{Gr("caret",r,e);let t=e.loose?Oi[Qi.CARETLOOSE]:Oi[Qi.CARET],i=e.includePrerelease?"-0":"";return r.replace(t,(n,s,o,a,l)=>{Gr("caret",r,n,s,o,a,l);let c;return _i(s)?c="":_i(o)?c=`>=${s}.0.0${i} <${+s+1}.0.0-0`:_i(a)?s==="0"?c=`>=${s}.${o}.0${i} <${s}.${+o+1}.0-0`:c=`>=${s}.${o}.0${i} <${+s+1}.0.0-0`:l?(Gr("replaceCaret pr",l),s==="0"?o==="0"?c=`>=${s}.${o}.${a}-${l} <${s}.${o}.${+a+1}-0`:c=`>=${s}.${o}.${a}-${l} <${s}.${+o+1}.0-0`:c=`>=${s}.${o}.${a}-${l} <${+s+1}.0.0-0`):(Gr("no pr"),s==="0"?o==="0"?c=`>=${s}.${o}.${a}${i} <${s}.${o}.${+a+1}-0`:c=`>=${s}.${o}.${a}${i} <${s}.${+o+1}.0-0`:c=`>=${s}.${o}.${a} <${+s+1}.0.0-0`),Gr("caret return",c),c})},Fme=(r,e)=>(Gr("replaceXRanges",r,e),r.split(/\s+/).map(t=>Nme(t,e)).join(" ")),Nme=(r,e)=>{r=r.trim();let t=e.loose?Oi[Qi.XRANGELOOSE]:Oi[Qi.XRANGE];return r.replace(t,(i,n,s,o,a,l)=>{Gr("xRange",r,i,n,s,o,a,l);let c=_i(s),u=c||_i(o),g=u||_i(a),f=g;return n==="="&&f&&(n=""),l=e.includePrerelease?"-0":"",c?n===">"||n==="<"?i="<0.0.0-0":i="*":n&&f?(u&&(o=0),a=0,n===">"?(n=">=",u?(s=+s+1,o=0,a=0):(o=+o+1,a=0)):n==="<="&&(n="<",u?s=+s+1:o=+o+1),n==="<"&&(l="-0"),i=`${n+s}.${o}.${a}${l}`):u?i=`>=${s}.0.0${l} <${+s+1}.0.0-0`:g&&(i=`>=${s}.${o}.0${l} <${s}.${+o+1}.0-0`),Gr("xRange return",i),i})},Lme=(r,e)=>(Gr("replaceStars",r,e),r.trim().replace(Oi[Qi.STAR],"")),Tme=(r,e)=>(Gr("replaceGTE0",r,e),r.trim().replace(Oi[e.includePrerelease?Qi.GTE0PRE:Qi.GTE0],"")),Ome=r=>(e,t,i,n,s,o,a,l,c,u,g,f,h)=>(_i(i)?t="":_i(n)?t=`>=${i}.0.0${r?"-0":""}`:_i(s)?t=`>=${i}.${n}.0${r?"-0":""}`:o?t=`>=${t}`:t=`>=${t}${r?"-0":""}`,_i(c)?l="":_i(u)?l=`<${+c+1}.0.0-0`:_i(g)?l=`<${c}.${+u+1}.0-0`:f?l=`<=${c}.${u}.${g}-${f}`:r?l=`<${c}.${u}.${+g+1}-0`:l=`<=${l}`,`${t} ${l}`.trim()),Mme=(r,e,t)=>{for(let i=0;i0){let n=r[i].semver;if(n.major===e.major&&n.minor===e.minor&&n.patch===e.patch)return!0}return!1}return!0}});var Ed=w((U$e,oY)=>{var Id=Symbol("SemVer ANY"),Vg=class{static get ANY(){return Id}constructor(e,t){if(t=Ume(t),e instanceof Vg){if(e.loose===!!t.loose)return e;e=e.value}bv("comparator",e,t),this.options=t,this.loose=!!t.loose,this.parse(e),this.semver===Id?this.value="":this.value=this.operator+this.semver.version,bv("comp",this)}parse(e){let t=this.options.loose?rY[iY.COMPARATORLOOSE]:rY[iY.COMPARATOR],i=e.match(t);if(!i)throw new TypeError(`Invalid comparator: ${e}`);this.operator=i[1]!==void 0?i[1]:"",this.operator==="="&&(this.operator=""),i[2]?this.semver=new nY(i[2],this.options.loose):this.semver=Id}toString(){return this.value}test(e){if(bv("Comparator.test",e,this.options.loose),this.semver===Id||e===Id)return!0;if(typeof e=="string")try{e=new nY(e,this.options)}catch{return!1}return Qv(e,this.operator,this.semver,this.options)}intersects(e,t){if(!(e instanceof Vg))throw new TypeError("a Comparator is required");if((!t||typeof t!="object")&&(t={loose:!!t,includePrerelease:!1}),this.operator==="")return this.value===""?!0:new sY(e.value,t).test(this.value);if(e.operator==="")return e.value===""?!0:new sY(this.value,t).test(e.semver);let i=(this.operator===">="||this.operator===">")&&(e.operator===">="||e.operator===">"),n=(this.operator==="<="||this.operator==="<")&&(e.operator==="<="||e.operator==="<"),s=this.semver.version===e.semver.version,o=(this.operator===">="||this.operator==="<=")&&(e.operator===">="||e.operator==="<="),a=Qv(this.semver,"<",e.semver,t)&&(this.operator===">="||this.operator===">")&&(e.operator==="<="||e.operator==="<"),l=Qv(this.semver,">",e.semver,t)&&(this.operator==="<="||this.operator==="<")&&(e.operator===">="||e.operator===">");return i||n||s&&o||a||l}};oY.exports=Vg;var Ume=hd(),{re:rY,t:iY}=uc(),Qv=mv(),bv=fd(),nY=Ti(),sY=us()});var yd=w((K$e,aY)=>{var Kme=us(),Hme=(r,e,t)=>{try{e=new Kme(e,t)}catch{return!1}return e.test(r)};aY.exports=Hme});var lY=w((H$e,AY)=>{var Gme=us(),Yme=(r,e)=>new Gme(r,e).set.map(t=>t.map(i=>i.value).join(" ").trim().split(" "));AY.exports=Yme});var uY=w((G$e,cY)=>{var jme=Ti(),qme=us(),Jme=(r,e,t)=>{let i=null,n=null,s=null;try{s=new qme(e,t)}catch{return null}return r.forEach(o=>{s.test(o)&&(!i||n.compare(o)===-1)&&(i=o,n=new jme(i,t))}),i};cY.exports=Jme});var fY=w((Y$e,gY)=>{var Wme=Ti(),zme=us(),Vme=(r,e,t)=>{let i=null,n=null,s=null;try{s=new zme(e,t)}catch{return null}return r.forEach(o=>{s.test(o)&&(!i||n.compare(o)===1)&&(i=o,n=new Wme(i,t))}),i};gY.exports=Vme});var dY=w((j$e,pY)=>{var Sv=Ti(),Xme=us(),hY=dd(),Zme=(r,e)=>{r=new Xme(r,e);let t=new Sv("0.0.0");if(r.test(t)||(t=new Sv("0.0.0-0"),r.test(t)))return t;t=null;for(let i=0;i{let a=new Sv(o.semver.version);switch(o.operator){case">":a.prerelease.length===0?a.patch++:a.prerelease.push(0),a.raw=a.format();case"":case">=":(!s||hY(a,s))&&(s=a);break;case"<":case"<=":break;default:throw new Error(`Unexpected operation: ${o.operator}`)}}),s&&(!t||hY(t,s))&&(t=s)}return t&&r.test(t)?t:null};pY.exports=Zme});var mY=w((q$e,CY)=>{var _me=us(),$me=(r,e)=>{try{return new _me(r,e).range||"*"}catch{return null}};CY.exports=$me});var VI=w((J$e,wY)=>{var eEe=Ti(),yY=Ed(),{ANY:tEe}=yY,rEe=us(),iEe=yd(),EY=dd(),IY=GI(),nEe=jI(),sEe=YI(),oEe=(r,e,t,i)=>{r=new eEe(r,i),e=new rEe(e,i);let n,s,o,a,l;switch(t){case">":n=EY,s=nEe,o=IY,a=">",l=">=";break;case"<":n=IY,s=sEe,o=EY,a="<",l="<=";break;default:throw new TypeError('Must provide a hilo val of "<" or ">"')}if(iEe(r,e,i))return!1;for(let c=0;c{h.semver===tEe&&(h=new yY(">=0.0.0")),g=g||h,f=f||h,n(h.semver,g.semver,i)?g=h:o(h.semver,f.semver,i)&&(f=h)}),g.operator===a||g.operator===l||(!f.operator||f.operator===a)&&s(r,f.semver))return!1;if(f.operator===l&&o(r,f.semver))return!1}return!0};wY.exports=oEe});var QY=w((W$e,BY)=>{var aEe=VI(),AEe=(r,e,t)=>aEe(r,e,">",t);BY.exports=AEe});var SY=w((z$e,bY)=>{var lEe=VI(),cEe=(r,e,t)=>lEe(r,e,"<",t);bY.exports=cEe});var PY=w((V$e,xY)=>{var vY=us(),uEe=(r,e,t)=>(r=new vY(r,t),e=new vY(e,t),r.intersects(e));xY.exports=uEe});var kY=w((X$e,DY)=>{var gEe=yd(),fEe=cs();DY.exports=(r,e,t)=>{let i=[],n=null,s=null,o=r.sort((u,g)=>fEe(u,g,t));for(let u of o)gEe(u,e,t)?(s=u,n||(n=u)):(s&&i.push([n,s]),s=null,n=null);n&&i.push([n,null]);let a=[];for(let[u,g]of i)u===g?a.push(u):!g&&u===o[0]?a.push("*"):g?u===o[0]?a.push(`<=${g}`):a.push(`${u} - ${g}`):a.push(`>=${u}`);let l=a.join(" || "),c=typeof e.raw=="string"?e.raw:String(e);return l.length{var RY=us(),XI=Ed(),{ANY:vv}=XI,wd=yd(),xv=cs(),hEe=(r,e,t={})=>{if(r===e)return!0;r=new RY(r,t),e=new RY(e,t);let i=!1;e:for(let n of r.set){for(let s of e.set){let o=pEe(n,s,t);if(i=i||o!==null,o)continue e}if(i)return!1}return!0},pEe=(r,e,t)=>{if(r===e)return!0;if(r.length===1&&r[0].semver===vv){if(e.length===1&&e[0].semver===vv)return!0;t.includePrerelease?r=[new XI(">=0.0.0-0")]:r=[new XI(">=0.0.0")]}if(e.length===1&&e[0].semver===vv){if(t.includePrerelease)return!0;e=[new XI(">=0.0.0")]}let i=new Set,n,s;for(let h of r)h.operator===">"||h.operator===">="?n=FY(n,h,t):h.operator==="<"||h.operator==="<="?s=NY(s,h,t):i.add(h.semver);if(i.size>1)return null;let o;if(n&&s){if(o=xv(n.semver,s.semver,t),o>0)return null;if(o===0&&(n.operator!==">="||s.operator!=="<="))return null}for(let h of i){if(n&&!wd(h,String(n),t)||s&&!wd(h,String(s),t))return null;for(let p of e)if(!wd(h,String(p),t))return!1;return!0}let a,l,c,u,g=s&&!t.includePrerelease&&s.semver.prerelease.length?s.semver:!1,f=n&&!t.includePrerelease&&n.semver.prerelease.length?n.semver:!1;g&&g.prerelease.length===1&&s.operator==="<"&&g.prerelease[0]===0&&(g=!1);for(let h of e){if(u=u||h.operator===">"||h.operator===">=",c=c||h.operator==="<"||h.operator==="<=",n){if(f&&h.semver.prerelease&&h.semver.prerelease.length&&h.semver.major===f.major&&h.semver.minor===f.minor&&h.semver.patch===f.patch&&(f=!1),h.operator===">"||h.operator===">="){if(a=FY(n,h,t),a===h&&a!==n)return!1}else if(n.operator===">="&&!wd(n.semver,String(h),t))return!1}if(s){if(g&&h.semver.prerelease&&h.semver.prerelease.length&&h.semver.major===g.major&&h.semver.minor===g.minor&&h.semver.patch===g.patch&&(g=!1),h.operator==="<"||h.operator==="<="){if(l=NY(s,h,t),l===h&&l!==s)return!1}else if(s.operator==="<="&&!wd(s.semver,String(h),t))return!1}if(!h.operator&&(s||n)&&o!==0)return!1}return!(n&&c&&!s&&o!==0||s&&u&&!n&&o!==0||f||g)},FY=(r,e,t)=>{if(!r)return e;let i=xv(r.semver,e.semver,t);return i>0?r:i<0||e.operator===">"&&r.operator===">="?e:r},NY=(r,e,t)=>{if(!r)return e;let i=xv(r.semver,e.semver,t);return i<0?r:i>0||e.operator==="<"&&r.operator==="<="?e:r};LY.exports=hEe});var Xr=w((_$e,OY)=>{var Pv=uc();OY.exports={re:Pv.re,src:Pv.src,tokens:Pv.t,SEMVER_SPEC_VERSION:gd().SEMVER_SPEC_VERSION,SemVer:Ti(),compareIdentifiers:OI().compareIdentifiers,rcompareIdentifiers:OI().rcompareIdentifiers,parse:gc(),valid:aG(),clean:lG(),inc:uG(),diff:CG(),major:EG(),minor:yG(),patch:BG(),prerelease:bG(),compare:cs(),rcompare:vG(),compareLoose:PG(),compareBuild:HI(),sort:FG(),rsort:LG(),gt:dd(),lt:GI(),eq:KI(),neq:Cv(),gte:YI(),lte:jI(),cmp:mv(),coerce:YG(),Comparator:Ed(),Range:us(),satisfies:yd(),toComparators:lY(),maxSatisfying:uY(),minSatisfying:fY(),minVersion:dY(),validRange:mY(),outside:VI(),gtr:QY(),ltr:SY(),intersects:PY(),simplifyRange:kY(),subset:TY()}});var Dv=w(ZI=>{"use strict";Object.defineProperty(ZI,"__esModule",{value:!0});ZI.VERSION=void 0;ZI.VERSION="9.1.0"});var Gt=w((exports,module)=>{"use strict";var __spreadArray=exports&&exports.__spreadArray||function(r,e,t){if(t||arguments.length===2)for(var i=0,n=e.length,s;i{(function(r,e){typeof define=="function"&&define.amd?define([],e):typeof _I=="object"&&_I.exports?_I.exports=e():r.regexpToAst=e()})(typeof self<"u"?self:MY,function(){function r(){}r.prototype.saveState=function(){return{idx:this.idx,input:this.input,groupIdx:this.groupIdx}},r.prototype.restoreState=function(p){this.idx=p.idx,this.input=p.input,this.groupIdx=p.groupIdx},r.prototype.pattern=function(p){this.idx=0,this.input=p,this.groupIdx=0,this.consumeChar("/");var C=this.disjunction();this.consumeChar("/");for(var y={type:"Flags",loc:{begin:this.idx,end:p.length},global:!1,ignoreCase:!1,multiLine:!1,unicode:!1,sticky:!1};this.isRegExpFlag();)switch(this.popChar()){case"g":o(y,"global");break;case"i":o(y,"ignoreCase");break;case"m":o(y,"multiLine");break;case"u":o(y,"unicode");break;case"y":o(y,"sticky");break}if(this.idx!==this.input.length)throw Error("Redundant input: "+this.input.substring(this.idx));return{type:"Pattern",flags:y,value:C,loc:this.loc(0)}},r.prototype.disjunction=function(){var p=[],C=this.idx;for(p.push(this.alternative());this.peekChar()==="|";)this.consumeChar("|"),p.push(this.alternative());return{type:"Disjunction",value:p,loc:this.loc(C)}},r.prototype.alternative=function(){for(var p=[],C=this.idx;this.isTerm();)p.push(this.term());return{type:"Alternative",value:p,loc:this.loc(C)}},r.prototype.term=function(){return this.isAssertion()?this.assertion():this.atom()},r.prototype.assertion=function(){var p=this.idx;switch(this.popChar()){case"^":return{type:"StartAnchor",loc:this.loc(p)};case"$":return{type:"EndAnchor",loc:this.loc(p)};case"\\":switch(this.popChar()){case"b":return{type:"WordBoundary",loc:this.loc(p)};case"B":return{type:"NonWordBoundary",loc:this.loc(p)}}throw Error("Invalid Assertion Escape");case"(":this.consumeChar("?");var C;switch(this.popChar()){case"=":C="Lookahead";break;case"!":C="NegativeLookahead";break}a(C);var y=this.disjunction();return this.consumeChar(")"),{type:C,value:y,loc:this.loc(p)}}l()},r.prototype.quantifier=function(p){var C,y=this.idx;switch(this.popChar()){case"*":C={atLeast:0,atMost:1/0};break;case"+":C={atLeast:1,atMost:1/0};break;case"?":C={atLeast:0,atMost:1};break;case"{":var B=this.integerIncludingZero();switch(this.popChar()){case"}":C={atLeast:B,atMost:B};break;case",":var v;this.isDigit()?(v=this.integerIncludingZero(),C={atLeast:B,atMost:v}):C={atLeast:B,atMost:1/0},this.consumeChar("}");break}if(p===!0&&C===void 0)return;a(C);break}if(!(p===!0&&C===void 0))return a(C),this.peekChar(0)==="?"?(this.consumeChar("?"),C.greedy=!1):C.greedy=!0,C.type="Quantifier",C.loc=this.loc(y),C},r.prototype.atom=function(){var p,C=this.idx;switch(this.peekChar()){case".":p=this.dotAll();break;case"\\":p=this.atomEscape();break;case"[":p=this.characterClass();break;case"(":p=this.group();break}return p===void 0&&this.isPatternCharacter()&&(p=this.patternCharacter()),a(p),p.loc=this.loc(C),this.isQuantifier()&&(p.quantifier=this.quantifier()),p},r.prototype.dotAll=function(){return this.consumeChar("."),{type:"Set",complement:!0,value:[n(` `),n("\r"),n("\u2028"),n("\u2029")]}},r.prototype.atomEscape=function(){switch(this.consumeChar("\\"),this.peekChar()){case"1":case"2":case"3":case"4":case"5":case"6":case"7":case"8":case"9":return this.decimalEscapeAtom();case"d":case"D":case"s":case"S":case"w":case"W":return this.characterClassEscape();case"f":case"n":case"r":case"t":case"v":return this.controlEscapeAtom();case"c":return this.controlLetterEscapeAtom();case"0":return this.nulCharacterAtom();case"x":return this.hexEscapeSequenceAtom();case"u":return this.regExpUnicodeEscapeSequenceAtom();default:return this.identityEscapeAtom()}},r.prototype.decimalEscapeAtom=function(){var p=this.positiveInteger();return{type:"GroupBackReference",value:p}},r.prototype.characterClassEscape=function(){var p,C=!1;switch(this.popChar()){case"d":p=u;break;case"D":p=u,C=!0;break;case"s":p=f;break;case"S":p=f,C=!0;break;case"w":p=g;break;case"W":p=g,C=!0;break}return a(p),{type:"Set",value:p,complement:C}},r.prototype.controlEscapeAtom=function(){var p;switch(this.popChar()){case"f":p=n("\f");break;case"n":p=n(` `);break;case"r":p=n("\r");break;case"t":p=n(" ");break;case"v":p=n("\v");break}return a(p),{type:"Character",value:p}},r.prototype.controlLetterEscapeAtom=function(){this.consumeChar("c");var p=this.popChar();if(/[a-zA-Z]/.test(p)===!1)throw Error("Invalid ");var C=p.toUpperCase().charCodeAt(0)-64;return{type:"Character",value:C}},r.prototype.nulCharacterAtom=function(){return this.consumeChar("0"),{type:"Character",value:n("\0")}},r.prototype.hexEscapeSequenceAtom=function(){return this.consumeChar("x"),this.parseHexDigits(2)},r.prototype.regExpUnicodeEscapeSequenceAtom=function(){return this.consumeChar("u"),this.parseHexDigits(4)},r.prototype.identityEscapeAtom=function(){var p=this.popChar();return{type:"Character",value:n(p)}},r.prototype.classPatternCharacterAtom=function(){switch(this.peekChar()){case` `:case"\r":case"\u2028":case"\u2029":case"\\":case"]":throw Error("TBD");default:var p=this.popChar();return{type:"Character",value:n(p)}}},r.prototype.characterClass=function(){var p=[],C=!1;for(this.consumeChar("["),this.peekChar(0)==="^"&&(this.consumeChar("^"),C=!0);this.isClassAtom();){var y=this.classAtom(),B=y.type==="Character";if(B&&this.isRangeDash()){this.consumeChar("-");var v=this.classAtom(),D=v.type==="Character";if(D){if(v.value=this.input.length)throw Error("Unexpected end of input");this.idx++},r.prototype.loc=function(p){return{begin:p,end:this.idx}};var e=/[0-9a-fA-F]/,t=/[0-9]/,i=/[1-9]/;function n(p){return p.charCodeAt(0)}function s(p,C){p.length!==void 0?p.forEach(function(y){C.push(y)}):C.push(p)}function o(p,C){if(p[C]===!0)throw"duplicate flag "+C;p[C]=!0}function a(p){if(p===void 0)throw Error("Internal Error - Should never get here!")}function l(){throw Error("Internal Error - Should never get here!")}var c,u=[];for(c=n("0");c<=n("9");c++)u.push(c);var g=[n("_")].concat(u);for(c=n("a");c<=n("z");c++)g.push(c);for(c=n("A");c<=n("Z");c++)g.push(c);var f=[n(" "),n("\f"),n(` `),n("\r"),n(" "),n("\v"),n(" "),n("\xA0"),n("\u1680"),n("\u2000"),n("\u2001"),n("\u2002"),n("\u2003"),n("\u2004"),n("\u2005"),n("\u2006"),n("\u2007"),n("\u2008"),n("\u2009"),n("\u200A"),n("\u2028"),n("\u2029"),n("\u202F"),n("\u205F"),n("\u3000"),n("\uFEFF")];function h(){}return h.prototype.visitChildren=function(p){for(var C in p){var y=p[C];p.hasOwnProperty(C)&&(y.type!==void 0?this.visit(y):Array.isArray(y)&&y.forEach(function(B){this.visit(B)},this))}},h.prototype.visit=function(p){switch(p.type){case"Pattern":this.visitPattern(p);break;case"Flags":this.visitFlags(p);break;case"Disjunction":this.visitDisjunction(p);break;case"Alternative":this.visitAlternative(p);break;case"StartAnchor":this.visitStartAnchor(p);break;case"EndAnchor":this.visitEndAnchor(p);break;case"WordBoundary":this.visitWordBoundary(p);break;case"NonWordBoundary":this.visitNonWordBoundary(p);break;case"Lookahead":this.visitLookahead(p);break;case"NegativeLookahead":this.visitNegativeLookahead(p);break;case"Character":this.visitCharacter(p);break;case"Set":this.visitSet(p);break;case"Group":this.visitGroup(p);break;case"GroupBackReference":this.visitGroupBackReference(p);break;case"Quantifier":this.visitQuantifier(p);break}this.visitChildren(p)},h.prototype.visitPattern=function(p){},h.prototype.visitFlags=function(p){},h.prototype.visitDisjunction=function(p){},h.prototype.visitAlternative=function(p){},h.prototype.visitStartAnchor=function(p){},h.prototype.visitEndAnchor=function(p){},h.prototype.visitWordBoundary=function(p){},h.prototype.visitNonWordBoundary=function(p){},h.prototype.visitLookahead=function(p){},h.prototype.visitNegativeLookahead=function(p){},h.prototype.visitCharacter=function(p){},h.prototype.visitSet=function(p){},h.prototype.visitGroup=function(p){},h.prototype.visitGroupBackReference=function(p){},h.prototype.visitQuantifier=function(p){},{RegExpParser:r,BaseRegExpVisitor:h,VERSION:"0.5.0"}})});var ty=w(Xg=>{"use strict";Object.defineProperty(Xg,"__esModule",{value:!0});Xg.clearRegExpParserCache=Xg.getRegExpAst=void 0;var dEe=$I(),ey={},CEe=new dEe.RegExpParser;function mEe(r){var e=r.toString();if(ey.hasOwnProperty(e))return ey[e];var t=CEe.pattern(e);return ey[e]=t,t}Xg.getRegExpAst=mEe;function EEe(){ey={}}Xg.clearRegExpParserCache=EEe});var YY=w(dn=>{"use strict";var IEe=dn&&dn.__extends||function(){var r=function(e,t){return r=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(i,n){i.__proto__=n}||function(i,n){for(var s in n)Object.prototype.hasOwnProperty.call(n,s)&&(i[s]=n[s])},r(e,t)};return function(e,t){if(typeof t!="function"&&t!==null)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");r(e,t);function i(){this.constructor=e}e.prototype=t===null?Object.create(t):(i.prototype=t.prototype,new i)}}();Object.defineProperty(dn,"__esModule",{value:!0});dn.canMatchCharCode=dn.firstCharOptimizedIndices=dn.getOptimizedStartCodesIndices=dn.failedOptimizationPrefixMsg=void 0;var KY=$I(),gs=Gt(),HY=ty(),va=Rv(),GY="Complement Sets are not supported for first char optimization";dn.failedOptimizationPrefixMsg=`Unable to use "first char" lexer optimizations: `;function yEe(r,e){e===void 0&&(e=!1);try{var t=(0,HY.getRegExpAst)(r),i=iy(t.value,{},t.flags.ignoreCase);return i}catch(s){if(s.message===GY)e&&(0,gs.PRINT_WARNING)(""+dn.failedOptimizationPrefixMsg+(" Unable to optimize: < "+r.toString()+` > `)+` Complement Sets cannot be automatically optimized. This will disable the lexer's first char optimizations. See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#COMPLEMENT for details.`);else{var n="";e&&(n=` This will disable the lexer's first char optimizations. See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#REGEXP_PARSING for details.`),(0,gs.PRINT_ERROR)(dn.failedOptimizationPrefixMsg+` `+(" Failed parsing: < "+r.toString()+` > `)+(" Using the regexp-to-ast library version: "+KY.VERSION+` `)+" Please open an issue at: https://github.com/bd82/regexp-to-ast/issues"+n)}}return[]}dn.getOptimizedStartCodesIndices=yEe;function iy(r,e,t){switch(r.type){case"Disjunction":for(var i=0;i=va.minOptimizationVal)for(var f=u.from>=va.minOptimizationVal?u.from:va.minOptimizationVal,h=u.to,p=(0,va.charCodeToOptimizedIndex)(f),C=(0,va.charCodeToOptimizedIndex)(h),y=p;y<=C;y++)e[y]=y}}});break;case"Group":iy(o.value,e,t);break;default:throw Error("Non Exhaustive Match")}var a=o.quantifier!==void 0&&o.quantifier.atLeast===0;if(o.type==="Group"&&kv(o)===!1||o.type!=="Group"&&a===!1)break}break;default:throw Error("non exhaustive match!")}return(0,gs.values)(e)}dn.firstCharOptimizedIndices=iy;function ry(r,e,t){var i=(0,va.charCodeToOptimizedIndex)(r);e[i]=i,t===!0&&wEe(r,e)}function wEe(r,e){var t=String.fromCharCode(r),i=t.toUpperCase();if(i!==t){var n=(0,va.charCodeToOptimizedIndex)(i.charCodeAt(0));e[n]=n}else{var s=t.toLowerCase();if(s!==t){var n=(0,va.charCodeToOptimizedIndex)(s.charCodeAt(0));e[n]=n}}}function UY(r,e){return(0,gs.find)(r.value,function(t){if(typeof t=="number")return(0,gs.contains)(e,t);var i=t;return(0,gs.find)(e,function(n){return i.from<=n&&n<=i.to})!==void 0})}function kv(r){return r.quantifier&&r.quantifier.atLeast===0?!0:r.value?(0,gs.isArray)(r.value)?(0,gs.every)(r.value,kv):kv(r.value):!1}var BEe=function(r){IEe(e,r);function e(t){var i=r.call(this)||this;return i.targetCharCodes=t,i.found=!1,i}return e.prototype.visitChildren=function(t){if(this.found!==!0){switch(t.type){case"Lookahead":this.visitLookahead(t);return;case"NegativeLookahead":this.visitNegativeLookahead(t);return}r.prototype.visitChildren.call(this,t)}},e.prototype.visitCharacter=function(t){(0,gs.contains)(this.targetCharCodes,t.value)&&(this.found=!0)},e.prototype.visitSet=function(t){t.complement?UY(t,this.targetCharCodes)===void 0&&(this.found=!0):UY(t,this.targetCharCodes)!==void 0&&(this.found=!0)},e}(KY.BaseRegExpVisitor);function QEe(r,e){if(e instanceof RegExp){var t=(0,HY.getRegExpAst)(e),i=new BEe(r);return i.visit(t),i.found}else return(0,gs.find)(e,function(n){return(0,gs.contains)(r,n.charCodeAt(0))})!==void 0}dn.canMatchCharCode=QEe});var Rv=w(Ve=>{"use strict";var jY=Ve&&Ve.__extends||function(){var r=function(e,t){return r=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(i,n){i.__proto__=n}||function(i,n){for(var s in n)Object.prototype.hasOwnProperty.call(n,s)&&(i[s]=n[s])},r(e,t)};return function(e,t){if(typeof t!="function"&&t!==null)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");r(e,t);function i(){this.constructor=e}e.prototype=t===null?Object.create(t):(i.prototype=t.prototype,new i)}}();Object.defineProperty(Ve,"__esModule",{value:!0});Ve.charCodeToOptimizedIndex=Ve.minOptimizationVal=Ve.buildLineBreakIssueMessage=Ve.LineTerminatorOptimizedTester=Ve.isShortPattern=Ve.isCustomPattern=Ve.cloneEmptyGroups=Ve.performWarningRuntimeChecks=Ve.performRuntimeChecks=Ve.addStickyFlag=Ve.addStartOfInput=Ve.findUnreachablePatterns=Ve.findModesThatDoNotExist=Ve.findInvalidGroupType=Ve.findDuplicatePatterns=Ve.findUnsupportedFlags=Ve.findStartOfInputAnchor=Ve.findEmptyMatchRegExps=Ve.findEndOfInputAnchor=Ve.findInvalidPatterns=Ve.findMissingPatterns=Ve.validatePatterns=Ve.analyzeTokenTypes=Ve.enableSticky=Ve.disableSticky=Ve.SUPPORT_STICKY=Ve.MODES=Ve.DEFAULT_MODE=void 0;var qY=$I(),ir=Bd(),xe=Gt(),Zg=YY(),JY=ty(),Do="PATTERN";Ve.DEFAULT_MODE="defaultMode";Ve.MODES="modes";Ve.SUPPORT_STICKY=typeof new RegExp("(?:)").sticky=="boolean";function bEe(){Ve.SUPPORT_STICKY=!1}Ve.disableSticky=bEe;function SEe(){Ve.SUPPORT_STICKY=!0}Ve.enableSticky=SEe;function vEe(r,e){e=(0,xe.defaults)(e,{useSticky:Ve.SUPPORT_STICKY,debug:!1,safeMode:!1,positionTracking:"full",lineTerminatorCharacters:["\r",` `],tracer:function(v,D){return D()}});var t=e.tracer;t("initCharCodeToOptimizedIndexMap",function(){OEe()});var i;t("Reject Lexer.NA",function(){i=(0,xe.reject)(r,function(v){return v[Do]===ir.Lexer.NA})});var n=!1,s;t("Transform Patterns",function(){n=!1,s=(0,xe.map)(i,function(v){var D=v[Do];if((0,xe.isRegExp)(D)){var L=D.source;return L.length===1&&L!=="^"&&L!=="$"&&L!=="."&&!D.ignoreCase?L:L.length===2&&L[0]==="\\"&&!(0,xe.contains)(["d","D","s","S","t","r","n","t","0","c","b","B","f","v","w","W"],L[1])?L[1]:e.useSticky?Lv(D):Nv(D)}else{if((0,xe.isFunction)(D))return n=!0,{exec:D};if((0,xe.has)(D,"exec"))return n=!0,D;if(typeof D=="string"){if(D.length===1)return D;var H=D.replace(/[\\^$.*+?()[\]{}|]/g,"\\$&"),j=new RegExp(H);return e.useSticky?Lv(j):Nv(j)}else throw Error("non exhaustive match")}})});var o,a,l,c,u;t("misc mapping",function(){o=(0,xe.map)(i,function(v){return v.tokenTypeIdx}),a=(0,xe.map)(i,function(v){var D=v.GROUP;if(D!==ir.Lexer.SKIPPED){if((0,xe.isString)(D))return D;if((0,xe.isUndefined)(D))return!1;throw Error("non exhaustive match")}}),l=(0,xe.map)(i,function(v){var D=v.LONGER_ALT;if(D){var L=(0,xe.isArray)(D)?(0,xe.map)(D,function(H){return(0,xe.indexOf)(i,H)}):[(0,xe.indexOf)(i,D)];return L}}),c=(0,xe.map)(i,function(v){return v.PUSH_MODE}),u=(0,xe.map)(i,function(v){return(0,xe.has)(v,"POP_MODE")})});var g;t("Line Terminator Handling",function(){var v=oj(e.lineTerminatorCharacters);g=(0,xe.map)(i,function(D){return!1}),e.positionTracking!=="onlyOffset"&&(g=(0,xe.map)(i,function(D){if((0,xe.has)(D,"LINE_BREAKS"))return D.LINE_BREAKS;if(nj(D,v)===!1)return(0,Zg.canMatchCharCode)(v,D.PATTERN)}))});var f,h,p,C;t("Misc Mapping #2",function(){f=(0,xe.map)(i,Ov),h=(0,xe.map)(s,ij),p=(0,xe.reduce)(i,function(v,D){var L=D.GROUP;return(0,xe.isString)(L)&&L!==ir.Lexer.SKIPPED&&(v[L]=[]),v},{}),C=(0,xe.map)(s,function(v,D){return{pattern:s[D],longerAlt:l[D],canLineTerminator:g[D],isCustom:f[D],short:h[D],group:a[D],push:c[D],pop:u[D],tokenTypeIdx:o[D],tokenType:i[D]}})});var y=!0,B=[];return e.safeMode||t("First Char Optimization",function(){B=(0,xe.reduce)(i,function(v,D,L){if(typeof D.PATTERN=="string"){var H=D.PATTERN.charCodeAt(0),j=Tv(H);Fv(v,j,C[L])}else if((0,xe.isArray)(D.START_CHARS_HINT)){var $;(0,xe.forEach)(D.START_CHARS_HINT,function(W){var _=typeof W=="string"?W.charCodeAt(0):W,A=Tv(_);$!==A&&($=A,Fv(v,A,C[L]))})}else if((0,xe.isRegExp)(D.PATTERN))if(D.PATTERN.unicode)y=!1,e.ensureOptimizations&&(0,xe.PRINT_ERROR)(""+Zg.failedOptimizationPrefixMsg+(" Unable to analyze < "+D.PATTERN.toString()+` > pattern. `)+` The regexp unicode flag is not currently supported by the regexp-to-ast library. This will disable the lexer's first char optimizations. For details See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#UNICODE_OPTIMIZE`);else{var V=(0,Zg.getOptimizedStartCodesIndices)(D.PATTERN,e.ensureOptimizations);(0,xe.isEmpty)(V)&&(y=!1),(0,xe.forEach)(V,function(W){Fv(v,W,C[L])})}else e.ensureOptimizations&&(0,xe.PRINT_ERROR)(""+Zg.failedOptimizationPrefixMsg+(" TokenType: <"+D.name+`> is using a custom token pattern without providing parameter. `)+` This will disable the lexer's first char optimizations. For details See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#CUSTOM_OPTIMIZE`),y=!1;return v},[])}),t("ArrayPacking",function(){B=(0,xe.packArray)(B)}),{emptyGroups:p,patternIdxToConfig:C,charCodeToPatternIdxToConfig:B,hasCustom:n,canBeOptimized:y}}Ve.analyzeTokenTypes=vEe;function xEe(r,e){var t=[],i=WY(r);t=t.concat(i.errors);var n=zY(i.valid),s=n.valid;return t=t.concat(n.errors),t=t.concat(PEe(s)),t=t.concat(ej(s)),t=t.concat(tj(s,e)),t=t.concat(rj(s)),t}Ve.validatePatterns=xEe;function PEe(r){var e=[],t=(0,xe.filter)(r,function(i){return(0,xe.isRegExp)(i[Do])});return e=e.concat(VY(t)),e=e.concat(ZY(t)),e=e.concat(_Y(t)),e=e.concat($Y(t)),e=e.concat(XY(t)),e}function WY(r){var e=(0,xe.filter)(r,function(n){return!(0,xe.has)(n,Do)}),t=(0,xe.map)(e,function(n){return{message:"Token Type: ->"+n.name+"<- missing static 'PATTERN' property",type:ir.LexerDefinitionErrorType.MISSING_PATTERN,tokenTypes:[n]}}),i=(0,xe.difference)(r,e);return{errors:t,valid:i}}Ve.findMissingPatterns=WY;function zY(r){var e=(0,xe.filter)(r,function(n){var s=n[Do];return!(0,xe.isRegExp)(s)&&!(0,xe.isFunction)(s)&&!(0,xe.has)(s,"exec")&&!(0,xe.isString)(s)}),t=(0,xe.map)(e,function(n){return{message:"Token Type: ->"+n.name+"<- static 'PATTERN' can only be a RegExp, a Function matching the {CustomPatternMatcherFunc} type or an Object matching the {ICustomPattern} interface.",type:ir.LexerDefinitionErrorType.INVALID_PATTERN,tokenTypes:[n]}}),i=(0,xe.difference)(r,e);return{errors:t,valid:i}}Ve.findInvalidPatterns=zY;var DEe=/[^\\][\$]/;function VY(r){var e=function(n){jY(s,n);function s(){var o=n!==null&&n.apply(this,arguments)||this;return o.found=!1,o}return s.prototype.visitEndAnchor=function(o){this.found=!0},s}(qY.BaseRegExpVisitor),t=(0,xe.filter)(r,function(n){var s=n[Do];try{var o=(0,JY.getRegExpAst)(s),a=new e;return a.visit(o),a.found}catch{return DEe.test(s.source)}}),i=(0,xe.map)(t,function(n){return{message:`Unexpected RegExp Anchor Error: Token Type: ->`+n.name+`<- static 'PATTERN' cannot contain end of input anchor '$' See chevrotain.io/docs/guide/resolving_lexer_errors.html#ANCHORS for details.`,type:ir.LexerDefinitionErrorType.EOI_ANCHOR_FOUND,tokenTypes:[n]}});return i}Ve.findEndOfInputAnchor=VY;function XY(r){var e=(0,xe.filter)(r,function(i){var n=i[Do];return n.test("")}),t=(0,xe.map)(e,function(i){return{message:"Token Type: ->"+i.name+"<- static 'PATTERN' must not match an empty string",type:ir.LexerDefinitionErrorType.EMPTY_MATCH_PATTERN,tokenTypes:[i]}});return t}Ve.findEmptyMatchRegExps=XY;var kEe=/[^\\[][\^]|^\^/;function ZY(r){var e=function(n){jY(s,n);function s(){var o=n!==null&&n.apply(this,arguments)||this;return o.found=!1,o}return s.prototype.visitStartAnchor=function(o){this.found=!0},s}(qY.BaseRegExpVisitor),t=(0,xe.filter)(r,function(n){var s=n[Do];try{var o=(0,JY.getRegExpAst)(s),a=new e;return a.visit(o),a.found}catch{return kEe.test(s.source)}}),i=(0,xe.map)(t,function(n){return{message:`Unexpected RegExp Anchor Error: Token Type: ->`+n.name+`<- static 'PATTERN' cannot contain start of input anchor '^' See https://chevrotain.io/docs/guide/resolving_lexer_errors.html#ANCHORS for details.`,type:ir.LexerDefinitionErrorType.SOI_ANCHOR_FOUND,tokenTypes:[n]}});return i}Ve.findStartOfInputAnchor=ZY;function _Y(r){var e=(0,xe.filter)(r,function(i){var n=i[Do];return n instanceof RegExp&&(n.multiline||n.global)}),t=(0,xe.map)(e,function(i){return{message:"Token Type: ->"+i.name+"<- static 'PATTERN' may NOT contain global('g') or multiline('m')",type:ir.LexerDefinitionErrorType.UNSUPPORTED_FLAGS_FOUND,tokenTypes:[i]}});return t}Ve.findUnsupportedFlags=_Y;function $Y(r){var e=[],t=(0,xe.map)(r,function(s){return(0,xe.reduce)(r,function(o,a){return s.PATTERN.source===a.PATTERN.source&&!(0,xe.contains)(e,a)&&a.PATTERN!==ir.Lexer.NA&&(e.push(a),o.push(a)),o},[])});t=(0,xe.compact)(t);var i=(0,xe.filter)(t,function(s){return s.length>1}),n=(0,xe.map)(i,function(s){var o=(0,xe.map)(s,function(l){return l.name}),a=(0,xe.first)(s).PATTERN;return{message:"The same RegExp pattern ->"+a+"<-"+("has been used in all of the following Token Types: "+o.join(", ")+" <-"),type:ir.LexerDefinitionErrorType.DUPLICATE_PATTERNS_FOUND,tokenTypes:s}});return n}Ve.findDuplicatePatterns=$Y;function ej(r){var e=(0,xe.filter)(r,function(i){if(!(0,xe.has)(i,"GROUP"))return!1;var n=i.GROUP;return n!==ir.Lexer.SKIPPED&&n!==ir.Lexer.NA&&!(0,xe.isString)(n)}),t=(0,xe.map)(e,function(i){return{message:"Token Type: ->"+i.name+"<- static 'GROUP' can only be Lexer.SKIPPED/Lexer.NA/A String",type:ir.LexerDefinitionErrorType.INVALID_GROUP_TYPE_FOUND,tokenTypes:[i]}});return t}Ve.findInvalidGroupType=ej;function tj(r,e){var t=(0,xe.filter)(r,function(n){return n.PUSH_MODE!==void 0&&!(0,xe.contains)(e,n.PUSH_MODE)}),i=(0,xe.map)(t,function(n){var s="Token Type: ->"+n.name+"<- static 'PUSH_MODE' value cannot refer to a Lexer Mode ->"+n.PUSH_MODE+"<-which does not exist";return{message:s,type:ir.LexerDefinitionErrorType.PUSH_MODE_DOES_NOT_EXIST,tokenTypes:[n]}});return i}Ve.findModesThatDoNotExist=tj;function rj(r){var e=[],t=(0,xe.reduce)(r,function(i,n,s){var o=n.PATTERN;return o===ir.Lexer.NA||((0,xe.isString)(o)?i.push({str:o,idx:s,tokenType:n}):(0,xe.isRegExp)(o)&&FEe(o)&&i.push({str:o.source,idx:s,tokenType:n})),i},[]);return(0,xe.forEach)(r,function(i,n){(0,xe.forEach)(t,function(s){var o=s.str,a=s.idx,l=s.tokenType;if(n"+i.name+"<-")+`in the lexer's definition. See https://chevrotain.io/docs/guide/resolving_lexer_errors.html#UNREACHABLE`;e.push({message:c,type:ir.LexerDefinitionErrorType.UNREACHABLE_PATTERN,tokenTypes:[i,l]})}})}),e}Ve.findUnreachablePatterns=rj;function REe(r,e){if((0,xe.isRegExp)(e)){var t=e.exec(r);return t!==null&&t.index===0}else{if((0,xe.isFunction)(e))return e(r,0,[],{});if((0,xe.has)(e,"exec"))return e.exec(r,0,[],{});if(typeof e=="string")return e===r;throw Error("non exhaustive match")}}function FEe(r){var e=[".","\\","[","]","|","^","$","(",")","?","*","+","{"];return(0,xe.find)(e,function(t){return r.source.indexOf(t)!==-1})===void 0}function Nv(r){var e=r.ignoreCase?"i":"";return new RegExp("^(?:"+r.source+")",e)}Ve.addStartOfInput=Nv;function Lv(r){var e=r.ignoreCase?"iy":"y";return new RegExp(""+r.source,e)}Ve.addStickyFlag=Lv;function NEe(r,e,t){var i=[];return(0,xe.has)(r,Ve.DEFAULT_MODE)||i.push({message:"A MultiMode Lexer cannot be initialized without a <"+Ve.DEFAULT_MODE+`> property in its definition `,type:ir.LexerDefinitionErrorType.MULTI_MODE_LEXER_WITHOUT_DEFAULT_MODE}),(0,xe.has)(r,Ve.MODES)||i.push({message:"A MultiMode Lexer cannot be initialized without a <"+Ve.MODES+`> property in its definition `,type:ir.LexerDefinitionErrorType.MULTI_MODE_LEXER_WITHOUT_MODES_PROPERTY}),(0,xe.has)(r,Ve.MODES)&&(0,xe.has)(r,Ve.DEFAULT_MODE)&&!(0,xe.has)(r.modes,r.defaultMode)&&i.push({message:"A MultiMode Lexer cannot be initialized with a "+Ve.DEFAULT_MODE+": <"+r.defaultMode+`>which does not exist `,type:ir.LexerDefinitionErrorType.MULTI_MODE_LEXER_DEFAULT_MODE_VALUE_DOES_NOT_EXIST}),(0,xe.has)(r,Ve.MODES)&&(0,xe.forEach)(r.modes,function(n,s){(0,xe.forEach)(n,function(o,a){(0,xe.isUndefined)(o)&&i.push({message:"A Lexer cannot be initialized using an undefined Token Type. Mode:"+("<"+s+"> at index: <"+a+`> `),type:ir.LexerDefinitionErrorType.LEXER_DEFINITION_CANNOT_CONTAIN_UNDEFINED})})}),i}Ve.performRuntimeChecks=NEe;function LEe(r,e,t){var i=[],n=!1,s=(0,xe.compact)((0,xe.flatten)((0,xe.mapValues)(r.modes,function(l){return l}))),o=(0,xe.reject)(s,function(l){return l[Do]===ir.Lexer.NA}),a=oj(t);return e&&(0,xe.forEach)(o,function(l){var c=nj(l,a);if(c!==!1){var u=sj(l,c),g={message:u,type:c.issue,tokenType:l};i.push(g)}else(0,xe.has)(l,"LINE_BREAKS")?l.LINE_BREAKS===!0&&(n=!0):(0,Zg.canMatchCharCode)(a,l.PATTERN)&&(n=!0)}),e&&!n&&i.push({message:`Warning: No LINE_BREAKS Found. This Lexer has been defined to track line and column information, But none of the Token Types can be identified as matching a line terminator. See https://chevrotain.io/docs/guide/resolving_lexer_errors.html#LINE_BREAKS for details.`,type:ir.LexerDefinitionErrorType.NO_LINE_BREAKS_FLAGS}),i}Ve.performWarningRuntimeChecks=LEe;function TEe(r){var e={},t=(0,xe.keys)(r);return(0,xe.forEach)(t,function(i){var n=r[i];if((0,xe.isArray)(n))e[i]=[];else throw Error("non exhaustive match")}),e}Ve.cloneEmptyGroups=TEe;function Ov(r){var e=r.PATTERN;if((0,xe.isRegExp)(e))return!1;if((0,xe.isFunction)(e))return!0;if((0,xe.has)(e,"exec"))return!0;if((0,xe.isString)(e))return!1;throw Error("non exhaustive match")}Ve.isCustomPattern=Ov;function ij(r){return(0,xe.isString)(r)&&r.length===1?r.charCodeAt(0):!1}Ve.isShortPattern=ij;Ve.LineTerminatorOptimizedTester={test:function(r){for(var e=r.length,t=this.lastIndex;t Token Type `)+(" Root cause: "+e.errMsg+`. `)+" For details See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#IDENTIFY_TERMINATOR";if(e.issue===ir.LexerDefinitionErrorType.CUSTOM_LINE_BREAK)return`Warning: A Custom Token Pattern should specify the option. `+(" The problem is in the <"+r.name+`> Token Type `)+" For details See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#CUSTOM_LINE_BREAK";throw Error("non exhaustive match")}Ve.buildLineBreakIssueMessage=sj;function oj(r){var e=(0,xe.map)(r,function(t){return(0,xe.isString)(t)&&t.length>0?t.charCodeAt(0):t});return e}function Fv(r,e,t){r[e]===void 0?r[e]=[t]:r[e].push(t)}Ve.minOptimizationVal=256;var ny=[];function Tv(r){return r255?255+~~(r/255):r}}});var _g=w(Nt=>{"use strict";Object.defineProperty(Nt,"__esModule",{value:!0});Nt.isTokenType=Nt.hasExtendingTokensTypesMapProperty=Nt.hasExtendingTokensTypesProperty=Nt.hasCategoriesProperty=Nt.hasShortKeyProperty=Nt.singleAssignCategoriesToksMap=Nt.assignCategoriesMapProp=Nt.assignCategoriesTokensProp=Nt.assignTokenDefaultProps=Nt.expandCategories=Nt.augmentTokenTypes=Nt.tokenIdxToClass=Nt.tokenShortNameIdx=Nt.tokenStructuredMatcherNoCategories=Nt.tokenStructuredMatcher=void 0;var Zr=Gt();function MEe(r,e){var t=r.tokenTypeIdx;return t===e.tokenTypeIdx?!0:e.isParent===!0&&e.categoryMatchesMap[t]===!0}Nt.tokenStructuredMatcher=MEe;function UEe(r,e){return r.tokenTypeIdx===e.tokenTypeIdx}Nt.tokenStructuredMatcherNoCategories=UEe;Nt.tokenShortNameIdx=1;Nt.tokenIdxToClass={};function KEe(r){var e=aj(r);Aj(e),cj(e),lj(e),(0,Zr.forEach)(e,function(t){t.isParent=t.categoryMatches.length>0})}Nt.augmentTokenTypes=KEe;function aj(r){for(var e=(0,Zr.cloneArr)(r),t=r,i=!0;i;){t=(0,Zr.compact)((0,Zr.flatten)((0,Zr.map)(t,function(s){return s.CATEGORIES})));var n=(0,Zr.difference)(t,e);e=e.concat(n),(0,Zr.isEmpty)(n)?i=!1:t=n}return e}Nt.expandCategories=aj;function Aj(r){(0,Zr.forEach)(r,function(e){uj(e)||(Nt.tokenIdxToClass[Nt.tokenShortNameIdx]=e,e.tokenTypeIdx=Nt.tokenShortNameIdx++),Mv(e)&&!(0,Zr.isArray)(e.CATEGORIES)&&(e.CATEGORIES=[e.CATEGORIES]),Mv(e)||(e.CATEGORIES=[]),gj(e)||(e.categoryMatches=[]),fj(e)||(e.categoryMatchesMap={})})}Nt.assignTokenDefaultProps=Aj;function lj(r){(0,Zr.forEach)(r,function(e){e.categoryMatches=[],(0,Zr.forEach)(e.categoryMatchesMap,function(t,i){e.categoryMatches.push(Nt.tokenIdxToClass[i].tokenTypeIdx)})})}Nt.assignCategoriesTokensProp=lj;function cj(r){(0,Zr.forEach)(r,function(e){Uv([],e)})}Nt.assignCategoriesMapProp=cj;function Uv(r,e){(0,Zr.forEach)(r,function(t){e.categoryMatchesMap[t.tokenTypeIdx]=!0}),(0,Zr.forEach)(e.CATEGORIES,function(t){var i=r.concat(e);(0,Zr.contains)(i,t)||Uv(i,t)})}Nt.singleAssignCategoriesToksMap=Uv;function uj(r){return(0,Zr.has)(r,"tokenTypeIdx")}Nt.hasShortKeyProperty=uj;function Mv(r){return(0,Zr.has)(r,"CATEGORIES")}Nt.hasCategoriesProperty=Mv;function gj(r){return(0,Zr.has)(r,"categoryMatches")}Nt.hasExtendingTokensTypesProperty=gj;function fj(r){return(0,Zr.has)(r,"categoryMatchesMap")}Nt.hasExtendingTokensTypesMapProperty=fj;function HEe(r){return(0,Zr.has)(r,"tokenTypeIdx")}Nt.isTokenType=HEe});var Kv=w(sy=>{"use strict";Object.defineProperty(sy,"__esModule",{value:!0});sy.defaultLexerErrorProvider=void 0;sy.defaultLexerErrorProvider={buildUnableToPopLexerModeMessage:function(r){return"Unable to pop Lexer Mode after encountering Token ->"+r.image+"<- The Mode Stack is empty"},buildUnexpectedCharactersMessage:function(r,e,t,i,n){return"unexpected character: ->"+r.charAt(e)+"<- at offset: "+e+","+(" skipped "+t+" characters.")}}});var Bd=w(Cc=>{"use strict";Object.defineProperty(Cc,"__esModule",{value:!0});Cc.Lexer=Cc.LexerDefinitionErrorType=void 0;var _s=Rv(),nr=Gt(),GEe=_g(),YEe=Kv(),jEe=ty(),qEe;(function(r){r[r.MISSING_PATTERN=0]="MISSING_PATTERN",r[r.INVALID_PATTERN=1]="INVALID_PATTERN",r[r.EOI_ANCHOR_FOUND=2]="EOI_ANCHOR_FOUND",r[r.UNSUPPORTED_FLAGS_FOUND=3]="UNSUPPORTED_FLAGS_FOUND",r[r.DUPLICATE_PATTERNS_FOUND=4]="DUPLICATE_PATTERNS_FOUND",r[r.INVALID_GROUP_TYPE_FOUND=5]="INVALID_GROUP_TYPE_FOUND",r[r.PUSH_MODE_DOES_NOT_EXIST=6]="PUSH_MODE_DOES_NOT_EXIST",r[r.MULTI_MODE_LEXER_WITHOUT_DEFAULT_MODE=7]="MULTI_MODE_LEXER_WITHOUT_DEFAULT_MODE",r[r.MULTI_MODE_LEXER_WITHOUT_MODES_PROPERTY=8]="MULTI_MODE_LEXER_WITHOUT_MODES_PROPERTY",r[r.MULTI_MODE_LEXER_DEFAULT_MODE_VALUE_DOES_NOT_EXIST=9]="MULTI_MODE_LEXER_DEFAULT_MODE_VALUE_DOES_NOT_EXIST",r[r.LEXER_DEFINITION_CANNOT_CONTAIN_UNDEFINED=10]="LEXER_DEFINITION_CANNOT_CONTAIN_UNDEFINED",r[r.SOI_ANCHOR_FOUND=11]="SOI_ANCHOR_FOUND",r[r.EMPTY_MATCH_PATTERN=12]="EMPTY_MATCH_PATTERN",r[r.NO_LINE_BREAKS_FLAGS=13]="NO_LINE_BREAKS_FLAGS",r[r.UNREACHABLE_PATTERN=14]="UNREACHABLE_PATTERN",r[r.IDENTIFY_TERMINATOR=15]="IDENTIFY_TERMINATOR",r[r.CUSTOM_LINE_BREAK=16]="CUSTOM_LINE_BREAK"})(qEe=Cc.LexerDefinitionErrorType||(Cc.LexerDefinitionErrorType={}));var Qd={deferDefinitionErrorsHandling:!1,positionTracking:"full",lineTerminatorsPattern:/\n|\r\n?/g,lineTerminatorCharacters:[` `,"\r"],ensureOptimizations:!1,safeMode:!1,errorMessageProvider:YEe.defaultLexerErrorProvider,traceInitPerf:!1,skipValidations:!1};Object.freeze(Qd);var JEe=function(){function r(e,t){var i=this;if(t===void 0&&(t=Qd),this.lexerDefinition=e,this.lexerDefinitionErrors=[],this.lexerDefinitionWarning=[],this.patternIdxToConfig={},this.charCodeToPatternIdxToConfig={},this.modes=[],this.emptyGroups={},this.config=void 0,this.trackStartLines=!0,this.trackEndLines=!0,this.hasCustom=!1,this.canModeBeOptimized={},typeof t=="boolean")throw Error(`The second argument to the Lexer constructor is now an ILexerConfig Object. a boolean 2nd argument is no longer supported`);this.config=(0,nr.merge)(Qd,t);var n=this.config.traceInitPerf;n===!0?(this.traceInitMaxIdent=1/0,this.traceInitPerf=!0):typeof n=="number"&&(this.traceInitMaxIdent=n,this.traceInitPerf=!0),this.traceInitIndent=-1,this.TRACE_INIT("Lexer Constructor",function(){var s,o=!0;i.TRACE_INIT("Lexer Config handling",function(){if(i.config.lineTerminatorsPattern===Qd.lineTerminatorsPattern)i.config.lineTerminatorsPattern=_s.LineTerminatorOptimizedTester;else if(i.config.lineTerminatorCharacters===Qd.lineTerminatorCharacters)throw Error(`Error: Missing property on the Lexer config. For details See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#MISSING_LINE_TERM_CHARS`);if(t.safeMode&&t.ensureOptimizations)throw Error('"safeMode" and "ensureOptimizations" flags are mutually exclusive.');i.trackStartLines=/full|onlyStart/i.test(i.config.positionTracking),i.trackEndLines=/full/i.test(i.config.positionTracking),(0,nr.isArray)(e)?(s={modes:{}},s.modes[_s.DEFAULT_MODE]=(0,nr.cloneArr)(e),s[_s.DEFAULT_MODE]=_s.DEFAULT_MODE):(o=!1,s=(0,nr.cloneObj)(e))}),i.config.skipValidations===!1&&(i.TRACE_INIT("performRuntimeChecks",function(){i.lexerDefinitionErrors=i.lexerDefinitionErrors.concat((0,_s.performRuntimeChecks)(s,i.trackStartLines,i.config.lineTerminatorCharacters))}),i.TRACE_INIT("performWarningRuntimeChecks",function(){i.lexerDefinitionWarning=i.lexerDefinitionWarning.concat((0,_s.performWarningRuntimeChecks)(s,i.trackStartLines,i.config.lineTerminatorCharacters))})),s.modes=s.modes?s.modes:{},(0,nr.forEach)(s.modes,function(u,g){s.modes[g]=(0,nr.reject)(u,function(f){return(0,nr.isUndefined)(f)})});var a=(0,nr.keys)(s.modes);if((0,nr.forEach)(s.modes,function(u,g){i.TRACE_INIT("Mode: <"+g+"> processing",function(){if(i.modes.push(g),i.config.skipValidations===!1&&i.TRACE_INIT("validatePatterns",function(){i.lexerDefinitionErrors=i.lexerDefinitionErrors.concat((0,_s.validatePatterns)(u,a))}),(0,nr.isEmpty)(i.lexerDefinitionErrors)){(0,GEe.augmentTokenTypes)(u);var f;i.TRACE_INIT("analyzeTokenTypes",function(){f=(0,_s.analyzeTokenTypes)(u,{lineTerminatorCharacters:i.config.lineTerminatorCharacters,positionTracking:t.positionTracking,ensureOptimizations:t.ensureOptimizations,safeMode:t.safeMode,tracer:i.TRACE_INIT.bind(i)})}),i.patternIdxToConfig[g]=f.patternIdxToConfig,i.charCodeToPatternIdxToConfig[g]=f.charCodeToPatternIdxToConfig,i.emptyGroups=(0,nr.merge)(i.emptyGroups,f.emptyGroups),i.hasCustom=f.hasCustom||i.hasCustom,i.canModeBeOptimized[g]=f.canBeOptimized}})}),i.defaultMode=s.defaultMode,!(0,nr.isEmpty)(i.lexerDefinitionErrors)&&!i.config.deferDefinitionErrorsHandling){var l=(0,nr.map)(i.lexerDefinitionErrors,function(u){return u.message}),c=l.join(`----------------------- `);throw new Error(`Errors detected in definition of Lexer: `+c)}(0,nr.forEach)(i.lexerDefinitionWarning,function(u){(0,nr.PRINT_WARNING)(u.message)}),i.TRACE_INIT("Choosing sub-methods implementations",function(){if(_s.SUPPORT_STICKY?(i.chopInput=nr.IDENTITY,i.match=i.matchWithTest):(i.updateLastIndex=nr.NOOP,i.match=i.matchWithExec),o&&(i.handleModes=nr.NOOP),i.trackStartLines===!1&&(i.computeNewColumn=nr.IDENTITY),i.trackEndLines===!1&&(i.updateTokenEndLineColumnLocation=nr.NOOP),/full/i.test(i.config.positionTracking))i.createTokenInstance=i.createFullToken;else if(/onlyStart/i.test(i.config.positionTracking))i.createTokenInstance=i.createStartOnlyToken;else if(/onlyOffset/i.test(i.config.positionTracking))i.createTokenInstance=i.createOffsetOnlyToken;else throw Error('Invalid config option: "'+i.config.positionTracking+'"');i.hasCustom?(i.addToken=i.addTokenUsingPush,i.handlePayload=i.handlePayloadWithCustom):(i.addToken=i.addTokenUsingMemberAccess,i.handlePayload=i.handlePayloadNoCustom)}),i.TRACE_INIT("Failed Optimization Warnings",function(){var u=(0,nr.reduce)(i.canModeBeOptimized,function(g,f,h){return f===!1&&g.push(h),g},[]);if(t.ensureOptimizations&&!(0,nr.isEmpty)(u))throw Error("Lexer Modes: < "+u.join(", ")+` > cannot be optimized. Disable the "ensureOptimizations" lexer config flag to silently ignore this and run the lexer in an un-optimized mode. Or inspect the console log for details on how to resolve these issues.`)}),i.TRACE_INIT("clearRegExpParserCache",function(){(0,jEe.clearRegExpParserCache)()}),i.TRACE_INIT("toFastProperties",function(){(0,nr.toFastProperties)(i)})})}return r.prototype.tokenize=function(e,t){if(t===void 0&&(t=this.defaultMode),!(0,nr.isEmpty)(this.lexerDefinitionErrors)){var i=(0,nr.map)(this.lexerDefinitionErrors,function(o){return o.message}),n=i.join(`----------------------- `);throw new Error(`Unable to Tokenize because Errors detected in definition of Lexer: `+n)}var s=this.tokenizeInternal(e,t);return s},r.prototype.tokenizeInternal=function(e,t){var i=this,n,s,o,a,l,c,u,g,f,h,p,C,y,B,v,D,L=e,H=L.length,j=0,$=0,V=this.hasCustom?0:Math.floor(e.length/10),W=new Array(V),_=[],A=this.trackStartLines?1:void 0,Ae=this.trackStartLines?1:void 0,ge=(0,_s.cloneEmptyGroups)(this.emptyGroups),re=this.trackStartLines,O=this.config.lineTerminatorsPattern,F=0,ue=[],pe=[],ke=[],Fe=[];Object.freeze(Fe);var Ne=void 0;function oe(){return ue}function le(pr){var Ii=(0,_s.charCodeToOptimizedIndex)(pr),rs=pe[Ii];return rs===void 0?Fe:rs}var Be=function(pr){if(ke.length===1&&pr.tokenType.PUSH_MODE===void 0){var Ii=i.config.errorMessageProvider.buildUnableToPopLexerModeMessage(pr);_.push({offset:pr.startOffset,line:pr.startLine!==void 0?pr.startLine:void 0,column:pr.startColumn!==void 0?pr.startColumn:void 0,length:pr.image.length,message:Ii})}else{ke.pop();var rs=(0,nr.last)(ke);ue=i.patternIdxToConfig[rs],pe=i.charCodeToPatternIdxToConfig[rs],F=ue.length;var ga=i.canModeBeOptimized[rs]&&i.config.safeMode===!1;pe&&ga?Ne=le:Ne=oe}};function fe(pr){ke.push(pr),pe=this.charCodeToPatternIdxToConfig[pr],ue=this.patternIdxToConfig[pr],F=ue.length,F=ue.length;var Ii=this.canModeBeOptimized[pr]&&this.config.safeMode===!1;pe&&Ii?Ne=le:Ne=oe}fe.call(this,t);for(var ae;jc.length){c=a,u=g,ae=_e;break}}}break}}if(c!==null){if(f=c.length,h=ae.group,h!==void 0&&(p=ae.tokenTypeIdx,C=this.createTokenInstance(c,j,p,ae.tokenType,A,Ae,f),this.handlePayload(C,u),h===!1?$=this.addToken(W,$,C):ge[h].push(C)),e=this.chopInput(e,f),j=j+f,Ae=this.computeNewColumn(Ae,f),re===!0&&ae.canLineTerminator===!0){var It=0,Or=void 0,ii=void 0;O.lastIndex=0;do Or=O.test(c),Or===!0&&(ii=O.lastIndex-1,It++);while(Or===!0);It!==0&&(A=A+It,Ae=f-ii,this.updateTokenEndLineColumnLocation(C,h,ii,It,A,Ae,f))}this.handleModes(ae,Be,fe,C)}else{for(var gi=j,hr=A,fi=Ae,ni=!1;!ni&&j <"+e+">");var n=(0,nr.timer)(t),s=n.time,o=n.value,a=s>10?console.warn:console.log;return this.traceInitIndent time: "+s+"ms"),this.traceInitIndent--,o}else return t()},r.SKIPPED="This marks a skipped Token pattern, this means each token identified by it willbe consumed and then thrown into oblivion, this can be used to for example to completely ignore whitespace.",r.NA=/NOT_APPLICABLE/,r}();Cc.Lexer=JEe});var LA=w(bi=>{"use strict";Object.defineProperty(bi,"__esModule",{value:!0});bi.tokenMatcher=bi.createTokenInstance=bi.EOF=bi.createToken=bi.hasTokenLabel=bi.tokenName=bi.tokenLabel=void 0;var $s=Gt(),WEe=Bd(),Hv=_g();function zEe(r){return wj(r)?r.LABEL:r.name}bi.tokenLabel=zEe;function VEe(r){return r.name}bi.tokenName=VEe;function wj(r){return(0,$s.isString)(r.LABEL)&&r.LABEL!==""}bi.hasTokenLabel=wj;var XEe="parent",hj="categories",pj="label",dj="group",Cj="push_mode",mj="pop_mode",Ej="longer_alt",Ij="line_breaks",yj="start_chars_hint";function Bj(r){return ZEe(r)}bi.createToken=Bj;function ZEe(r){var e=r.pattern,t={};if(t.name=r.name,(0,$s.isUndefined)(e)||(t.PATTERN=e),(0,$s.has)(r,XEe))throw`The parent property is no longer supported. See: https://github.com/chevrotain/chevrotain/issues/564#issuecomment-349062346 for details.`;return(0,$s.has)(r,hj)&&(t.CATEGORIES=r[hj]),(0,Hv.augmentTokenTypes)([t]),(0,$s.has)(r,pj)&&(t.LABEL=r[pj]),(0,$s.has)(r,dj)&&(t.GROUP=r[dj]),(0,$s.has)(r,mj)&&(t.POP_MODE=r[mj]),(0,$s.has)(r,Cj)&&(t.PUSH_MODE=r[Cj]),(0,$s.has)(r,Ej)&&(t.LONGER_ALT=r[Ej]),(0,$s.has)(r,Ij)&&(t.LINE_BREAKS=r[Ij]),(0,$s.has)(r,yj)&&(t.START_CHARS_HINT=r[yj]),t}bi.EOF=Bj({name:"EOF",pattern:WEe.Lexer.NA});(0,Hv.augmentTokenTypes)([bi.EOF]);function _Ee(r,e,t,i,n,s,o,a){return{image:e,startOffset:t,endOffset:i,startLine:n,endLine:s,startColumn:o,endColumn:a,tokenTypeIdx:r.tokenTypeIdx,tokenType:r}}bi.createTokenInstance=_Ee;function $Ee(r,e){return(0,Hv.tokenStructuredMatcher)(r,e)}bi.tokenMatcher=$Ee});var Cn=w(zt=>{"use strict";var xa=zt&&zt.__extends||function(){var r=function(e,t){return r=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(i,n){i.__proto__=n}||function(i,n){for(var s in n)Object.prototype.hasOwnProperty.call(n,s)&&(i[s]=n[s])},r(e,t)};return function(e,t){if(typeof t!="function"&&t!==null)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");r(e,t);function i(){this.constructor=e}e.prototype=t===null?Object.create(t):(i.prototype=t.prototype,new i)}}();Object.defineProperty(zt,"__esModule",{value:!0});zt.serializeProduction=zt.serializeGrammar=zt.Terminal=zt.Alternation=zt.RepetitionWithSeparator=zt.Repetition=zt.RepetitionMandatoryWithSeparator=zt.RepetitionMandatory=zt.Option=zt.Alternative=zt.Rule=zt.NonTerminal=zt.AbstractProduction=void 0;var Ar=Gt(),eIe=LA(),ko=function(){function r(e){this._definition=e}return Object.defineProperty(r.prototype,"definition",{get:function(){return this._definition},set:function(e){this._definition=e},enumerable:!1,configurable:!0}),r.prototype.accept=function(e){e.visit(this),(0,Ar.forEach)(this.definition,function(t){t.accept(e)})},r}();zt.AbstractProduction=ko;var Qj=function(r){xa(e,r);function e(t){var i=r.call(this,[])||this;return i.idx=1,(0,Ar.assign)(i,(0,Ar.pick)(t,function(n){return n!==void 0})),i}return Object.defineProperty(e.prototype,"definition",{get:function(){return this.referencedRule!==void 0?this.referencedRule.definition:[]},set:function(t){},enumerable:!1,configurable:!0}),e.prototype.accept=function(t){t.visit(this)},e}(ko);zt.NonTerminal=Qj;var bj=function(r){xa(e,r);function e(t){var i=r.call(this,t.definition)||this;return i.orgText="",(0,Ar.assign)(i,(0,Ar.pick)(t,function(n){return n!==void 0})),i}return e}(ko);zt.Rule=bj;var Sj=function(r){xa(e,r);function e(t){var i=r.call(this,t.definition)||this;return i.ignoreAmbiguities=!1,(0,Ar.assign)(i,(0,Ar.pick)(t,function(n){return n!==void 0})),i}return e}(ko);zt.Alternative=Sj;var vj=function(r){xa(e,r);function e(t){var i=r.call(this,t.definition)||this;return i.idx=1,(0,Ar.assign)(i,(0,Ar.pick)(t,function(n){return n!==void 0})),i}return e}(ko);zt.Option=vj;var xj=function(r){xa(e,r);function e(t){var i=r.call(this,t.definition)||this;return i.idx=1,(0,Ar.assign)(i,(0,Ar.pick)(t,function(n){return n!==void 0})),i}return e}(ko);zt.RepetitionMandatory=xj;var Pj=function(r){xa(e,r);function e(t){var i=r.call(this,t.definition)||this;return i.idx=1,(0,Ar.assign)(i,(0,Ar.pick)(t,function(n){return n!==void 0})),i}return e}(ko);zt.RepetitionMandatoryWithSeparator=Pj;var Dj=function(r){xa(e,r);function e(t){var i=r.call(this,t.definition)||this;return i.idx=1,(0,Ar.assign)(i,(0,Ar.pick)(t,function(n){return n!==void 0})),i}return e}(ko);zt.Repetition=Dj;var kj=function(r){xa(e,r);function e(t){var i=r.call(this,t.definition)||this;return i.idx=1,(0,Ar.assign)(i,(0,Ar.pick)(t,function(n){return n!==void 0})),i}return e}(ko);zt.RepetitionWithSeparator=kj;var Rj=function(r){xa(e,r);function e(t){var i=r.call(this,t.definition)||this;return i.idx=1,i.ignoreAmbiguities=!1,i.hasPredicates=!1,(0,Ar.assign)(i,(0,Ar.pick)(t,function(n){return n!==void 0})),i}return Object.defineProperty(e.prototype,"definition",{get:function(){return this._definition},set:function(t){this._definition=t},enumerable:!1,configurable:!0}),e}(ko);zt.Alternation=Rj;var oy=function(){function r(e){this.idx=1,(0,Ar.assign)(this,(0,Ar.pick)(e,function(t){return t!==void 0}))}return r.prototype.accept=function(e){e.visit(this)},r}();zt.Terminal=oy;function tIe(r){return(0,Ar.map)(r,bd)}zt.serializeGrammar=tIe;function bd(r){function e(s){return(0,Ar.map)(s,bd)}if(r instanceof Qj){var t={type:"NonTerminal",name:r.nonTerminalName,idx:r.idx};return(0,Ar.isString)(r.label)&&(t.label=r.label),t}else{if(r instanceof Sj)return{type:"Alternative",definition:e(r.definition)};if(r instanceof vj)return{type:"Option",idx:r.idx,definition:e(r.definition)};if(r instanceof xj)return{type:"RepetitionMandatory",idx:r.idx,definition:e(r.definition)};if(r instanceof Pj)return{type:"RepetitionMandatoryWithSeparator",idx:r.idx,separator:bd(new oy({terminalType:r.separator})),definition:e(r.definition)};if(r instanceof kj)return{type:"RepetitionWithSeparator",idx:r.idx,separator:bd(new oy({terminalType:r.separator})),definition:e(r.definition)};if(r instanceof Dj)return{type:"Repetition",idx:r.idx,definition:e(r.definition)};if(r instanceof Rj)return{type:"Alternation",idx:r.idx,definition:e(r.definition)};if(r instanceof oy){var i={type:"Terminal",name:r.terminalType.name,label:(0,eIe.tokenLabel)(r.terminalType),idx:r.idx};(0,Ar.isString)(r.label)&&(i.terminalLabel=r.label);var n=r.terminalType.PATTERN;return r.terminalType.PATTERN&&(i.pattern=(0,Ar.isRegExp)(n)?n.source:n),i}else{if(r instanceof bj)return{type:"Rule",name:r.name,orgText:r.orgText,definition:e(r.definition)};throw Error("non exhaustive match")}}}zt.serializeProduction=bd});var Ay=w(ay=>{"use strict";Object.defineProperty(ay,"__esModule",{value:!0});ay.RestWalker=void 0;var Gv=Gt(),mn=Cn(),rIe=function(){function r(){}return r.prototype.walk=function(e,t){var i=this;t===void 0&&(t=[]),(0,Gv.forEach)(e.definition,function(n,s){var o=(0,Gv.drop)(e.definition,s+1);if(n instanceof mn.NonTerminal)i.walkProdRef(n,o,t);else if(n instanceof mn.Terminal)i.walkTerminal(n,o,t);else if(n instanceof mn.Alternative)i.walkFlat(n,o,t);else if(n instanceof mn.Option)i.walkOption(n,o,t);else if(n instanceof mn.RepetitionMandatory)i.walkAtLeastOne(n,o,t);else if(n instanceof mn.RepetitionMandatoryWithSeparator)i.walkAtLeastOneSep(n,o,t);else if(n instanceof mn.RepetitionWithSeparator)i.walkManySep(n,o,t);else if(n instanceof mn.Repetition)i.walkMany(n,o,t);else if(n instanceof mn.Alternation)i.walkOr(n,o,t);else throw Error("non exhaustive match")})},r.prototype.walkTerminal=function(e,t,i){},r.prototype.walkProdRef=function(e,t,i){},r.prototype.walkFlat=function(e,t,i){var n=t.concat(i);this.walk(e,n)},r.prototype.walkOption=function(e,t,i){var n=t.concat(i);this.walk(e,n)},r.prototype.walkAtLeastOne=function(e,t,i){var n=[new mn.Option({definition:e.definition})].concat(t,i);this.walk(e,n)},r.prototype.walkAtLeastOneSep=function(e,t,i){var n=Fj(e,t,i);this.walk(e,n)},r.prototype.walkMany=function(e,t,i){var n=[new mn.Option({definition:e.definition})].concat(t,i);this.walk(e,n)},r.prototype.walkManySep=function(e,t,i){var n=Fj(e,t,i);this.walk(e,n)},r.prototype.walkOr=function(e,t,i){var n=this,s=t.concat(i);(0,Gv.forEach)(e.definition,function(o){var a=new mn.Alternative({definition:[o]});n.walk(a,s)})},r}();ay.RestWalker=rIe;function Fj(r,e,t){var i=[new mn.Option({definition:[new mn.Terminal({terminalType:r.separator})].concat(r.definition)})],n=i.concat(e,t);return n}});var $g=w(ly=>{"use strict";Object.defineProperty(ly,"__esModule",{value:!0});ly.GAstVisitor=void 0;var Ro=Cn(),iIe=function(){function r(){}return r.prototype.visit=function(e){var t=e;switch(t.constructor){case Ro.NonTerminal:return this.visitNonTerminal(t);case Ro.Alternative:return this.visitAlternative(t);case Ro.Option:return this.visitOption(t);case Ro.RepetitionMandatory:return this.visitRepetitionMandatory(t);case Ro.RepetitionMandatoryWithSeparator:return this.visitRepetitionMandatoryWithSeparator(t);case Ro.RepetitionWithSeparator:return this.visitRepetitionWithSeparator(t);case Ro.Repetition:return this.visitRepetition(t);case Ro.Alternation:return this.visitAlternation(t);case Ro.Terminal:return this.visitTerminal(t);case Ro.Rule:return this.visitRule(t);default:throw Error("non exhaustive match")}},r.prototype.visitNonTerminal=function(e){},r.prototype.visitAlternative=function(e){},r.prototype.visitOption=function(e){},r.prototype.visitRepetition=function(e){},r.prototype.visitRepetitionMandatory=function(e){},r.prototype.visitRepetitionMandatoryWithSeparator=function(e){},r.prototype.visitRepetitionWithSeparator=function(e){},r.prototype.visitAlternation=function(e){},r.prototype.visitTerminal=function(e){},r.prototype.visitRule=function(e){},r}();ly.GAstVisitor=iIe});var vd=w(Mi=>{"use strict";var nIe=Mi&&Mi.__extends||function(){var r=function(e,t){return r=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(i,n){i.__proto__=n}||function(i,n){for(var s in n)Object.prototype.hasOwnProperty.call(n,s)&&(i[s]=n[s])},r(e,t)};return function(e,t){if(typeof t!="function"&&t!==null)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");r(e,t);function i(){this.constructor=e}e.prototype=t===null?Object.create(t):(i.prototype=t.prototype,new i)}}();Object.defineProperty(Mi,"__esModule",{value:!0});Mi.collectMethods=Mi.DslMethodsCollectorVisitor=Mi.getProductionDslName=Mi.isBranchingProd=Mi.isOptionalProd=Mi.isSequenceProd=void 0;var Sd=Gt(),Qr=Cn(),sIe=$g();function oIe(r){return r instanceof Qr.Alternative||r instanceof Qr.Option||r instanceof Qr.Repetition||r instanceof Qr.RepetitionMandatory||r instanceof Qr.RepetitionMandatoryWithSeparator||r instanceof Qr.RepetitionWithSeparator||r instanceof Qr.Terminal||r instanceof Qr.Rule}Mi.isSequenceProd=oIe;function Yv(r,e){e===void 0&&(e=[]);var t=r instanceof Qr.Option||r instanceof Qr.Repetition||r instanceof Qr.RepetitionWithSeparator;return t?!0:r instanceof Qr.Alternation?(0,Sd.some)(r.definition,function(i){return Yv(i,e)}):r instanceof Qr.NonTerminal&&(0,Sd.contains)(e,r)?!1:r instanceof Qr.AbstractProduction?(r instanceof Qr.NonTerminal&&e.push(r),(0,Sd.every)(r.definition,function(i){return Yv(i,e)})):!1}Mi.isOptionalProd=Yv;function aIe(r){return r instanceof Qr.Alternation}Mi.isBranchingProd=aIe;function AIe(r){if(r instanceof Qr.NonTerminal)return"SUBRULE";if(r instanceof Qr.Option)return"OPTION";if(r instanceof Qr.Alternation)return"OR";if(r instanceof Qr.RepetitionMandatory)return"AT_LEAST_ONE";if(r instanceof Qr.RepetitionMandatoryWithSeparator)return"AT_LEAST_ONE_SEP";if(r instanceof Qr.RepetitionWithSeparator)return"MANY_SEP";if(r instanceof Qr.Repetition)return"MANY";if(r instanceof Qr.Terminal)return"CONSUME";throw Error("non exhaustive match")}Mi.getProductionDslName=AIe;var Nj=function(r){nIe(e,r);function e(){var t=r!==null&&r.apply(this,arguments)||this;return t.separator="-",t.dslMethods={option:[],alternation:[],repetition:[],repetitionWithSeparator:[],repetitionMandatory:[],repetitionMandatoryWithSeparator:[]},t}return e.prototype.reset=function(){this.dslMethods={option:[],alternation:[],repetition:[],repetitionWithSeparator:[],repetitionMandatory:[],repetitionMandatoryWithSeparator:[]}},e.prototype.visitTerminal=function(t){var i=t.terminalType.name+this.separator+"Terminal";(0,Sd.has)(this.dslMethods,i)||(this.dslMethods[i]=[]),this.dslMethods[i].push(t)},e.prototype.visitNonTerminal=function(t){var i=t.nonTerminalName+this.separator+"Terminal";(0,Sd.has)(this.dslMethods,i)||(this.dslMethods[i]=[]),this.dslMethods[i].push(t)},e.prototype.visitOption=function(t){this.dslMethods.option.push(t)},e.prototype.visitRepetitionWithSeparator=function(t){this.dslMethods.repetitionWithSeparator.push(t)},e.prototype.visitRepetitionMandatory=function(t){this.dslMethods.repetitionMandatory.push(t)},e.prototype.visitRepetitionMandatoryWithSeparator=function(t){this.dslMethods.repetitionMandatoryWithSeparator.push(t)},e.prototype.visitRepetition=function(t){this.dslMethods.repetition.push(t)},e.prototype.visitAlternation=function(t){this.dslMethods.alternation.push(t)},e}(sIe.GAstVisitor);Mi.DslMethodsCollectorVisitor=Nj;var cy=new Nj;function lIe(r){cy.reset(),r.accept(cy);var e=cy.dslMethods;return cy.reset(),e}Mi.collectMethods=lIe});var qv=w(Fo=>{"use strict";Object.defineProperty(Fo,"__esModule",{value:!0});Fo.firstForTerminal=Fo.firstForBranching=Fo.firstForSequence=Fo.first=void 0;var uy=Gt(),Lj=Cn(),jv=vd();function gy(r){if(r instanceof Lj.NonTerminal)return gy(r.referencedRule);if(r instanceof Lj.Terminal)return Mj(r);if((0,jv.isSequenceProd)(r))return Tj(r);if((0,jv.isBranchingProd)(r))return Oj(r);throw Error("non exhaustive match")}Fo.first=gy;function Tj(r){for(var e=[],t=r.definition,i=0,n=t.length>i,s,o=!0;n&&o;)s=t[i],o=(0,jv.isOptionalProd)(s),e=e.concat(gy(s)),i=i+1,n=t.length>i;return(0,uy.uniq)(e)}Fo.firstForSequence=Tj;function Oj(r){var e=(0,uy.map)(r.definition,function(t){return gy(t)});return(0,uy.uniq)((0,uy.flatten)(e))}Fo.firstForBranching=Oj;function Mj(r){return[r.terminalType]}Fo.firstForTerminal=Mj});var Jv=w(fy=>{"use strict";Object.defineProperty(fy,"__esModule",{value:!0});fy.IN=void 0;fy.IN="_~IN~_"});var Yj=w(fs=>{"use strict";var cIe=fs&&fs.__extends||function(){var r=function(e,t){return r=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(i,n){i.__proto__=n}||function(i,n){for(var s in n)Object.prototype.hasOwnProperty.call(n,s)&&(i[s]=n[s])},r(e,t)};return function(e,t){if(typeof t!="function"&&t!==null)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");r(e,t);function i(){this.constructor=e}e.prototype=t===null?Object.create(t):(i.prototype=t.prototype,new i)}}();Object.defineProperty(fs,"__esModule",{value:!0});fs.buildInProdFollowPrefix=fs.buildBetweenProdsFollowPrefix=fs.computeAllProdsFollows=fs.ResyncFollowsWalker=void 0;var uIe=Ay(),gIe=qv(),Uj=Gt(),Kj=Jv(),fIe=Cn(),Hj=function(r){cIe(e,r);function e(t){var i=r.call(this)||this;return i.topProd=t,i.follows={},i}return e.prototype.startWalking=function(){return this.walk(this.topProd),this.follows},e.prototype.walkTerminal=function(t,i,n){},e.prototype.walkProdRef=function(t,i,n){var s=Gj(t.referencedRule,t.idx)+this.topProd.name,o=i.concat(n),a=new fIe.Alternative({definition:o}),l=(0,gIe.first)(a);this.follows[s]=l},e}(uIe.RestWalker);fs.ResyncFollowsWalker=Hj;function hIe(r){var e={};return(0,Uj.forEach)(r,function(t){var i=new Hj(t).startWalking();(0,Uj.assign)(e,i)}),e}fs.computeAllProdsFollows=hIe;function Gj(r,e){return r.name+e+Kj.IN}fs.buildBetweenProdsFollowPrefix=Gj;function pIe(r){var e=r.terminalType.name;return e+r.idx+Kj.IN}fs.buildInProdFollowPrefix=pIe});var xd=w(Pa=>{"use strict";Object.defineProperty(Pa,"__esModule",{value:!0});Pa.defaultGrammarValidatorErrorProvider=Pa.defaultGrammarResolverErrorProvider=Pa.defaultParserErrorProvider=void 0;var ef=LA(),dIe=Gt(),eo=Gt(),Wv=Cn(),jj=vd();Pa.defaultParserErrorProvider={buildMismatchTokenMessage:function(r){var e=r.expected,t=r.actual,i=r.previous,n=r.ruleName,s=(0,ef.hasTokenLabel)(e),o=s?"--> "+(0,ef.tokenLabel)(e)+" <--":"token of type --> "+e.name+" <--",a="Expecting "+o+" but found --> '"+t.image+"' <--";return a},buildNotAllInputParsedMessage:function(r){var e=r.firstRedundant,t=r.ruleName;return"Redundant input, expecting EOF but found: "+e.image},buildNoViableAltMessage:function(r){var e=r.expectedPathsPerAlt,t=r.actual,i=r.previous,n=r.customUserDescription,s=r.ruleName,o="Expecting: ",a=(0,eo.first)(t).image,l=` but found: '`+a+"'";if(n)return o+n+l;var c=(0,eo.reduce)(e,function(h,p){return h.concat(p)},[]),u=(0,eo.map)(c,function(h){return"["+(0,eo.map)(h,function(p){return(0,ef.tokenLabel)(p)}).join(", ")+"]"}),g=(0,eo.map)(u,function(h,p){return" "+(p+1)+". "+h}),f=`one of these possible Token sequences: `+g.join(` `);return o+f+l},buildEarlyExitMessage:function(r){var e=r.expectedIterationPaths,t=r.actual,i=r.customUserDescription,n=r.ruleName,s="Expecting: ",o=(0,eo.first)(t).image,a=` but found: '`+o+"'";if(i)return s+i+a;var l=(0,eo.map)(e,function(u){return"["+(0,eo.map)(u,function(g){return(0,ef.tokenLabel)(g)}).join(",")+"]"}),c=`expecting at least one iteration which starts with one of these possible Token sequences:: `+("<"+l.join(" ,")+">");return s+c+a}};Object.freeze(Pa.defaultParserErrorProvider);Pa.defaultGrammarResolverErrorProvider={buildRuleNotFoundError:function(r,e){var t="Invalid grammar, reference to a rule which is not defined: ->"+e.nonTerminalName+`<- inside top level rule: ->`+r.name+"<-";return t}};Pa.defaultGrammarValidatorErrorProvider={buildDuplicateFoundError:function(r,e){function t(u){return u instanceof Wv.Terminal?u.terminalType.name:u instanceof Wv.NonTerminal?u.nonTerminalName:""}var i=r.name,n=(0,eo.first)(e),s=n.idx,o=(0,jj.getProductionDslName)(n),a=t(n),l=s>0,c="->"+o+(l?s:"")+"<- "+(a?"with argument: ->"+a+"<-":"")+` appears more than once (`+e.length+" times) in the top level rule: ->"+i+`<-. For further details see: https://chevrotain.io/docs/FAQ.html#NUMERICAL_SUFFIXES `;return c=c.replace(/[ \t]+/g," "),c=c.replace(/\s\s+/g,` `),c},buildNamespaceConflictError:function(r){var e=`Namespace conflict found in grammar. `+("The grammar has both a Terminal(Token) and a Non-Terminal(Rule) named: <"+r.name+`>. `)+`To resolve this make sure each Terminal and Non-Terminal names are unique This is easy to accomplish by using the convention that Terminal names start with an uppercase letter and Non-Terminal names start with a lower case letter.`;return e},buildAlternationPrefixAmbiguityError:function(r){var e=(0,eo.map)(r.prefixPath,function(n){return(0,ef.tokenLabel)(n)}).join(", "),t=r.alternation.idx===0?"":r.alternation.idx,i="Ambiguous alternatives: <"+r.ambiguityIndices.join(" ,")+`> due to common lookahead prefix `+("in inside <"+r.topLevelRule.name+`> Rule, `)+("<"+e+`> may appears as a prefix path in all these alternatives. `)+`See: https://chevrotain.io/docs/guide/resolving_grammar_errors.html#COMMON_PREFIX For Further details.`;return i},buildAlternationAmbiguityError:function(r){var e=(0,eo.map)(r.prefixPath,function(n){return(0,ef.tokenLabel)(n)}).join(", "),t=r.alternation.idx===0?"":r.alternation.idx,i="Ambiguous Alternatives Detected: <"+r.ambiguityIndices.join(" ,")+"> in "+(" inside <"+r.topLevelRule.name+`> Rule, `)+("<"+e+`> may appears as a prefix path in all these alternatives. `);return i=i+`See: https://chevrotain.io/docs/guide/resolving_grammar_errors.html#AMBIGUOUS_ALTERNATIVES For Further details.`,i},buildEmptyRepetitionError:function(r){var e=(0,jj.getProductionDslName)(r.repetition);r.repetition.idx!==0&&(e+=r.repetition.idx);var t="The repetition <"+e+"> within Rule <"+r.topLevelRule.name+`> can never consume any tokens. This could lead to an infinite loop.`;return t},buildTokenNameError:function(r){return"deprecated"},buildEmptyAlternationError:function(r){var e="Ambiguous empty alternative: <"+(r.emptyChoiceIdx+1)+">"+(" in inside <"+r.topLevelRule.name+`> Rule. `)+"Only the last alternative may be an empty alternative.";return e},buildTooManyAlternativesError:function(r){var e=`An Alternation cannot have more than 256 alternatives: `+(" inside <"+r.topLevelRule.name+`> Rule. has `+(r.alternation.definition.length+1)+" alternatives.");return e},buildLeftRecursionError:function(r){var e=r.topLevelRule.name,t=dIe.map(r.leftRecursionPath,function(s){return s.name}),i=e+" --> "+t.concat([e]).join(" --> "),n=`Left Recursion found in grammar. `+("rule: <"+e+`> can be invoked from itself (directly or indirectly) `)+(`without consuming any Tokens. The grammar path that causes this is: `+i+` `)+` To fix this refactor your grammar to remove the left recursion. see: https://en.wikipedia.org/wiki/LL_parser#Left_Factoring.`;return n},buildInvalidRuleNameError:function(r){return"deprecated"},buildDuplicateRuleNameError:function(r){var e;r.topLevelRule instanceof Wv.Rule?e=r.topLevelRule.name:e=r.topLevelRule;var t="Duplicate definition, rule: ->"+e+"<- is already defined in the grammar: ->"+r.grammarName+"<-";return t}}});var Wj=w(TA=>{"use strict";var CIe=TA&&TA.__extends||function(){var r=function(e,t){return r=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(i,n){i.__proto__=n}||function(i,n){for(var s in n)Object.prototype.hasOwnProperty.call(n,s)&&(i[s]=n[s])},r(e,t)};return function(e,t){if(typeof t!="function"&&t!==null)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");r(e,t);function i(){this.constructor=e}e.prototype=t===null?Object.create(t):(i.prototype=t.prototype,new i)}}();Object.defineProperty(TA,"__esModule",{value:!0});TA.GastRefResolverVisitor=TA.resolveGrammar=void 0;var mIe=Yn(),qj=Gt(),EIe=$g();function IIe(r,e){var t=new Jj(r,e);return t.resolveRefs(),t.errors}TA.resolveGrammar=IIe;var Jj=function(r){CIe(e,r);function e(t,i){var n=r.call(this)||this;return n.nameToTopRule=t,n.errMsgProvider=i,n.errors=[],n}return e.prototype.resolveRefs=function(){var t=this;(0,qj.forEach)((0,qj.values)(this.nameToTopRule),function(i){t.currTopLevel=i,i.accept(t)})},e.prototype.visitNonTerminal=function(t){var i=this.nameToTopRule[t.nonTerminalName];if(i)t.referencedRule=i;else{var n=this.errMsgProvider.buildRuleNotFoundError(this.currTopLevel,t);this.errors.push({message:n,type:mIe.ParserDefinitionErrorType.UNRESOLVED_SUBRULE_REF,ruleName:this.currTopLevel.name,unresolvedRefName:t.nonTerminalName})}},e}(EIe.GAstVisitor);TA.GastRefResolverVisitor=Jj});var Dd=w(Nr=>{"use strict";var mc=Nr&&Nr.__extends||function(){var r=function(e,t){return r=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(i,n){i.__proto__=n}||function(i,n){for(var s in n)Object.prototype.hasOwnProperty.call(n,s)&&(i[s]=n[s])},r(e,t)};return function(e,t){if(typeof t!="function"&&t!==null)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");r(e,t);function i(){this.constructor=e}e.prototype=t===null?Object.create(t):(i.prototype=t.prototype,new i)}}();Object.defineProperty(Nr,"__esModule",{value:!0});Nr.nextPossibleTokensAfter=Nr.possiblePathsFrom=Nr.NextTerminalAfterAtLeastOneSepWalker=Nr.NextTerminalAfterAtLeastOneWalker=Nr.NextTerminalAfterManySepWalker=Nr.NextTerminalAfterManyWalker=Nr.AbstractNextTerminalAfterProductionWalker=Nr.NextAfterTokenWalker=Nr.AbstractNextPossibleTokensWalker=void 0;var zj=Ay(),Ut=Gt(),yIe=qv(),kt=Cn(),Vj=function(r){mc(e,r);function e(t,i){var n=r.call(this)||this;return n.topProd=t,n.path=i,n.possibleTokTypes=[],n.nextProductionName="",n.nextProductionOccurrence=0,n.found=!1,n.isAtEndOfPath=!1,n}return e.prototype.startWalking=function(){if(this.found=!1,this.path.ruleStack[0]!==this.topProd.name)throw Error("The path does not start with the walker's top Rule!");return this.ruleStack=(0,Ut.cloneArr)(this.path.ruleStack).reverse(),this.occurrenceStack=(0,Ut.cloneArr)(this.path.occurrenceStack).reverse(),this.ruleStack.pop(),this.occurrenceStack.pop(),this.updateExpectedNext(),this.walk(this.topProd),this.possibleTokTypes},e.prototype.walk=function(t,i){i===void 0&&(i=[]),this.found||r.prototype.walk.call(this,t,i)},e.prototype.walkProdRef=function(t,i,n){if(t.referencedRule.name===this.nextProductionName&&t.idx===this.nextProductionOccurrence){var s=i.concat(n);this.updateExpectedNext(),this.walk(t.referencedRule,s)}},e.prototype.updateExpectedNext=function(){(0,Ut.isEmpty)(this.ruleStack)?(this.nextProductionName="",this.nextProductionOccurrence=0,this.isAtEndOfPath=!0):(this.nextProductionName=this.ruleStack.pop(),this.nextProductionOccurrence=this.occurrenceStack.pop())},e}(zj.RestWalker);Nr.AbstractNextPossibleTokensWalker=Vj;var wIe=function(r){mc(e,r);function e(t,i){var n=r.call(this,t,i)||this;return n.path=i,n.nextTerminalName="",n.nextTerminalOccurrence=0,n.nextTerminalName=n.path.lastTok.name,n.nextTerminalOccurrence=n.path.lastTokOccurrence,n}return e.prototype.walkTerminal=function(t,i,n){if(this.isAtEndOfPath&&t.terminalType.name===this.nextTerminalName&&t.idx===this.nextTerminalOccurrence&&!this.found){var s=i.concat(n),o=new kt.Alternative({definition:s});this.possibleTokTypes=(0,yIe.first)(o),this.found=!0}},e}(Vj);Nr.NextAfterTokenWalker=wIe;var Pd=function(r){mc(e,r);function e(t,i){var n=r.call(this)||this;return n.topRule=t,n.occurrence=i,n.result={token:void 0,occurrence:void 0,isEndOfRule:void 0},n}return e.prototype.startWalking=function(){return this.walk(this.topRule),this.result},e}(zj.RestWalker);Nr.AbstractNextTerminalAfterProductionWalker=Pd;var BIe=function(r){mc(e,r);function e(){return r!==null&&r.apply(this,arguments)||this}return e.prototype.walkMany=function(t,i,n){if(t.idx===this.occurrence){var s=(0,Ut.first)(i.concat(n));this.result.isEndOfRule=s===void 0,s instanceof kt.Terminal&&(this.result.token=s.terminalType,this.result.occurrence=s.idx)}else r.prototype.walkMany.call(this,t,i,n)},e}(Pd);Nr.NextTerminalAfterManyWalker=BIe;var QIe=function(r){mc(e,r);function e(){return r!==null&&r.apply(this,arguments)||this}return e.prototype.walkManySep=function(t,i,n){if(t.idx===this.occurrence){var s=(0,Ut.first)(i.concat(n));this.result.isEndOfRule=s===void 0,s instanceof kt.Terminal&&(this.result.token=s.terminalType,this.result.occurrence=s.idx)}else r.prototype.walkManySep.call(this,t,i,n)},e}(Pd);Nr.NextTerminalAfterManySepWalker=QIe;var bIe=function(r){mc(e,r);function e(){return r!==null&&r.apply(this,arguments)||this}return e.prototype.walkAtLeastOne=function(t,i,n){if(t.idx===this.occurrence){var s=(0,Ut.first)(i.concat(n));this.result.isEndOfRule=s===void 0,s instanceof kt.Terminal&&(this.result.token=s.terminalType,this.result.occurrence=s.idx)}else r.prototype.walkAtLeastOne.call(this,t,i,n)},e}(Pd);Nr.NextTerminalAfterAtLeastOneWalker=bIe;var SIe=function(r){mc(e,r);function e(){return r!==null&&r.apply(this,arguments)||this}return e.prototype.walkAtLeastOneSep=function(t,i,n){if(t.idx===this.occurrence){var s=(0,Ut.first)(i.concat(n));this.result.isEndOfRule=s===void 0,s instanceof kt.Terminal&&(this.result.token=s.terminalType,this.result.occurrence=s.idx)}else r.prototype.walkAtLeastOneSep.call(this,t,i,n)},e}(Pd);Nr.NextTerminalAfterAtLeastOneSepWalker=SIe;function Xj(r,e,t){t===void 0&&(t=[]),t=(0,Ut.cloneArr)(t);var i=[],n=0;function s(c){return c.concat((0,Ut.drop)(r,n+1))}function o(c){var u=Xj(s(c),e,t);return i.concat(u)}for(;t.length=0;ge--){var re=B.definition[ge],O={idx:p,def:re.definition.concat((0,Ut.drop)(h)),ruleStack:C,occurrenceStack:y};g.push(O),g.push(o)}else if(B instanceof kt.Alternative)g.push({idx:p,def:B.definition.concat((0,Ut.drop)(h)),ruleStack:C,occurrenceStack:y});else if(B instanceof kt.Rule)g.push(xIe(B,p,C,y));else throw Error("non exhaustive match")}}return u}Nr.nextPossibleTokensAfter=vIe;function xIe(r,e,t,i){var n=(0,Ut.cloneArr)(t);n.push(r.name);var s=(0,Ut.cloneArr)(i);return s.push(1),{idx:e,def:r.definition,ruleStack:n,occurrenceStack:s}}});var kd=w(Zt=>{"use strict";var $j=Zt&&Zt.__extends||function(){var r=function(e,t){return r=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(i,n){i.__proto__=n}||function(i,n){for(var s in n)Object.prototype.hasOwnProperty.call(n,s)&&(i[s]=n[s])},r(e,t)};return function(e,t){if(typeof t!="function"&&t!==null)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");r(e,t);function i(){this.constructor=e}e.prototype=t===null?Object.create(t):(i.prototype=t.prototype,new i)}}();Object.defineProperty(Zt,"__esModule",{value:!0});Zt.areTokenCategoriesNotUsed=Zt.isStrictPrefixOfPath=Zt.containsPath=Zt.getLookaheadPathsForOptionalProd=Zt.getLookaheadPathsForOr=Zt.lookAheadSequenceFromAlternatives=Zt.buildSingleAlternativeLookaheadFunction=Zt.buildAlternativesLookAheadFunc=Zt.buildLookaheadFuncForOptionalProd=Zt.buildLookaheadFuncForOr=Zt.getProdType=Zt.PROD_TYPE=void 0;var sr=Gt(),Zj=Dd(),PIe=Ay(),hy=_g(),OA=Cn(),DIe=$g(),oi;(function(r){r[r.OPTION=0]="OPTION",r[r.REPETITION=1]="REPETITION",r[r.REPETITION_MANDATORY=2]="REPETITION_MANDATORY",r[r.REPETITION_MANDATORY_WITH_SEPARATOR=3]="REPETITION_MANDATORY_WITH_SEPARATOR",r[r.REPETITION_WITH_SEPARATOR=4]="REPETITION_WITH_SEPARATOR",r[r.ALTERNATION=5]="ALTERNATION"})(oi=Zt.PROD_TYPE||(Zt.PROD_TYPE={}));function kIe(r){if(r instanceof OA.Option)return oi.OPTION;if(r instanceof OA.Repetition)return oi.REPETITION;if(r instanceof OA.RepetitionMandatory)return oi.REPETITION_MANDATORY;if(r instanceof OA.RepetitionMandatoryWithSeparator)return oi.REPETITION_MANDATORY_WITH_SEPARATOR;if(r instanceof OA.RepetitionWithSeparator)return oi.REPETITION_WITH_SEPARATOR;if(r instanceof OA.Alternation)return oi.ALTERNATION;throw Error("non exhaustive match")}Zt.getProdType=kIe;function RIe(r,e,t,i,n,s){var o=tq(r,e,t),a=Xv(o)?hy.tokenStructuredMatcherNoCategories:hy.tokenStructuredMatcher;return s(o,i,a,n)}Zt.buildLookaheadFuncForOr=RIe;function FIe(r,e,t,i,n,s){var o=rq(r,e,n,t),a=Xv(o)?hy.tokenStructuredMatcherNoCategories:hy.tokenStructuredMatcher;return s(o[0],a,i)}Zt.buildLookaheadFuncForOptionalProd=FIe;function NIe(r,e,t,i){var n=r.length,s=(0,sr.every)(r,function(l){return(0,sr.every)(l,function(c){return c.length===1})});if(e)return function(l){for(var c=(0,sr.map)(l,function(D){return D.GATE}),u=0;u{"use strict";var Zv=Vt&&Vt.__extends||function(){var r=function(e,t){return r=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(i,n){i.__proto__=n}||function(i,n){for(var s in n)Object.prototype.hasOwnProperty.call(n,s)&&(i[s]=n[s])},r(e,t)};return function(e,t){if(typeof t!="function"&&t!==null)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");r(e,t);function i(){this.constructor=e}e.prototype=t===null?Object.create(t):(i.prototype=t.prototype,new i)}}();Object.defineProperty(Vt,"__esModule",{value:!0});Vt.checkPrefixAlternativesAmbiguities=Vt.validateSomeNonEmptyLookaheadPath=Vt.validateTooManyAlts=Vt.RepetionCollector=Vt.validateAmbiguousAlternationAlternatives=Vt.validateEmptyOrAlternative=Vt.getFirstNoneTerminal=Vt.validateNoLeftRecursion=Vt.validateRuleIsOverridden=Vt.validateRuleDoesNotAlreadyExist=Vt.OccurrenceValidationCollector=Vt.identifyProductionForDuplicates=Vt.validateGrammar=void 0;var er=Gt(),br=Gt(),No=Yn(),_v=vd(),tf=kd(),UIe=Dd(),to=Cn(),$v=$g();function KIe(r,e,t,i,n){var s=er.map(r,function(h){return HIe(h,i)}),o=er.map(r,function(h){return ex(h,h,i)}),a=[],l=[],c=[];(0,br.every)(o,br.isEmpty)&&(a=(0,br.map)(r,function(h){return Aq(h,i)}),l=(0,br.map)(r,function(h){return lq(h,e,i)}),c=gq(r,e,i));var u=jIe(r,t,i),g=(0,br.map)(r,function(h){return uq(h,i)}),f=(0,br.map)(r,function(h){return aq(h,r,n,i)});return er.flatten(s.concat(c,o,a,l,u,g,f))}Vt.validateGrammar=KIe;function HIe(r,e){var t=new oq;r.accept(t);var i=t.allProductions,n=er.groupBy(i,nq),s=er.pick(n,function(a){return a.length>1}),o=er.map(er.values(s),function(a){var l=er.first(a),c=e.buildDuplicateFoundError(r,a),u=(0,_v.getProductionDslName)(l),g={message:c,type:No.ParserDefinitionErrorType.DUPLICATE_PRODUCTIONS,ruleName:r.name,dslName:u,occurrence:l.idx},f=sq(l);return f&&(g.parameter=f),g});return o}function nq(r){return(0,_v.getProductionDslName)(r)+"_#_"+r.idx+"_#_"+sq(r)}Vt.identifyProductionForDuplicates=nq;function sq(r){return r instanceof to.Terminal?r.terminalType.name:r instanceof to.NonTerminal?r.nonTerminalName:""}var oq=function(r){Zv(e,r);function e(){var t=r!==null&&r.apply(this,arguments)||this;return t.allProductions=[],t}return e.prototype.visitNonTerminal=function(t){this.allProductions.push(t)},e.prototype.visitOption=function(t){this.allProductions.push(t)},e.prototype.visitRepetitionWithSeparator=function(t){this.allProductions.push(t)},e.prototype.visitRepetitionMandatory=function(t){this.allProductions.push(t)},e.prototype.visitRepetitionMandatoryWithSeparator=function(t){this.allProductions.push(t)},e.prototype.visitRepetition=function(t){this.allProductions.push(t)},e.prototype.visitAlternation=function(t){this.allProductions.push(t)},e.prototype.visitTerminal=function(t){this.allProductions.push(t)},e}($v.GAstVisitor);Vt.OccurrenceValidationCollector=oq;function aq(r,e,t,i){var n=[],s=(0,br.reduce)(e,function(a,l){return l.name===r.name?a+1:a},0);if(s>1){var o=i.buildDuplicateRuleNameError({topLevelRule:r,grammarName:t});n.push({message:o,type:No.ParserDefinitionErrorType.DUPLICATE_RULE_NAME,ruleName:r.name})}return n}Vt.validateRuleDoesNotAlreadyExist=aq;function GIe(r,e,t){var i=[],n;return er.contains(e,r)||(n="Invalid rule override, rule: ->"+r+"<- cannot be overridden in the grammar: ->"+t+"<-as it is not defined in any of the super grammars ",i.push({message:n,type:No.ParserDefinitionErrorType.INVALID_RULE_OVERRIDE,ruleName:r})),i}Vt.validateRuleIsOverridden=GIe;function ex(r,e,t,i){i===void 0&&(i=[]);var n=[],s=Rd(e.definition);if(er.isEmpty(s))return[];var o=r.name,a=er.contains(s,r);a&&n.push({message:t.buildLeftRecursionError({topLevelRule:r,leftRecursionPath:i}),type:No.ParserDefinitionErrorType.LEFT_RECURSION,ruleName:o});var l=er.difference(s,i.concat([r])),c=er.map(l,function(u){var g=er.cloneArr(i);return g.push(u),ex(r,u,t,g)});return n.concat(er.flatten(c))}Vt.validateNoLeftRecursion=ex;function Rd(r){var e=[];if(er.isEmpty(r))return e;var t=er.first(r);if(t instanceof to.NonTerminal)e.push(t.referencedRule);else if(t instanceof to.Alternative||t instanceof to.Option||t instanceof to.RepetitionMandatory||t instanceof to.RepetitionMandatoryWithSeparator||t instanceof to.RepetitionWithSeparator||t instanceof to.Repetition)e=e.concat(Rd(t.definition));else if(t instanceof to.Alternation)e=er.flatten(er.map(t.definition,function(o){return Rd(o.definition)}));else if(!(t instanceof to.Terminal))throw Error("non exhaustive match");var i=(0,_v.isOptionalProd)(t),n=r.length>1;if(i&&n){var s=er.drop(r);return e.concat(Rd(s))}else return e}Vt.getFirstNoneTerminal=Rd;var tx=function(r){Zv(e,r);function e(){var t=r!==null&&r.apply(this,arguments)||this;return t.alternations=[],t}return e.prototype.visitAlternation=function(t){this.alternations.push(t)},e}($v.GAstVisitor);function Aq(r,e){var t=new tx;r.accept(t);var i=t.alternations,n=er.reduce(i,function(s,o){var a=er.dropRight(o.definition),l=er.map(a,function(c,u){var g=(0,UIe.nextPossibleTokensAfter)([c],[],null,1);return er.isEmpty(g)?{message:e.buildEmptyAlternationError({topLevelRule:r,alternation:o,emptyChoiceIdx:u}),type:No.ParserDefinitionErrorType.NONE_LAST_EMPTY_ALT,ruleName:r.name,occurrence:o.idx,alternative:u+1}:null});return s.concat(er.compact(l))},[]);return n}Vt.validateEmptyOrAlternative=Aq;function lq(r,e,t){var i=new tx;r.accept(i);var n=i.alternations;n=(0,br.reject)(n,function(o){return o.ignoreAmbiguities===!0});var s=er.reduce(n,function(o,a){var l=a.idx,c=a.maxLookahead||e,u=(0,tf.getLookaheadPathsForOr)(l,r,c,a),g=YIe(u,a,r,t),f=fq(u,a,r,t);return o.concat(g,f)},[]);return s}Vt.validateAmbiguousAlternationAlternatives=lq;var cq=function(r){Zv(e,r);function e(){var t=r!==null&&r.apply(this,arguments)||this;return t.allProductions=[],t}return e.prototype.visitRepetitionWithSeparator=function(t){this.allProductions.push(t)},e.prototype.visitRepetitionMandatory=function(t){this.allProductions.push(t)},e.prototype.visitRepetitionMandatoryWithSeparator=function(t){this.allProductions.push(t)},e.prototype.visitRepetition=function(t){this.allProductions.push(t)},e}($v.GAstVisitor);Vt.RepetionCollector=cq;function uq(r,e){var t=new tx;r.accept(t);var i=t.alternations,n=er.reduce(i,function(s,o){return o.definition.length>255&&s.push({message:e.buildTooManyAlternativesError({topLevelRule:r,alternation:o}),type:No.ParserDefinitionErrorType.TOO_MANY_ALTS,ruleName:r.name,occurrence:o.idx}),s},[]);return n}Vt.validateTooManyAlts=uq;function gq(r,e,t){var i=[];return(0,br.forEach)(r,function(n){var s=new cq;n.accept(s);var o=s.allProductions;(0,br.forEach)(o,function(a){var l=(0,tf.getProdType)(a),c=a.maxLookahead||e,u=a.idx,g=(0,tf.getLookaheadPathsForOptionalProd)(u,n,l,c),f=g[0];if((0,br.isEmpty)((0,br.flatten)(f))){var h=t.buildEmptyRepetitionError({topLevelRule:n,repetition:a});i.push({message:h,type:No.ParserDefinitionErrorType.NO_NON_EMPTY_LOOKAHEAD,ruleName:n.name})}})}),i}Vt.validateSomeNonEmptyLookaheadPath=gq;function YIe(r,e,t,i){var n=[],s=(0,br.reduce)(r,function(a,l,c){return e.definition[c].ignoreAmbiguities===!0||(0,br.forEach)(l,function(u){var g=[c];(0,br.forEach)(r,function(f,h){c!==h&&(0,tf.containsPath)(f,u)&&e.definition[h].ignoreAmbiguities!==!0&&g.push(h)}),g.length>1&&!(0,tf.containsPath)(n,u)&&(n.push(u),a.push({alts:g,path:u}))}),a},[]),o=er.map(s,function(a){var l=(0,br.map)(a.alts,function(u){return u+1}),c=i.buildAlternationAmbiguityError({topLevelRule:t,alternation:e,ambiguityIndices:l,prefixPath:a.path});return{message:c,type:No.ParserDefinitionErrorType.AMBIGUOUS_ALTS,ruleName:t.name,occurrence:e.idx,alternatives:[a.alts]}});return o}function fq(r,e,t,i){var n=[],s=(0,br.reduce)(r,function(o,a,l){var c=(0,br.map)(a,function(u){return{idx:l,path:u}});return o.concat(c)},[]);return(0,br.forEach)(s,function(o){var a=e.definition[o.idx];if(a.ignoreAmbiguities!==!0){var l=o.idx,c=o.path,u=(0,br.findAll)(s,function(f){return e.definition[f.idx].ignoreAmbiguities!==!0&&f.idx{"use strict";Object.defineProperty(rf,"__esModule",{value:!0});rf.validateGrammar=rf.resolveGrammar=void 0;var ix=Gt(),qIe=Wj(),JIe=rx(),hq=xd();function WIe(r){r=(0,ix.defaults)(r,{errMsgProvider:hq.defaultGrammarResolverErrorProvider});var e={};return(0,ix.forEach)(r.rules,function(t){e[t.name]=t}),(0,qIe.resolveGrammar)(e,r.errMsgProvider)}rf.resolveGrammar=WIe;function zIe(r){return r=(0,ix.defaults)(r,{errMsgProvider:hq.defaultGrammarValidatorErrorProvider}),(0,JIe.validateGrammar)(r.rules,r.maxLookahead,r.tokenTypes,r.errMsgProvider,r.grammarName)}rf.validateGrammar=zIe});var nf=w(En=>{"use strict";var Fd=En&&En.__extends||function(){var r=function(e,t){return r=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(i,n){i.__proto__=n}||function(i,n){for(var s in n)Object.prototype.hasOwnProperty.call(n,s)&&(i[s]=n[s])},r(e,t)};return function(e,t){if(typeof t!="function"&&t!==null)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");r(e,t);function i(){this.constructor=e}e.prototype=t===null?Object.create(t):(i.prototype=t.prototype,new i)}}();Object.defineProperty(En,"__esModule",{value:!0});En.EarlyExitException=En.NotAllInputParsedException=En.NoViableAltException=En.MismatchedTokenException=En.isRecognitionException=void 0;var VIe=Gt(),dq="MismatchedTokenException",Cq="NoViableAltException",mq="EarlyExitException",Eq="NotAllInputParsedException",Iq=[dq,Cq,mq,Eq];Object.freeze(Iq);function XIe(r){return(0,VIe.contains)(Iq,r.name)}En.isRecognitionException=XIe;var py=function(r){Fd(e,r);function e(t,i){var n=this.constructor,s=r.call(this,t)||this;return s.token=i,s.resyncedTokens=[],Object.setPrototypeOf(s,n.prototype),Error.captureStackTrace&&Error.captureStackTrace(s,s.constructor),s}return e}(Error),ZIe=function(r){Fd(e,r);function e(t,i,n){var s=r.call(this,t,i)||this;return s.previousToken=n,s.name=dq,s}return e}(py);En.MismatchedTokenException=ZIe;var _Ie=function(r){Fd(e,r);function e(t,i,n){var s=r.call(this,t,i)||this;return s.previousToken=n,s.name=Cq,s}return e}(py);En.NoViableAltException=_Ie;var $Ie=function(r){Fd(e,r);function e(t,i){var n=r.call(this,t,i)||this;return n.name=Eq,n}return e}(py);En.NotAllInputParsedException=$Ie;var eye=function(r){Fd(e,r);function e(t,i,n){var s=r.call(this,t,i)||this;return s.previousToken=n,s.name=mq,s}return e}(py);En.EarlyExitException=eye});var sx=w(Ui=>{"use strict";Object.defineProperty(Ui,"__esModule",{value:!0});Ui.attemptInRepetitionRecovery=Ui.Recoverable=Ui.InRuleRecoveryException=Ui.IN_RULE_RECOVERY_EXCEPTION=Ui.EOF_FOLLOW_KEY=void 0;var dy=LA(),hs=Gt(),tye=nf(),rye=Jv(),iye=Yn();Ui.EOF_FOLLOW_KEY={};Ui.IN_RULE_RECOVERY_EXCEPTION="InRuleRecoveryException";function nx(r){this.name=Ui.IN_RULE_RECOVERY_EXCEPTION,this.message=r}Ui.InRuleRecoveryException=nx;nx.prototype=Error.prototype;var nye=function(){function r(){}return r.prototype.initRecoverable=function(e){this.firstAfterRepMap={},this.resyncFollows={},this.recoveryEnabled=(0,hs.has)(e,"recoveryEnabled")?e.recoveryEnabled:iye.DEFAULT_PARSER_CONFIG.recoveryEnabled,this.recoveryEnabled&&(this.attemptInRepetitionRecovery=yq)},r.prototype.getTokenToInsert=function(e){var t=(0,dy.createTokenInstance)(e,"",NaN,NaN,NaN,NaN,NaN,NaN);return t.isInsertedInRecovery=!0,t},r.prototype.canTokenTypeBeInsertedInRecovery=function(e){return!0},r.prototype.tryInRepetitionRecovery=function(e,t,i,n){for(var s=this,o=this.findReSyncTokenType(),a=this.exportLexerState(),l=[],c=!1,u=this.LA(1),g=this.LA(1),f=function(){var h=s.LA(0),p=s.errorMessageProvider.buildMismatchTokenMessage({expected:n,actual:u,previous:h,ruleName:s.getCurrRuleFullName()}),C=new tye.MismatchedTokenException(p,u,s.LA(0));C.resyncedTokens=(0,hs.dropRight)(l),s.SAVE_ERROR(C)};!c;)if(this.tokenMatcher(g,n)){f();return}else if(i.call(this)){f(),e.apply(this,t);return}else this.tokenMatcher(g,o)?c=!0:(g=this.SKIP_TOKEN(),this.addToResyncTokens(g,l));this.importLexerState(a)},r.prototype.shouldInRepetitionRecoveryBeTried=function(e,t,i){return!(i===!1||e===void 0||t===void 0||this.tokenMatcher(this.LA(1),e)||this.isBackTracking()||this.canPerformInRuleRecovery(e,this.getFollowsForInRuleRecovery(e,t)))},r.prototype.getFollowsForInRuleRecovery=function(e,t){var i=this.getCurrentGrammarPath(e,t),n=this.getNextPossibleTokenTypes(i);return n},r.prototype.tryInRuleRecovery=function(e,t){if(this.canRecoverWithSingleTokenInsertion(e,t)){var i=this.getTokenToInsert(e);return i}if(this.canRecoverWithSingleTokenDeletion(e)){var n=this.SKIP_TOKEN();return this.consumeToken(),n}throw new nx("sad sad panda")},r.prototype.canPerformInRuleRecovery=function(e,t){return this.canRecoverWithSingleTokenInsertion(e,t)||this.canRecoverWithSingleTokenDeletion(e)},r.prototype.canRecoverWithSingleTokenInsertion=function(e,t){var i=this;if(!this.canTokenTypeBeInsertedInRecovery(e)||(0,hs.isEmpty)(t))return!1;var n=this.LA(1),s=(0,hs.find)(t,function(o){return i.tokenMatcher(n,o)})!==void 0;return s},r.prototype.canRecoverWithSingleTokenDeletion=function(e){var t=this.tokenMatcher(this.LA(2),e);return t},r.prototype.isInCurrentRuleReSyncSet=function(e){var t=this.getCurrFollowKey(),i=this.getFollowSetFromFollowKey(t);return(0,hs.contains)(i,e)},r.prototype.findReSyncTokenType=function(){for(var e=this.flattenFollowSet(),t=this.LA(1),i=2;;){var n=t.tokenType;if((0,hs.contains)(e,n))return n;t=this.LA(i),i++}},r.prototype.getCurrFollowKey=function(){if(this.RULE_STACK.length===1)return Ui.EOF_FOLLOW_KEY;var e=this.getLastExplicitRuleShortName(),t=this.getLastExplicitRuleOccurrenceIndex(),i=this.getPreviousExplicitRuleShortName();return{ruleName:this.shortRuleNameToFullName(e),idxInCallingRule:t,inRule:this.shortRuleNameToFullName(i)}},r.prototype.buildFullFollowKeyStack=function(){var e=this,t=this.RULE_STACK,i=this.RULE_OCCURRENCE_STACK;return(0,hs.map)(t,function(n,s){return s===0?Ui.EOF_FOLLOW_KEY:{ruleName:e.shortRuleNameToFullName(n),idxInCallingRule:i[s],inRule:e.shortRuleNameToFullName(t[s-1])}})},r.prototype.flattenFollowSet=function(){var e=this,t=(0,hs.map)(this.buildFullFollowKeyStack(),function(i){return e.getFollowSetFromFollowKey(i)});return(0,hs.flatten)(t)},r.prototype.getFollowSetFromFollowKey=function(e){if(e===Ui.EOF_FOLLOW_KEY)return[dy.EOF];var t=e.ruleName+e.idxInCallingRule+rye.IN+e.inRule;return this.resyncFollows[t]},r.prototype.addToResyncTokens=function(e,t){return this.tokenMatcher(e,dy.EOF)||t.push(e),t},r.prototype.reSyncTo=function(e){for(var t=[],i=this.LA(1);this.tokenMatcher(i,e)===!1;)i=this.SKIP_TOKEN(),this.addToResyncTokens(i,t);return(0,hs.dropRight)(t)},r.prototype.attemptInRepetitionRecovery=function(e,t,i,n,s,o,a){},r.prototype.getCurrentGrammarPath=function(e,t){var i=this.getHumanReadableRuleStack(),n=(0,hs.cloneArr)(this.RULE_OCCURRENCE_STACK),s={ruleStack:i,occurrenceStack:n,lastTok:e,lastTokOccurrence:t};return s},r.prototype.getHumanReadableRuleStack=function(){var e=this;return(0,hs.map)(this.RULE_STACK,function(t){return e.shortRuleNameToFullName(t)})},r}();Ui.Recoverable=nye;function yq(r,e,t,i,n,s,o){var a=this.getKeyForAutomaticLookahead(i,n),l=this.firstAfterRepMap[a];if(l===void 0){var c=this.getCurrRuleFullName(),u=this.getGAstProductions()[c],g=new s(u,n);l=g.startWalking(),this.firstAfterRepMap[a]=l}var f=l.token,h=l.occurrence,p=l.isEndOfRule;this.RULE_STACK.length===1&&p&&f===void 0&&(f=dy.EOF,h=1),this.shouldInRepetitionRecoveryBeTried(f,h,o)&&this.tryInRepetitionRecovery(r,e,t,f)}Ui.attemptInRepetitionRecovery=yq});var Cy=w(Jt=>{"use strict";Object.defineProperty(Jt,"__esModule",{value:!0});Jt.getKeyForAutomaticLookahead=Jt.AT_LEAST_ONE_SEP_IDX=Jt.MANY_SEP_IDX=Jt.AT_LEAST_ONE_IDX=Jt.MANY_IDX=Jt.OPTION_IDX=Jt.OR_IDX=Jt.BITS_FOR_ALT_IDX=Jt.BITS_FOR_RULE_IDX=Jt.BITS_FOR_OCCURRENCE_IDX=Jt.BITS_FOR_METHOD_TYPE=void 0;Jt.BITS_FOR_METHOD_TYPE=4;Jt.BITS_FOR_OCCURRENCE_IDX=8;Jt.BITS_FOR_RULE_IDX=12;Jt.BITS_FOR_ALT_IDX=8;Jt.OR_IDX=1<{"use strict";Object.defineProperty(my,"__esModule",{value:!0});my.LooksAhead=void 0;var Da=kd(),ro=Gt(),wq=Yn(),ka=Cy(),Ec=vd(),oye=function(){function r(){}return r.prototype.initLooksAhead=function(e){this.dynamicTokensEnabled=(0,ro.has)(e,"dynamicTokensEnabled")?e.dynamicTokensEnabled:wq.DEFAULT_PARSER_CONFIG.dynamicTokensEnabled,this.maxLookahead=(0,ro.has)(e,"maxLookahead")?e.maxLookahead:wq.DEFAULT_PARSER_CONFIG.maxLookahead,this.lookAheadFuncsCache=(0,ro.isES2015MapSupported)()?new Map:[],(0,ro.isES2015MapSupported)()?(this.getLaFuncFromCache=this.getLaFuncFromMap,this.setLaFuncCache=this.setLaFuncCacheUsingMap):(this.getLaFuncFromCache=this.getLaFuncFromObj,this.setLaFuncCache=this.setLaFuncUsingObj)},r.prototype.preComputeLookaheadFunctions=function(e){var t=this;(0,ro.forEach)(e,function(i){t.TRACE_INIT(i.name+" Rule Lookahead",function(){var n=(0,Ec.collectMethods)(i),s=n.alternation,o=n.repetition,a=n.option,l=n.repetitionMandatory,c=n.repetitionMandatoryWithSeparator,u=n.repetitionWithSeparator;(0,ro.forEach)(s,function(g){var f=g.idx===0?"":g.idx;t.TRACE_INIT(""+(0,Ec.getProductionDslName)(g)+f,function(){var h=(0,Da.buildLookaheadFuncForOr)(g.idx,i,g.maxLookahead||t.maxLookahead,g.hasPredicates,t.dynamicTokensEnabled,t.lookAheadBuilderForAlternatives),p=(0,ka.getKeyForAutomaticLookahead)(t.fullRuleNameToShort[i.name],ka.OR_IDX,g.idx);t.setLaFuncCache(p,h)})}),(0,ro.forEach)(o,function(g){t.computeLookaheadFunc(i,g.idx,ka.MANY_IDX,Da.PROD_TYPE.REPETITION,g.maxLookahead,(0,Ec.getProductionDslName)(g))}),(0,ro.forEach)(a,function(g){t.computeLookaheadFunc(i,g.idx,ka.OPTION_IDX,Da.PROD_TYPE.OPTION,g.maxLookahead,(0,Ec.getProductionDslName)(g))}),(0,ro.forEach)(l,function(g){t.computeLookaheadFunc(i,g.idx,ka.AT_LEAST_ONE_IDX,Da.PROD_TYPE.REPETITION_MANDATORY,g.maxLookahead,(0,Ec.getProductionDslName)(g))}),(0,ro.forEach)(c,function(g){t.computeLookaheadFunc(i,g.idx,ka.AT_LEAST_ONE_SEP_IDX,Da.PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR,g.maxLookahead,(0,Ec.getProductionDslName)(g))}),(0,ro.forEach)(u,function(g){t.computeLookaheadFunc(i,g.idx,ka.MANY_SEP_IDX,Da.PROD_TYPE.REPETITION_WITH_SEPARATOR,g.maxLookahead,(0,Ec.getProductionDslName)(g))})})})},r.prototype.computeLookaheadFunc=function(e,t,i,n,s,o){var a=this;this.TRACE_INIT(""+o+(t===0?"":t),function(){var l=(0,Da.buildLookaheadFuncForOptionalProd)(t,e,s||a.maxLookahead,a.dynamicTokensEnabled,n,a.lookAheadBuilderForOptional),c=(0,ka.getKeyForAutomaticLookahead)(a.fullRuleNameToShort[e.name],i,t);a.setLaFuncCache(c,l)})},r.prototype.lookAheadBuilderForOptional=function(e,t,i){return(0,Da.buildSingleAlternativeLookaheadFunction)(e,t,i)},r.prototype.lookAheadBuilderForAlternatives=function(e,t,i,n){return(0,Da.buildAlternativesLookAheadFunc)(e,t,i,n)},r.prototype.getKeyForAutomaticLookahead=function(e,t){var i=this.getLastExplicitRuleShortName();return(0,ka.getKeyForAutomaticLookahead)(i,e,t)},r.prototype.getLaFuncFromCache=function(e){},r.prototype.getLaFuncFromMap=function(e){return this.lookAheadFuncsCache.get(e)},r.prototype.getLaFuncFromObj=function(e){return this.lookAheadFuncsCache[e]},r.prototype.setLaFuncCache=function(e,t){},r.prototype.setLaFuncCacheUsingMap=function(e,t){this.lookAheadFuncsCache.set(e,t)},r.prototype.setLaFuncUsingObj=function(e,t){this.lookAheadFuncsCache[e]=t},r}();my.LooksAhead=oye});var Qq=w(Lo=>{"use strict";Object.defineProperty(Lo,"__esModule",{value:!0});Lo.addNoneTerminalToCst=Lo.addTerminalToCst=Lo.setNodeLocationFull=Lo.setNodeLocationOnlyOffset=void 0;function aye(r,e){isNaN(r.startOffset)===!0?(r.startOffset=e.startOffset,r.endOffset=e.endOffset):r.endOffset{"use strict";Object.defineProperty(MA,"__esModule",{value:!0});MA.defineNameProp=MA.functionName=MA.classNameFromInstance=void 0;var uye=Gt();function gye(r){return Sq(r.constructor)}MA.classNameFromInstance=gye;var bq="name";function Sq(r){var e=r.name;return e||"anonymous"}MA.functionName=Sq;function fye(r,e){var t=Object.getOwnPropertyDescriptor(r,bq);return(0,uye.isUndefined)(t)||t.configurable?(Object.defineProperty(r,bq,{enumerable:!1,configurable:!0,writable:!1,value:e}),!0):!1}MA.defineNameProp=fye});var kq=w(Si=>{"use strict";Object.defineProperty(Si,"__esModule",{value:!0});Si.validateRedundantMethods=Si.validateMissingCstMethods=Si.validateVisitor=Si.CstVisitorDefinitionError=Si.createBaseVisitorConstructorWithDefaults=Si.createBaseSemanticVisitorConstructor=Si.defaultVisit=void 0;var ps=Gt(),Nd=ox();function vq(r,e){for(var t=(0,ps.keys)(r),i=t.length,n=0;n: `+(""+s.join(` `).replace(/\n/g,` `)))}}};return t.prototype=i,t.prototype.constructor=t,t._RULE_NAMES=e,t}Si.createBaseSemanticVisitorConstructor=hye;function pye(r,e,t){var i=function(){};(0,Nd.defineNameProp)(i,r+"BaseSemanticsWithDefaults");var n=Object.create(t.prototype);return(0,ps.forEach)(e,function(s){n[s]=vq}),i.prototype=n,i.prototype.constructor=i,i}Si.createBaseVisitorConstructorWithDefaults=pye;var ax;(function(r){r[r.REDUNDANT_METHOD=0]="REDUNDANT_METHOD",r[r.MISSING_METHOD=1]="MISSING_METHOD"})(ax=Si.CstVisitorDefinitionError||(Si.CstVisitorDefinitionError={}));function xq(r,e){var t=Pq(r,e),i=Dq(r,e);return t.concat(i)}Si.validateVisitor=xq;function Pq(r,e){var t=(0,ps.map)(e,function(i){if(!(0,ps.isFunction)(r[i]))return{msg:"Missing visitor method: <"+i+"> on "+(0,Nd.functionName)(r.constructor)+" CST Visitor.",type:ax.MISSING_METHOD,methodName:i}});return(0,ps.compact)(t)}Si.validateMissingCstMethods=Pq;var dye=["constructor","visit","validateVisitor"];function Dq(r,e){var t=[];for(var i in r)(0,ps.isFunction)(r[i])&&!(0,ps.contains)(dye,i)&&!(0,ps.contains)(e,i)&&t.push({msg:"Redundant visitor method: <"+i+"> on "+(0,Nd.functionName)(r.constructor)+` CST Visitor There is no Grammar Rule corresponding to this method's name. `,type:ax.REDUNDANT_METHOD,methodName:i});return t}Si.validateRedundantMethods=Dq});var Fq=w(Ey=>{"use strict";Object.defineProperty(Ey,"__esModule",{value:!0});Ey.TreeBuilder=void 0;var sf=Qq(),_r=Gt(),Rq=kq(),Cye=Yn(),mye=function(){function r(){}return r.prototype.initTreeBuilder=function(e){if(this.CST_STACK=[],this.outputCst=e.outputCst,this.nodeLocationTracking=(0,_r.has)(e,"nodeLocationTracking")?e.nodeLocationTracking:Cye.DEFAULT_PARSER_CONFIG.nodeLocationTracking,!this.outputCst)this.cstInvocationStateUpdate=_r.NOOP,this.cstFinallyStateUpdate=_r.NOOP,this.cstPostTerminal=_r.NOOP,this.cstPostNonTerminal=_r.NOOP,this.cstPostRule=_r.NOOP;else if(/full/i.test(this.nodeLocationTracking))this.recoveryEnabled?(this.setNodeLocationFromToken=sf.setNodeLocationFull,this.setNodeLocationFromNode=sf.setNodeLocationFull,this.cstPostRule=_r.NOOP,this.setInitialNodeLocation=this.setInitialNodeLocationFullRecovery):(this.setNodeLocationFromToken=_r.NOOP,this.setNodeLocationFromNode=_r.NOOP,this.cstPostRule=this.cstPostRuleFull,this.setInitialNodeLocation=this.setInitialNodeLocationFullRegular);else if(/onlyOffset/i.test(this.nodeLocationTracking))this.recoveryEnabled?(this.setNodeLocationFromToken=sf.setNodeLocationOnlyOffset,this.setNodeLocationFromNode=sf.setNodeLocationOnlyOffset,this.cstPostRule=_r.NOOP,this.setInitialNodeLocation=this.setInitialNodeLocationOnlyOffsetRecovery):(this.setNodeLocationFromToken=_r.NOOP,this.setNodeLocationFromNode=_r.NOOP,this.cstPostRule=this.cstPostRuleOnlyOffset,this.setInitialNodeLocation=this.setInitialNodeLocationOnlyOffsetRegular);else if(/none/i.test(this.nodeLocationTracking))this.setNodeLocationFromToken=_r.NOOP,this.setNodeLocationFromNode=_r.NOOP,this.cstPostRule=_r.NOOP,this.setInitialNodeLocation=_r.NOOP;else throw Error('Invalid config option: "'+e.nodeLocationTracking+'"')},r.prototype.setInitialNodeLocationOnlyOffsetRecovery=function(e){e.location={startOffset:NaN,endOffset:NaN}},r.prototype.setInitialNodeLocationOnlyOffsetRegular=function(e){e.location={startOffset:this.LA(1).startOffset,endOffset:NaN}},r.prototype.setInitialNodeLocationFullRecovery=function(e){e.location={startOffset:NaN,startLine:NaN,startColumn:NaN,endOffset:NaN,endLine:NaN,endColumn:NaN}},r.prototype.setInitialNodeLocationFullRegular=function(e){var t=this.LA(1);e.location={startOffset:t.startOffset,startLine:t.startLine,startColumn:t.startColumn,endOffset:NaN,endLine:NaN,endColumn:NaN}},r.prototype.cstInvocationStateUpdate=function(e,t){var i={name:e,children:{}};this.setInitialNodeLocation(i),this.CST_STACK.push(i)},r.prototype.cstFinallyStateUpdate=function(){this.CST_STACK.pop()},r.prototype.cstPostRuleFull=function(e){var t=this.LA(0),i=e.location;i.startOffset<=t.startOffset?(i.endOffset=t.endOffset,i.endLine=t.endLine,i.endColumn=t.endColumn):(i.startOffset=NaN,i.startLine=NaN,i.startColumn=NaN)},r.prototype.cstPostRuleOnlyOffset=function(e){var t=this.LA(0),i=e.location;i.startOffset<=t.startOffset?i.endOffset=t.endOffset:i.startOffset=NaN},r.prototype.cstPostTerminal=function(e,t){var i=this.CST_STACK[this.CST_STACK.length-1];(0,sf.addTerminalToCst)(i,t,e),this.setNodeLocationFromToken(i.location,t)},r.prototype.cstPostNonTerminal=function(e,t){var i=this.CST_STACK[this.CST_STACK.length-1];(0,sf.addNoneTerminalToCst)(i,t,e),this.setNodeLocationFromNode(i.location,e.location)},r.prototype.getBaseCstVisitorConstructor=function(){if((0,_r.isUndefined)(this.baseCstVisitorConstructor)){var e=(0,Rq.createBaseSemanticVisitorConstructor)(this.className,(0,_r.keys)(this.gastProductionsCache));return this.baseCstVisitorConstructor=e,e}return this.baseCstVisitorConstructor},r.prototype.getBaseCstVisitorConstructorWithDefaults=function(){if((0,_r.isUndefined)(this.baseCstVisitorWithDefaultsConstructor)){var e=(0,Rq.createBaseVisitorConstructorWithDefaults)(this.className,(0,_r.keys)(this.gastProductionsCache),this.getBaseCstVisitorConstructor());return this.baseCstVisitorWithDefaultsConstructor=e,e}return this.baseCstVisitorWithDefaultsConstructor},r.prototype.getLastExplicitRuleShortName=function(){var e=this.RULE_STACK;return e[e.length-1]},r.prototype.getPreviousExplicitRuleShortName=function(){var e=this.RULE_STACK;return e[e.length-2]},r.prototype.getLastExplicitRuleOccurrenceIndex=function(){var e=this.RULE_OCCURRENCE_STACK;return e[e.length-1]},r}();Ey.TreeBuilder=mye});var Lq=w(Iy=>{"use strict";Object.defineProperty(Iy,"__esModule",{value:!0});Iy.LexerAdapter=void 0;var Nq=Yn(),Eye=function(){function r(){}return r.prototype.initLexerAdapter=function(){this.tokVector=[],this.tokVectorLength=0,this.currIdx=-1},Object.defineProperty(r.prototype,"input",{get:function(){return this.tokVector},set:function(e){if(this.selfAnalysisDone!==!0)throw Error("Missing invocation at the end of the Parser's constructor.");this.reset(),this.tokVector=e,this.tokVectorLength=e.length},enumerable:!1,configurable:!0}),r.prototype.SKIP_TOKEN=function(){return this.currIdx<=this.tokVector.length-2?(this.consumeToken(),this.LA(1)):Nq.END_OF_FILE},r.prototype.LA=function(e){var t=this.currIdx+e;return t<0||this.tokVectorLength<=t?Nq.END_OF_FILE:this.tokVector[t]},r.prototype.consumeToken=function(){this.currIdx++},r.prototype.exportLexerState=function(){return this.currIdx},r.prototype.importLexerState=function(e){this.currIdx=e},r.prototype.resetLexerState=function(){this.currIdx=-1},r.prototype.moveToTerminatedState=function(){this.currIdx=this.tokVector.length-1},r.prototype.getLexerPosition=function(){return this.exportLexerState()},r}();Iy.LexerAdapter=Eye});var Oq=w(yy=>{"use strict";Object.defineProperty(yy,"__esModule",{value:!0});yy.RecognizerApi=void 0;var Tq=Gt(),Iye=nf(),Ax=Yn(),yye=xd(),wye=rx(),Bye=Cn(),Qye=function(){function r(){}return r.prototype.ACTION=function(e){return e.call(this)},r.prototype.consume=function(e,t,i){return this.consumeInternal(t,e,i)},r.prototype.subrule=function(e,t,i){return this.subruleInternal(t,e,i)},r.prototype.option=function(e,t){return this.optionInternal(t,e)},r.prototype.or=function(e,t){return this.orInternal(t,e)},r.prototype.many=function(e,t){return this.manyInternal(e,t)},r.prototype.atLeastOne=function(e,t){return this.atLeastOneInternal(e,t)},r.prototype.CONSUME=function(e,t){return this.consumeInternal(e,0,t)},r.prototype.CONSUME1=function(e,t){return this.consumeInternal(e,1,t)},r.prototype.CONSUME2=function(e,t){return this.consumeInternal(e,2,t)},r.prototype.CONSUME3=function(e,t){return this.consumeInternal(e,3,t)},r.prototype.CONSUME4=function(e,t){return this.consumeInternal(e,4,t)},r.prototype.CONSUME5=function(e,t){return this.consumeInternal(e,5,t)},r.prototype.CONSUME6=function(e,t){return this.consumeInternal(e,6,t)},r.prototype.CONSUME7=function(e,t){return this.consumeInternal(e,7,t)},r.prototype.CONSUME8=function(e,t){return this.consumeInternal(e,8,t)},r.prototype.CONSUME9=function(e,t){return this.consumeInternal(e,9,t)},r.prototype.SUBRULE=function(e,t){return this.subruleInternal(e,0,t)},r.prototype.SUBRULE1=function(e,t){return this.subruleInternal(e,1,t)},r.prototype.SUBRULE2=function(e,t){return this.subruleInternal(e,2,t)},r.prototype.SUBRULE3=function(e,t){return this.subruleInternal(e,3,t)},r.prototype.SUBRULE4=function(e,t){return this.subruleInternal(e,4,t)},r.prototype.SUBRULE5=function(e,t){return this.subruleInternal(e,5,t)},r.prototype.SUBRULE6=function(e,t){return this.subruleInternal(e,6,t)},r.prototype.SUBRULE7=function(e,t){return this.subruleInternal(e,7,t)},r.prototype.SUBRULE8=function(e,t){return this.subruleInternal(e,8,t)},r.prototype.SUBRULE9=function(e,t){return this.subruleInternal(e,9,t)},r.prototype.OPTION=function(e){return this.optionInternal(e,0)},r.prototype.OPTION1=function(e){return this.optionInternal(e,1)},r.prototype.OPTION2=function(e){return this.optionInternal(e,2)},r.prototype.OPTION3=function(e){return this.optionInternal(e,3)},r.prototype.OPTION4=function(e){return this.optionInternal(e,4)},r.prototype.OPTION5=function(e){return this.optionInternal(e,5)},r.prototype.OPTION6=function(e){return this.optionInternal(e,6)},r.prototype.OPTION7=function(e){return this.optionInternal(e,7)},r.prototype.OPTION8=function(e){return this.optionInternal(e,8)},r.prototype.OPTION9=function(e){return this.optionInternal(e,9)},r.prototype.OR=function(e){return this.orInternal(e,0)},r.prototype.OR1=function(e){return this.orInternal(e,1)},r.prototype.OR2=function(e){return this.orInternal(e,2)},r.prototype.OR3=function(e){return this.orInternal(e,3)},r.prototype.OR4=function(e){return this.orInternal(e,4)},r.prototype.OR5=function(e){return this.orInternal(e,5)},r.prototype.OR6=function(e){return this.orInternal(e,6)},r.prototype.OR7=function(e){return this.orInternal(e,7)},r.prototype.OR8=function(e){return this.orInternal(e,8)},r.prototype.OR9=function(e){return this.orInternal(e,9)},r.prototype.MANY=function(e){this.manyInternal(0,e)},r.prototype.MANY1=function(e){this.manyInternal(1,e)},r.prototype.MANY2=function(e){this.manyInternal(2,e)},r.prototype.MANY3=function(e){this.manyInternal(3,e)},r.prototype.MANY4=function(e){this.manyInternal(4,e)},r.prototype.MANY5=function(e){this.manyInternal(5,e)},r.prototype.MANY6=function(e){this.manyInternal(6,e)},r.prototype.MANY7=function(e){this.manyInternal(7,e)},r.prototype.MANY8=function(e){this.manyInternal(8,e)},r.prototype.MANY9=function(e){this.manyInternal(9,e)},r.prototype.MANY_SEP=function(e){this.manySepFirstInternal(0,e)},r.prototype.MANY_SEP1=function(e){this.manySepFirstInternal(1,e)},r.prototype.MANY_SEP2=function(e){this.manySepFirstInternal(2,e)},r.prototype.MANY_SEP3=function(e){this.manySepFirstInternal(3,e)},r.prototype.MANY_SEP4=function(e){this.manySepFirstInternal(4,e)},r.prototype.MANY_SEP5=function(e){this.manySepFirstInternal(5,e)},r.prototype.MANY_SEP6=function(e){this.manySepFirstInternal(6,e)},r.prototype.MANY_SEP7=function(e){this.manySepFirstInternal(7,e)},r.prototype.MANY_SEP8=function(e){this.manySepFirstInternal(8,e)},r.prototype.MANY_SEP9=function(e){this.manySepFirstInternal(9,e)},r.prototype.AT_LEAST_ONE=function(e){this.atLeastOneInternal(0,e)},r.prototype.AT_LEAST_ONE1=function(e){return this.atLeastOneInternal(1,e)},r.prototype.AT_LEAST_ONE2=function(e){this.atLeastOneInternal(2,e)},r.prototype.AT_LEAST_ONE3=function(e){this.atLeastOneInternal(3,e)},r.prototype.AT_LEAST_ONE4=function(e){this.atLeastOneInternal(4,e)},r.prototype.AT_LEAST_ONE5=function(e){this.atLeastOneInternal(5,e)},r.prototype.AT_LEAST_ONE6=function(e){this.atLeastOneInternal(6,e)},r.prototype.AT_LEAST_ONE7=function(e){this.atLeastOneInternal(7,e)},r.prototype.AT_LEAST_ONE8=function(e){this.atLeastOneInternal(8,e)},r.prototype.AT_LEAST_ONE9=function(e){this.atLeastOneInternal(9,e)},r.prototype.AT_LEAST_ONE_SEP=function(e){this.atLeastOneSepFirstInternal(0,e)},r.prototype.AT_LEAST_ONE_SEP1=function(e){this.atLeastOneSepFirstInternal(1,e)},r.prototype.AT_LEAST_ONE_SEP2=function(e){this.atLeastOneSepFirstInternal(2,e)},r.prototype.AT_LEAST_ONE_SEP3=function(e){this.atLeastOneSepFirstInternal(3,e)},r.prototype.AT_LEAST_ONE_SEP4=function(e){this.atLeastOneSepFirstInternal(4,e)},r.prototype.AT_LEAST_ONE_SEP5=function(e){this.atLeastOneSepFirstInternal(5,e)},r.prototype.AT_LEAST_ONE_SEP6=function(e){this.atLeastOneSepFirstInternal(6,e)},r.prototype.AT_LEAST_ONE_SEP7=function(e){this.atLeastOneSepFirstInternal(7,e)},r.prototype.AT_LEAST_ONE_SEP8=function(e){this.atLeastOneSepFirstInternal(8,e)},r.prototype.AT_LEAST_ONE_SEP9=function(e){this.atLeastOneSepFirstInternal(9,e)},r.prototype.RULE=function(e,t,i){if(i===void 0&&(i=Ax.DEFAULT_RULE_CONFIG),(0,Tq.contains)(this.definedRulesNames,e)){var n=yye.defaultGrammarValidatorErrorProvider.buildDuplicateRuleNameError({topLevelRule:e,grammarName:this.className}),s={message:n,type:Ax.ParserDefinitionErrorType.DUPLICATE_RULE_NAME,ruleName:e};this.definitionErrors.push(s)}this.definedRulesNames.push(e);var o=this.defineRule(e,t,i);return this[e]=o,o},r.prototype.OVERRIDE_RULE=function(e,t,i){i===void 0&&(i=Ax.DEFAULT_RULE_CONFIG);var n=[];n=n.concat((0,wye.validateRuleIsOverridden)(e,this.definedRulesNames,this.className)),this.definitionErrors=this.definitionErrors.concat(n);var s=this.defineRule(e,t,i);return this[e]=s,s},r.prototype.BACKTRACK=function(e,t){return function(){this.isBackTrackingStack.push(1);var i=this.saveRecogState();try{return e.apply(this,t),!0}catch(n){if((0,Iye.isRecognitionException)(n))return!1;throw n}finally{this.reloadRecogState(i),this.isBackTrackingStack.pop()}}},r.prototype.getGAstProductions=function(){return this.gastProductionsCache},r.prototype.getSerializedGastProductions=function(){return(0,Bye.serializeGrammar)((0,Tq.values)(this.gastProductionsCache))},r}();yy.RecognizerApi=Qye});var Hq=w(By=>{"use strict";Object.defineProperty(By,"__esModule",{value:!0});By.RecognizerEngine=void 0;var Pr=Gt(),jn=Cy(),wy=nf(),Mq=kd(),of=Dd(),Uq=Yn(),bye=sx(),Kq=LA(),Ld=_g(),Sye=ox(),vye=function(){function r(){}return r.prototype.initRecognizerEngine=function(e,t){if(this.className=(0,Sye.classNameFromInstance)(this),this.shortRuleNameToFull={},this.fullRuleNameToShort={},this.ruleShortNameIdx=256,this.tokenMatcher=Ld.tokenStructuredMatcherNoCategories,this.definedRulesNames=[],this.tokensMap={},this.isBackTrackingStack=[],this.RULE_STACK=[],this.RULE_OCCURRENCE_STACK=[],this.gastProductionsCache={},(0,Pr.has)(t,"serializedGrammar"))throw Error(`The Parser's configuration can no longer contain a property. See: https://chevrotain.io/docs/changes/BREAKING_CHANGES.html#_6-0-0 For Further details.`);if((0,Pr.isArray)(e)){if((0,Pr.isEmpty)(e))throw Error(`A Token Vocabulary cannot be empty. Note that the first argument for the parser constructor is no longer a Token vector (since v4.0).`);if(typeof e[0].startOffset=="number")throw Error(`The Parser constructor no longer accepts a token vector as the first argument. See: https://chevrotain.io/docs/changes/BREAKING_CHANGES.html#_4-0-0 For Further details.`)}if((0,Pr.isArray)(e))this.tokensMap=(0,Pr.reduce)(e,function(o,a){return o[a.name]=a,o},{});else if((0,Pr.has)(e,"modes")&&(0,Pr.every)((0,Pr.flatten)((0,Pr.values)(e.modes)),Ld.isTokenType)){var i=(0,Pr.flatten)((0,Pr.values)(e.modes)),n=(0,Pr.uniq)(i);this.tokensMap=(0,Pr.reduce)(n,function(o,a){return o[a.name]=a,o},{})}else if((0,Pr.isObject)(e))this.tokensMap=(0,Pr.cloneObj)(e);else throw new Error(" argument must be An Array of Token constructors, A dictionary of Token constructors or an IMultiModeLexerDefinition");this.tokensMap.EOF=Kq.EOF;var s=(0,Pr.every)((0,Pr.values)(e),function(o){return(0,Pr.isEmpty)(o.categoryMatches)});this.tokenMatcher=s?Ld.tokenStructuredMatcherNoCategories:Ld.tokenStructuredMatcher,(0,Ld.augmentTokenTypes)((0,Pr.values)(this.tokensMap))},r.prototype.defineRule=function(e,t,i){if(this.selfAnalysisDone)throw Error("Grammar rule <"+e+`> may not be defined after the 'performSelfAnalysis' method has been called' Make sure that all grammar rule definitions are done before 'performSelfAnalysis' is called.`);var n=(0,Pr.has)(i,"resyncEnabled")?i.resyncEnabled:Uq.DEFAULT_RULE_CONFIG.resyncEnabled,s=(0,Pr.has)(i,"recoveryValueFunc")?i.recoveryValueFunc:Uq.DEFAULT_RULE_CONFIG.recoveryValueFunc,o=this.ruleShortNameIdx<t},r.prototype.orInternal=function(e,t){var i=this.getKeyForAutomaticLookahead(jn.OR_IDX,t),n=(0,Pr.isArray)(e)?e:e.DEF,s=this.getLaFuncFromCache(i),o=s.call(this,n);if(o!==void 0){var a=n[o];return a.ALT.call(this)}this.raiseNoAltException(t,e.ERR_MSG)},r.prototype.ruleFinallyStateUpdate=function(){if(this.RULE_STACK.pop(),this.RULE_OCCURRENCE_STACK.pop(),this.cstFinallyStateUpdate(),this.RULE_STACK.length===0&&this.isAtEndOfInput()===!1){var e=this.LA(1),t=this.errorMessageProvider.buildNotAllInputParsedMessage({firstRedundant:e,ruleName:this.getCurrRuleFullName()});this.SAVE_ERROR(new wy.NotAllInputParsedException(t,e))}},r.prototype.subruleInternal=function(e,t,i){var n;try{var s=i!==void 0?i.ARGS:void 0;return n=e.call(this,t,s),this.cstPostNonTerminal(n,i!==void 0&&i.LABEL!==void 0?i.LABEL:e.ruleName),n}catch(o){this.subruleInternalError(o,i,e.ruleName)}},r.prototype.subruleInternalError=function(e,t,i){throw(0,wy.isRecognitionException)(e)&&e.partialCstResult!==void 0&&(this.cstPostNonTerminal(e.partialCstResult,t!==void 0&&t.LABEL!==void 0?t.LABEL:i),delete e.partialCstResult),e},r.prototype.consumeInternal=function(e,t,i){var n;try{var s=this.LA(1);this.tokenMatcher(s,e)===!0?(this.consumeToken(),n=s):this.consumeInternalError(e,s,i)}catch(o){n=this.consumeInternalRecovery(e,t,o)}return this.cstPostTerminal(i!==void 0&&i.LABEL!==void 0?i.LABEL:e.name,n),n},r.prototype.consumeInternalError=function(e,t,i){var n,s=this.LA(0);throw i!==void 0&&i.ERR_MSG?n=i.ERR_MSG:n=this.errorMessageProvider.buildMismatchTokenMessage({expected:e,actual:t,previous:s,ruleName:this.getCurrRuleFullName()}),this.SAVE_ERROR(new wy.MismatchedTokenException(n,t,s))},r.prototype.consumeInternalRecovery=function(e,t,i){if(this.recoveryEnabled&&i.name==="MismatchedTokenException"&&!this.isBackTracking()){var n=this.getFollowsForInRuleRecovery(e,t);try{return this.tryInRuleRecovery(e,n)}catch(s){throw s.name===bye.IN_RULE_RECOVERY_EXCEPTION?i:s}}else throw i},r.prototype.saveRecogState=function(){var e=this.errors,t=(0,Pr.cloneArr)(this.RULE_STACK);return{errors:e,lexerState:this.exportLexerState(),RULE_STACK:t,CST_STACK:this.CST_STACK}},r.prototype.reloadRecogState=function(e){this.errors=e.errors,this.importLexerState(e.lexerState),this.RULE_STACK=e.RULE_STACK},r.prototype.ruleInvocationStateUpdate=function(e,t,i){this.RULE_OCCURRENCE_STACK.push(i),this.RULE_STACK.push(e),this.cstInvocationStateUpdate(t,e)},r.prototype.isBackTracking=function(){return this.isBackTrackingStack.length!==0},r.prototype.getCurrRuleFullName=function(){var e=this.getLastExplicitRuleShortName();return this.shortRuleNameToFull[e]},r.prototype.shortRuleNameToFullName=function(e){return this.shortRuleNameToFull[e]},r.prototype.isAtEndOfInput=function(){return this.tokenMatcher(this.LA(1),Kq.EOF)},r.prototype.reset=function(){this.resetLexerState(),this.isBackTrackingStack=[],this.errors=[],this.RULE_STACK=[],this.CST_STACK=[],this.RULE_OCCURRENCE_STACK=[]},r}();By.RecognizerEngine=vye});var Yq=w(Qy=>{"use strict";Object.defineProperty(Qy,"__esModule",{value:!0});Qy.ErrorHandler=void 0;var lx=nf(),cx=Gt(),Gq=kd(),xye=Yn(),Pye=function(){function r(){}return r.prototype.initErrorHandler=function(e){this._errors=[],this.errorMessageProvider=(0,cx.has)(e,"errorMessageProvider")?e.errorMessageProvider:xye.DEFAULT_PARSER_CONFIG.errorMessageProvider},r.prototype.SAVE_ERROR=function(e){if((0,lx.isRecognitionException)(e))return e.context={ruleStack:this.getHumanReadableRuleStack(),ruleOccurrenceStack:(0,cx.cloneArr)(this.RULE_OCCURRENCE_STACK)},this._errors.push(e),e;throw Error("Trying to save an Error which is not a RecognitionException")},Object.defineProperty(r.prototype,"errors",{get:function(){return(0,cx.cloneArr)(this._errors)},set:function(e){this._errors=e},enumerable:!1,configurable:!0}),r.prototype.raiseEarlyExitException=function(e,t,i){for(var n=this.getCurrRuleFullName(),s=this.getGAstProductions()[n],o=(0,Gq.getLookaheadPathsForOptionalProd)(e,s,t,this.maxLookahead),a=o[0],l=[],c=1;c<=this.maxLookahead;c++)l.push(this.LA(c));var u=this.errorMessageProvider.buildEarlyExitMessage({expectedIterationPaths:a,actual:l,previous:this.LA(0),customUserDescription:i,ruleName:n});throw this.SAVE_ERROR(new lx.EarlyExitException(u,this.LA(1),this.LA(0)))},r.prototype.raiseNoAltException=function(e,t){for(var i=this.getCurrRuleFullName(),n=this.getGAstProductions()[i],s=(0,Gq.getLookaheadPathsForOr)(e,n,this.maxLookahead),o=[],a=1;a<=this.maxLookahead;a++)o.push(this.LA(a));var l=this.LA(0),c=this.errorMessageProvider.buildNoViableAltMessage({expectedPathsPerAlt:s,actual:o,previous:l,customUserDescription:t,ruleName:this.getCurrRuleFullName()});throw this.SAVE_ERROR(new lx.NoViableAltException(c,this.LA(1),l))},r}();Qy.ErrorHandler=Pye});var Jq=w(by=>{"use strict";Object.defineProperty(by,"__esModule",{value:!0});by.ContentAssist=void 0;var jq=Dd(),qq=Gt(),Dye=function(){function r(){}return r.prototype.initContentAssist=function(){},r.prototype.computeContentAssist=function(e,t){var i=this.gastProductionsCache[e];if((0,qq.isUndefined)(i))throw Error("Rule ->"+e+"<- does not exist in this grammar.");return(0,jq.nextPossibleTokensAfter)([i],t,this.tokenMatcher,this.maxLookahead)},r.prototype.getNextPossibleTokenTypes=function(e){var t=(0,qq.first)(e.ruleStack),i=this.getGAstProductions(),n=i[t],s=new jq.NextAfterTokenWalker(n,e).startWalking();return s},r}();by.ContentAssist=Dye});var eJ=w(xy=>{"use strict";Object.defineProperty(xy,"__esModule",{value:!0});xy.GastRecorder=void 0;var In=Gt(),To=Cn(),kye=Bd(),Xq=_g(),Zq=LA(),Rye=Yn(),Fye=Cy(),vy={description:"This Object indicates the Parser is during Recording Phase"};Object.freeze(vy);var Wq=!0,zq=Math.pow(2,Fye.BITS_FOR_OCCURRENCE_IDX)-1,_q=(0,Zq.createToken)({name:"RECORDING_PHASE_TOKEN",pattern:kye.Lexer.NA});(0,Xq.augmentTokenTypes)([_q]);var $q=(0,Zq.createTokenInstance)(_q,`This IToken indicates the Parser is in Recording Phase See: https://chevrotain.io/docs/guide/internals.html#grammar-recording for details`,-1,-1,-1,-1,-1,-1);Object.freeze($q);var Nye={name:`This CSTNode indicates the Parser is in Recording Phase See: https://chevrotain.io/docs/guide/internals.html#grammar-recording for details`,children:{}},Lye=function(){function r(){}return r.prototype.initGastRecorder=function(e){this.recordingProdStack=[],this.RECORDING_PHASE=!1},r.prototype.enableRecording=function(){var e=this;this.RECORDING_PHASE=!0,this.TRACE_INIT("Enable Recording",function(){for(var t=function(n){var s=n>0?n:"";e["CONSUME"+s]=function(o,a){return this.consumeInternalRecord(o,n,a)},e["SUBRULE"+s]=function(o,a){return this.subruleInternalRecord(o,n,a)},e["OPTION"+s]=function(o){return this.optionInternalRecord(o,n)},e["OR"+s]=function(o){return this.orInternalRecord(o,n)},e["MANY"+s]=function(o){this.manyInternalRecord(n,o)},e["MANY_SEP"+s]=function(o){this.manySepFirstInternalRecord(n,o)},e["AT_LEAST_ONE"+s]=function(o){this.atLeastOneInternalRecord(n,o)},e["AT_LEAST_ONE_SEP"+s]=function(o){this.atLeastOneSepFirstInternalRecord(n,o)}},i=0;i<10;i++)t(i);e.consume=function(n,s,o){return this.consumeInternalRecord(s,n,o)},e.subrule=function(n,s,o){return this.subruleInternalRecord(s,n,o)},e.option=function(n,s){return this.optionInternalRecord(s,n)},e.or=function(n,s){return this.orInternalRecord(s,n)},e.many=function(n,s){this.manyInternalRecord(n,s)},e.atLeastOne=function(n,s){this.atLeastOneInternalRecord(n,s)},e.ACTION=e.ACTION_RECORD,e.BACKTRACK=e.BACKTRACK_RECORD,e.LA=e.LA_RECORD})},r.prototype.disableRecording=function(){var e=this;this.RECORDING_PHASE=!1,this.TRACE_INIT("Deleting Recording methods",function(){for(var t=0;t<10;t++){var i=t>0?t:"";delete e["CONSUME"+i],delete e["SUBRULE"+i],delete e["OPTION"+i],delete e["OR"+i],delete e["MANY"+i],delete e["MANY_SEP"+i],delete e["AT_LEAST_ONE"+i],delete e["AT_LEAST_ONE_SEP"+i]}delete e.consume,delete e.subrule,delete e.option,delete e.or,delete e.many,delete e.atLeastOne,delete e.ACTION,delete e.BACKTRACK,delete e.LA})},r.prototype.ACTION_RECORD=function(e){},r.prototype.BACKTRACK_RECORD=function(e,t){return function(){return!0}},r.prototype.LA_RECORD=function(e){return Rye.END_OF_FILE},r.prototype.topLevelRuleRecord=function(e,t){try{var i=new To.Rule({definition:[],name:e});return i.name=e,this.recordingProdStack.push(i),t.call(this),this.recordingProdStack.pop(),i}catch(n){if(n.KNOWN_RECORDER_ERROR!==!0)try{n.message=n.message+` This error was thrown during the "grammar recording phase" For more info see: https://chevrotain.io/docs/guide/internals.html#grammar-recording`}catch{throw n}throw n}},r.prototype.optionInternalRecord=function(e,t){return Td.call(this,To.Option,e,t)},r.prototype.atLeastOneInternalRecord=function(e,t){Td.call(this,To.RepetitionMandatory,t,e)},r.prototype.atLeastOneSepFirstInternalRecord=function(e,t){Td.call(this,To.RepetitionMandatoryWithSeparator,t,e,Wq)},r.prototype.manyInternalRecord=function(e,t){Td.call(this,To.Repetition,t,e)},r.prototype.manySepFirstInternalRecord=function(e,t){Td.call(this,To.RepetitionWithSeparator,t,e,Wq)},r.prototype.orInternalRecord=function(e,t){return Tye.call(this,e,t)},r.prototype.subruleInternalRecord=function(e,t,i){if(Sy(t),!e||(0,In.has)(e,"ruleName")===!1){var n=new Error(" argument is invalid"+(" expecting a Parser method reference but got: <"+JSON.stringify(e)+">")+(` inside top level rule: <`+this.recordingProdStack[0].name+">"));throw n.KNOWN_RECORDER_ERROR=!0,n}var s=(0,In.peek)(this.recordingProdStack),o=e.ruleName,a=new To.NonTerminal({idx:t,nonTerminalName:o,label:i==null?void 0:i.LABEL,referencedRule:void 0});return s.definition.push(a),this.outputCst?Nye:vy},r.prototype.consumeInternalRecord=function(e,t,i){if(Sy(t),!(0,Xq.hasShortKeyProperty)(e)){var n=new Error(" argument is invalid"+(" expecting a TokenType reference but got: <"+JSON.stringify(e)+">")+(` inside top level rule: <`+this.recordingProdStack[0].name+">"));throw n.KNOWN_RECORDER_ERROR=!0,n}var s=(0,In.peek)(this.recordingProdStack),o=new To.Terminal({idx:t,terminalType:e,label:i==null?void 0:i.LABEL});return s.definition.push(o),$q},r}();xy.GastRecorder=Lye;function Td(r,e,t,i){i===void 0&&(i=!1),Sy(t);var n=(0,In.peek)(this.recordingProdStack),s=(0,In.isFunction)(e)?e:e.DEF,o=new r({definition:[],idx:t});return i&&(o.separator=e.SEP),(0,In.has)(e,"MAX_LOOKAHEAD")&&(o.maxLookahead=e.MAX_LOOKAHEAD),this.recordingProdStack.push(o),s.call(this),n.definition.push(o),this.recordingProdStack.pop(),vy}function Tye(r,e){var t=this;Sy(e);var i=(0,In.peek)(this.recordingProdStack),n=(0,In.isArray)(r)===!1,s=n===!1?r:r.DEF,o=new To.Alternation({definition:[],idx:e,ignoreAmbiguities:n&&r.IGNORE_AMBIGUITIES===!0});(0,In.has)(r,"MAX_LOOKAHEAD")&&(o.maxLookahead=r.MAX_LOOKAHEAD);var a=(0,In.some)(s,function(l){return(0,In.isFunction)(l.GATE)});return o.hasPredicates=a,i.definition.push(o),(0,In.forEach)(s,function(l){var c=new To.Alternative({definition:[]});o.definition.push(c),(0,In.has)(l,"IGNORE_AMBIGUITIES")?c.ignoreAmbiguities=l.IGNORE_AMBIGUITIES:(0,In.has)(l,"GATE")&&(c.ignoreAmbiguities=!0),t.recordingProdStack.push(c),l.ALT.call(t),t.recordingProdStack.pop()}),vy}function Vq(r){return r===0?"":""+r}function Sy(r){if(r<0||r>zq){var e=new Error("Invalid DSL Method idx value: <"+r+`> `+("Idx value must be a none negative value smaller than "+(zq+1)));throw e.KNOWN_RECORDER_ERROR=!0,e}}});var rJ=w(Py=>{"use strict";Object.defineProperty(Py,"__esModule",{value:!0});Py.PerformanceTracer=void 0;var tJ=Gt(),Oye=Yn(),Mye=function(){function r(){}return r.prototype.initPerformanceTracer=function(e){if((0,tJ.has)(e,"traceInitPerf")){var t=e.traceInitPerf,i=typeof t=="number";this.traceInitMaxIdent=i?t:1/0,this.traceInitPerf=i?t>0:t}else this.traceInitMaxIdent=0,this.traceInitPerf=Oye.DEFAULT_PARSER_CONFIG.traceInitPerf;this.traceInitIndent=-1},r.prototype.TRACE_INIT=function(e,t){if(this.traceInitPerf===!0){this.traceInitIndent++;var i=new Array(this.traceInitIndent+1).join(" ");this.traceInitIndent <"+e+">");var n=(0,tJ.timer)(t),s=n.time,o=n.value,a=s>10?console.warn:console.log;return this.traceInitIndent time: "+s+"ms"),this.traceInitIndent--,o}else return t()},r}();Py.PerformanceTracer=Mye});var iJ=w(Dy=>{"use strict";Object.defineProperty(Dy,"__esModule",{value:!0});Dy.applyMixins=void 0;function Uye(r,e){e.forEach(function(t){var i=t.prototype;Object.getOwnPropertyNames(i).forEach(function(n){if(n!=="constructor"){var s=Object.getOwnPropertyDescriptor(i,n);s&&(s.get||s.set)?Object.defineProperty(r.prototype,n,s):r.prototype[n]=t.prototype[n]}})})}Dy.applyMixins=Uye});var Yn=w(dr=>{"use strict";var oJ=dr&&dr.__extends||function(){var r=function(e,t){return r=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(i,n){i.__proto__=n}||function(i,n){for(var s in n)Object.prototype.hasOwnProperty.call(n,s)&&(i[s]=n[s])},r(e,t)};return function(e,t){if(typeof t!="function"&&t!==null)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");r(e,t);function i(){this.constructor=e}e.prototype=t===null?Object.create(t):(i.prototype=t.prototype,new i)}}();Object.defineProperty(dr,"__esModule",{value:!0});dr.EmbeddedActionsParser=dr.CstParser=dr.Parser=dr.EMPTY_ALT=dr.ParserDefinitionErrorType=dr.DEFAULT_RULE_CONFIG=dr.DEFAULT_PARSER_CONFIG=dr.END_OF_FILE=void 0;var $i=Gt(),Kye=Yj(),nJ=LA(),aJ=xd(),sJ=pq(),Hye=sx(),Gye=Bq(),Yye=Fq(),jye=Lq(),qye=Oq(),Jye=Hq(),Wye=Yq(),zye=Jq(),Vye=eJ(),Xye=rJ(),Zye=iJ();dr.END_OF_FILE=(0,nJ.createTokenInstance)(nJ.EOF,"",NaN,NaN,NaN,NaN,NaN,NaN);Object.freeze(dr.END_OF_FILE);dr.DEFAULT_PARSER_CONFIG=Object.freeze({recoveryEnabled:!1,maxLookahead:3,dynamicTokensEnabled:!1,outputCst:!0,errorMessageProvider:aJ.defaultParserErrorProvider,nodeLocationTracking:"none",traceInitPerf:!1,skipValidations:!1});dr.DEFAULT_RULE_CONFIG=Object.freeze({recoveryValueFunc:function(){},resyncEnabled:!0});var _ye;(function(r){r[r.INVALID_RULE_NAME=0]="INVALID_RULE_NAME",r[r.DUPLICATE_RULE_NAME=1]="DUPLICATE_RULE_NAME",r[r.INVALID_RULE_OVERRIDE=2]="INVALID_RULE_OVERRIDE",r[r.DUPLICATE_PRODUCTIONS=3]="DUPLICATE_PRODUCTIONS",r[r.UNRESOLVED_SUBRULE_REF=4]="UNRESOLVED_SUBRULE_REF",r[r.LEFT_RECURSION=5]="LEFT_RECURSION",r[r.NONE_LAST_EMPTY_ALT=6]="NONE_LAST_EMPTY_ALT",r[r.AMBIGUOUS_ALTS=7]="AMBIGUOUS_ALTS",r[r.CONFLICT_TOKENS_RULES_NAMESPACE=8]="CONFLICT_TOKENS_RULES_NAMESPACE",r[r.INVALID_TOKEN_NAME=9]="INVALID_TOKEN_NAME",r[r.NO_NON_EMPTY_LOOKAHEAD=10]="NO_NON_EMPTY_LOOKAHEAD",r[r.AMBIGUOUS_PREFIX_ALTS=11]="AMBIGUOUS_PREFIX_ALTS",r[r.TOO_MANY_ALTS=12]="TOO_MANY_ALTS"})(_ye=dr.ParserDefinitionErrorType||(dr.ParserDefinitionErrorType={}));function $ye(r){return r===void 0&&(r=void 0),function(){return r}}dr.EMPTY_ALT=$ye;var ky=function(){function r(e,t){this.definitionErrors=[],this.selfAnalysisDone=!1;var i=this;if(i.initErrorHandler(t),i.initLexerAdapter(),i.initLooksAhead(t),i.initRecognizerEngine(e,t),i.initRecoverable(t),i.initTreeBuilder(t),i.initContentAssist(),i.initGastRecorder(t),i.initPerformanceTracer(t),(0,$i.has)(t,"ignoredIssues"))throw new Error(`The IParserConfig property has been deprecated. Please use the flag on the relevant DSL method instead. See: https://chevrotain.io/docs/guide/resolving_grammar_errors.html#IGNORING_AMBIGUITIES For further details.`);this.skipValidations=(0,$i.has)(t,"skipValidations")?t.skipValidations:dr.DEFAULT_PARSER_CONFIG.skipValidations}return r.performSelfAnalysis=function(e){throw Error("The **static** `performSelfAnalysis` method has been deprecated. \nUse the **instance** method with the same name instead.")},r.prototype.performSelfAnalysis=function(){var e=this;this.TRACE_INIT("performSelfAnalysis",function(){var t;e.selfAnalysisDone=!0;var i=e.className;e.TRACE_INIT("toFastProps",function(){(0,$i.toFastProperties)(e)}),e.TRACE_INIT("Grammar Recording",function(){try{e.enableRecording(),(0,$i.forEach)(e.definedRulesNames,function(s){var o=e[s],a=o.originalGrammarAction,l=void 0;e.TRACE_INIT(s+" Rule",function(){l=e.topLevelRuleRecord(s,a)}),e.gastProductionsCache[s]=l})}finally{e.disableRecording()}});var n=[];if(e.TRACE_INIT("Grammar Resolving",function(){n=(0,sJ.resolveGrammar)({rules:(0,$i.values)(e.gastProductionsCache)}),e.definitionErrors=e.definitionErrors.concat(n)}),e.TRACE_INIT("Grammar Validations",function(){if((0,$i.isEmpty)(n)&&e.skipValidations===!1){var s=(0,sJ.validateGrammar)({rules:(0,$i.values)(e.gastProductionsCache),maxLookahead:e.maxLookahead,tokenTypes:(0,$i.values)(e.tokensMap),errMsgProvider:aJ.defaultGrammarValidatorErrorProvider,grammarName:i});e.definitionErrors=e.definitionErrors.concat(s)}}),(0,$i.isEmpty)(e.definitionErrors)&&(e.recoveryEnabled&&e.TRACE_INIT("computeAllProdsFollows",function(){var s=(0,Kye.computeAllProdsFollows)((0,$i.values)(e.gastProductionsCache));e.resyncFollows=s}),e.TRACE_INIT("ComputeLookaheadFunctions",function(){e.preComputeLookaheadFunctions((0,$i.values)(e.gastProductionsCache))})),!r.DEFER_DEFINITION_ERRORS_HANDLING&&!(0,$i.isEmpty)(e.definitionErrors))throw t=(0,$i.map)(e.definitionErrors,function(s){return s.message}),new Error(`Parser Definition Errors detected: `+t.join(` ------------------------------- `))})},r.DEFER_DEFINITION_ERRORS_HANDLING=!1,r}();dr.Parser=ky;(0,Zye.applyMixins)(ky,[Hye.Recoverable,Gye.LooksAhead,Yye.TreeBuilder,jye.LexerAdapter,Jye.RecognizerEngine,qye.RecognizerApi,Wye.ErrorHandler,zye.ContentAssist,Vye.GastRecorder,Xye.PerformanceTracer]);var ewe=function(r){oJ(e,r);function e(t,i){i===void 0&&(i=dr.DEFAULT_PARSER_CONFIG);var n=this,s=(0,$i.cloneObj)(i);return s.outputCst=!0,n=r.call(this,t,s)||this,n}return e}(ky);dr.CstParser=ewe;var twe=function(r){oJ(e,r);function e(t,i){i===void 0&&(i=dr.DEFAULT_PARSER_CONFIG);var n=this,s=(0,$i.cloneObj)(i);return s.outputCst=!1,n=r.call(this,t,s)||this,n}return e}(ky);dr.EmbeddedActionsParser=twe});var lJ=w(Ry=>{"use strict";Object.defineProperty(Ry,"__esModule",{value:!0});Ry.createSyntaxDiagramsCode=void 0;var AJ=Dv();function rwe(r,e){var t=e===void 0?{}:e,i=t.resourceBase,n=i===void 0?"https://unpkg.com/chevrotain@"+AJ.VERSION+"/diagrams/":i,s=t.css,o=s===void 0?"https://unpkg.com/chevrotain@"+AJ.VERSION+"/diagrams/diagrams.css":s,a=` `,l=` `,c=` ================================================ FILE: tokenizers/examples/unstable_wasm/www/index.js ================================================ import * as wasm from "unstable_wasm"; console.log(wasm.tokenize("ab")); console.log(wasm.tokenize("abc")); ================================================ FILE: tokenizers/examples/unstable_wasm/www/package.json ================================================ { "name": "create-wasm-app", "version": "0.1.0", "description": "create an app to consume rust-generated wasm packages", "main": "index.js", "bin": { "create-wasm-app": ".bin/create-wasm-app.js" }, "scripts": { "build": "webpack --config webpack.config.js", "start": "NODE_OPTIONS=--openssl-legacy-provider webpack-dev-server" }, "repository": { "type": "git", "url": "git+https://github.com/rustwasm/create-wasm-app.git" }, "keywords": ["webassembly", "wasm", "rust", "webpack"], "author": "Ashley Williams ", "license": "(MIT OR Apache-2.0)", "bugs": { "url": "https://github.com/rustwasm/create-wasm-app/issues" }, "homepage": "https://github.com/rustwasm/create-wasm-app#readme", "devDependencies": { "copy-webpack-plugin": "^11.0.0", "webpack": "^5.75.0", "webpack-cli": "^5.0.1", "webpack-dev-server": "^5.2.1" }, "dependencies": { "unstable_wasm": "file:../pkg" } } ================================================ FILE: tokenizers/examples/unstable_wasm/www/webpack.config.js ================================================ const CopyWebpackPlugin = require("copy-webpack-plugin"); const path = require('path'); module.exports = { entry: "./bootstrap.js", output: { path: path.resolve(__dirname, "dist"), filename: "bootstrap.js", }, mode: "development", plugins: [ new CopyWebpackPlugin(['index.html']) ], }; ================================================ FILE: tokenizers/rust-toolchain ================================================ stable ================================================ FILE: tokenizers/src/decoders/bpe.rs ================================================ use crate::tokenizer::{Decoder, Result}; use serde::{Deserialize, Serialize}; #[derive(Deserialize, Clone, Debug, Serialize)] /// Allows decoding Original BPE by joining all the tokens and then replacing /// the suffix used to identify end-of-words by whitespaces #[serde(tag = "type")] #[non_exhaustive] pub struct BPEDecoder { pub suffix: String, } impl BPEDecoder { pub fn new(suffix: String) -> Self { Self { suffix } } } impl Default for BPEDecoder { fn default() -> Self { Self::new("".into()) } } impl Decoder for BPEDecoder { fn decode_chain(&self, tokens: Vec) -> Result> { let n = tokens.len() - 1; Ok(tokens .into_iter() .enumerate() .map(|(i, token)| { let replacement = if i == n { "" } else { " " }; token.replace(&self.suffix, replacement) }) .collect()) } } ================================================ FILE: tokenizers/src/decoders/byte_fallback.rs ================================================ use crate::tokenizer::{Decoder, Result}; use monostate::MustBe; use serde::{Deserialize, Serialize}; #[derive(Deserialize, Clone, Debug, Serialize, Default)] /// ByteFallback is a simple trick which converts tokens looking like `<0x61>` /// to pure bytes, and attempts to make them into a string. If the tokens /// cannot be decoded you will get � instead for each inconvertible byte token #[non_exhaustive] pub struct ByteFallback { #[serde(rename = "type")] type_: MustBe!("ByteFallback"), } impl ByteFallback { pub fn new() -> Self { Self { type_: MustBe!("ByteFallback"), } } } impl Decoder for ByteFallback { fn decode_chain(&self, tokens: Vec) -> Result> { let mut new_tokens: Vec = vec![]; let mut previous_byte_tokens: Vec = vec![]; for token in tokens { let bytes = if token.len() == 6 && token.starts_with("<0x") && token.ends_with('>') { u8::from_str_radix(&token[3..5], 16).ok() } else { None }; if let Some(bytes) = bytes { previous_byte_tokens.push(bytes); } else { if !previous_byte_tokens.is_empty() { if let Ok(string) = String::from_utf8(previous_byte_tokens.clone()) { new_tokens.push(string); } else { for _ in 0..previous_byte_tokens.len() { new_tokens.push("�".into()); } } previous_byte_tokens.clear(); } new_tokens.push(token); } } if !previous_byte_tokens.is_empty() { if let Ok(string) = String::from_utf8(previous_byte_tokens.clone()) { new_tokens.push(string); } else { for _ in 0..previous_byte_tokens.len() { new_tokens.push("�".into()); } } } Ok(new_tokens) } } #[cfg(test)] mod tests { use super::*; #[test] fn decode() { let decoder = ByteFallback::new(); let res = decoder .decode_chain(vec!["Hey".into(), "friend!".into()]) .unwrap(); assert_eq!(res, vec!["Hey", "friend!"]); let res = decoder.decode_chain(vec!["<0x61>".into()]).unwrap(); assert_eq!(res, vec!["a"]); let res = decoder.decode_chain(vec!["<0xE5>".into()]).unwrap(); assert_eq!(res, vec!["�"]); let res = decoder .decode_chain(vec!["<0xE5>".into(), "<0x8f>".into()]) .unwrap(); assert_eq!(res, vec!["�", "�"]); // 叫 let res = decoder .decode_chain(vec!["<0xE5>".into(), "<0x8f>".into(), "<0xab>".into()]) .unwrap(); assert_eq!(res, vec!["叫"]); let res = decoder .decode_chain(vec![ "<0xE5>".into(), "<0x8f>".into(), "<0xab>".into(), "a".into(), ]) .unwrap(); assert_eq!(res, vec!["叫", "a"]); let res = decoder .decode_chain(vec!["<0xE5>".into(), "<0x8f>".into(), "a".into()]) .unwrap(); assert_eq!(res, vec!["�", "�", "a"]); } } ================================================ FILE: tokenizers/src/decoders/ctc.rs ================================================ use crate::decoders::wordpiece; use crate::tokenizer::{Decoder, Result}; use itertools::Itertools; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize)] /// The CTC (Connectionist Temporal Classification) decoder takes care /// of sanitizing a list of inputs token. /// Due to some alignment problem the output of some models can come /// with duplicated token. #[serde(tag = "type")] #[non_exhaustive] pub struct CTC { /// The pad token used by CTC to delimit a new token. pub pad_token: String, /// The word delimiter token. It will be replaced by a ``. pub word_delimiter_token: String, /// Whether to cleanup some tokenization artifacts. /// Mainly spaces before punctuation, and some abbreviated english forms. pub cleanup: bool, } impl CTC { pub fn new(pad_token: String, word_delimiter_token: String, cleanup: bool) -> Self { Self { pad_token, word_delimiter_token, cleanup, } } } impl Default for CTC { fn default() -> Self { Self { pad_token: "".to_string(), word_delimiter_token: "|".to_string(), cleanup: true, } } } impl Decoder for CTC { fn decode_chain(&self, tokens: Vec) -> Result> { Ok(tokens .into_iter() .dedup() .filter_map(|token| { let mut replaced = token.replace(&self.pad_token, ""); if self.cleanup { replaced = wordpiece::cleanup(&replaced).replace(&self.word_delimiter_token, " "); } if replaced.is_empty() { None } else { Some(replaced) } }) .collect()) } } #[cfg(test)] mod tests { use super::*; #[test] fn handmade_sample() { let ctc_decoder = CTC::default(); let id_to_string_result = " h e e l l l o o o " .split(' ') .map(|s| s.to_string()) .collect(); assert_eq!( ctc_decoder.decode_chain(id_to_string_result).unwrap(), vec!["h", "e", "l", "l", "o"] ); } #[test] fn handmade_with_delimiter_sample() { let ctc_decoder = CTC::default(); let id_to_string_result = " h e e l l l o o o | w o o o r l l d " .split(' ') .map(|s| s.to_string()) .collect(); assert_eq!( ctc_decoder.decode_chain(id_to_string_result).unwrap(), vec!["h", "e", "l", "l", "o", " ", "w", "o", "r", "l", "d"] ); } #[test] fn librispeech_sample() { let ctc_decoder = CTC::default(); let id_to_string_result = " A | | M A N | | | S A I D D | | T T O | | T H E E | | | U U N N I V E R R S E E | | S S I R R | | | I | E X I S T | | ".split(' ').map(|s| s.to_string()).collect(); assert_eq!( ctc_decoder.decode_chain(id_to_string_result).unwrap(), vec![ "A", " ", "M", "A", "N", " ", "S", "A", "I", "D", " ", "T", "O", " ", "T", "H", "E", " ", "U", "N", "I", "V", "E", "R", "S", "E", " ", "S", "I", "R", " ", "I", " ", "E", "X", "I", "S", "T", " " ] ); } #[test] fn another_librispeech_sample() { let ctc_decoder = CTC::default(); let id_to_string_result = " H I S S | | I N S T T A N C C T | | | | | P A N N N I C | | W A S | | F O L L L O O W E E D | | B Y | | | A | | S S S M M A L L L | | | S H H A R R P | B L L O W W | | | H I G H H | | O N | | H I S S | | C H H E S S T T | | | ".split(' ').map(|s| s.to_string()).collect(); assert_eq!( ctc_decoder.decode_chain(id_to_string_result).unwrap(), vec![ "H", "I", "S", " ", "I", "N", "S", "T", "A", "N", "C", "T", " ", "P", "A", "N", "I", "C", " ", "W", "A", "S", " ", "F", "O", "L", "L", "O", "W", "E", "D", " ", "B", "Y", " ", "A", " ", "S", "M", "A", "L", "L", " ", "S", "H", "A", "R", "P", " ", "B", "L", "O", "W", " ", "H", "I", "G", "H", " ", "O", "N", " ", "H", "I", "S", " ", "C", "H", "E", "S", "T", " " ] ); } } ================================================ FILE: tokenizers/src/decoders/fuse.rs ================================================ use crate::tokenizer::{Decoder, Result}; use monostate::MustBe; use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, Serialize, Deserialize, Default)] /// Fuse simply fuses all tokens into one big string. /// It's usually the last decoding step anyway, but this /// decoder exists incase some decoders need to happen after that /// step #[non_exhaustive] pub struct Fuse { #[serde(rename = "type")] type_: MustBe!("Fuse"), } impl Fuse { pub fn new() -> Self { Self { type_: MustBe!("Fuse"), } } } impl Decoder for Fuse { fn decode_chain(&self, tokens: Vec) -> Result> { let new_string = tokens.join(""); Ok(vec![new_string]) } } #[cfg(test)] mod tests { use super::*; #[test] fn decode() { let decoder = Fuse::new(); let res = decoder .decode_chain(vec!["Hey".into(), " friend!".into()]) .unwrap(); assert_eq!(res, vec!["Hey friend!"]); } } ================================================ FILE: tokenizers/src/decoders/mod.rs ================================================ pub mod bpe; pub mod byte_fallback; pub mod ctc; pub mod fuse; pub mod sequence; pub mod strip; pub mod wordpiece; // Re-export these as decoders pub use super::pre_tokenizers::byte_level; pub use super::pre_tokenizers::metaspace; use serde::{Deserialize, Deserializer, Serialize}; use crate::decoders::bpe::BPEDecoder; use crate::decoders::byte_fallback::ByteFallback; use crate::decoders::ctc::CTC; use crate::decoders::fuse::Fuse; use crate::decoders::sequence::Sequence; use crate::decoders::strip::Strip; use crate::decoders::wordpiece::WordPiece; use crate::normalizers::replace::Replace; use crate::pre_tokenizers::byte_level::ByteLevel; use crate::pre_tokenizers::metaspace::Metaspace; use crate::{Decoder, Result}; #[derive(Serialize, Clone, Debug)] #[serde(untagged)] pub enum DecoderWrapper { BPE(BPEDecoder), ByteLevel(ByteLevel), WordPiece(WordPiece), Metaspace(Metaspace), CTC(CTC), Sequence(Sequence), Replace(Replace), Fuse(Fuse), Strip(Strip), ByteFallback(ByteFallback), } impl<'de> Deserialize<'de> for DecoderWrapper { fn deserialize(deserializer: D) -> std::result::Result where D: Deserializer<'de>, { #[derive(Deserialize)] pub struct Tagged { #[serde(rename = "type")] variant: EnumType, #[serde(flatten)] rest: serde_json::Value, } #[derive(Serialize, Deserialize)] pub enum EnumType { BPEDecoder, ByteLevel, WordPiece, Metaspace, CTC, Sequence, Replace, Fuse, Strip, ByteFallback, } #[derive(Deserialize)] #[serde(untagged)] pub enum DecoderHelper { Tagged(Tagged), Legacy(serde_json::Value), } #[derive(Deserialize)] #[serde(untagged)] pub enum DecoderUntagged { BPE(BPEDecoder), ByteLevel(ByteLevel), WordPiece(WordPiece), Metaspace(Metaspace), CTC(CTC), Sequence(Sequence), Replace(Replace), Fuse(Fuse), Strip(Strip), ByteFallback(ByteFallback), } let helper = DecoderHelper::deserialize(deserializer).expect("Helper"); Ok(match helper { DecoderHelper::Tagged(model) => { let mut values: serde_json::Map = serde_json::from_value(model.rest).map_err(serde::de::Error::custom)?; values.insert( "type".to_string(), serde_json::to_value(&model.variant).map_err(serde::de::Error::custom)?, ); let values = serde_json::Value::Object(values); match model.variant { EnumType::BPEDecoder => DecoderWrapper::BPE( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), EnumType::ByteLevel => DecoderWrapper::ByteLevel( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), EnumType::WordPiece => DecoderWrapper::WordPiece( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), EnumType::Metaspace => DecoderWrapper::Metaspace( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), EnumType::CTC => DecoderWrapper::CTC( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), EnumType::Sequence => DecoderWrapper::Sequence( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), EnumType::Replace => DecoderWrapper::Replace( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), EnumType::Fuse => DecoderWrapper::Fuse( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), EnumType::Strip => DecoderWrapper::Strip( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), EnumType::ByteFallback => DecoderWrapper::ByteFallback( serde_json::from_value(values).map_err(serde::de::Error::custom)?, ), } } DecoderHelper::Legacy(value) => { let untagged = serde_json::from_value(value).map_err(serde::de::Error::custom)?; match untagged { DecoderUntagged::BPE(dec) => DecoderWrapper::BPE(dec), DecoderUntagged::ByteLevel(dec) => DecoderWrapper::ByteLevel(dec), DecoderUntagged::WordPiece(dec) => DecoderWrapper::WordPiece(dec), DecoderUntagged::Metaspace(dec) => DecoderWrapper::Metaspace(dec), DecoderUntagged::CTC(dec) => DecoderWrapper::CTC(dec), DecoderUntagged::Sequence(dec) => DecoderWrapper::Sequence(dec), DecoderUntagged::Replace(dec) => DecoderWrapper::Replace(dec), DecoderUntagged::Fuse(dec) => DecoderWrapper::Fuse(dec), DecoderUntagged::Strip(dec) => DecoderWrapper::Strip(dec), DecoderUntagged::ByteFallback(dec) => DecoderWrapper::ByteFallback(dec), } } }) } } impl Decoder for DecoderWrapper { fn decode_chain(&self, tokens: Vec) -> Result> { match self { Self::BPE(bpe) => bpe.decode_chain(tokens), Self::ByteLevel(bl) => bl.decode_chain(tokens), Self::Metaspace(ms) => ms.decode_chain(tokens), Self::WordPiece(wp) => wp.decode_chain(tokens), Self::CTC(ctc) => ctc.decode_chain(tokens), Self::Sequence(seq) => seq.decode_chain(tokens), Self::Replace(seq) => seq.decode_chain(tokens), Self::ByteFallback(bf) => bf.decode_chain(tokens), Self::Strip(bf) => bf.decode_chain(tokens), Self::Fuse(bf) => bf.decode_chain(tokens), } } } impl_enum_from!(BPEDecoder, DecoderWrapper, BPE); impl_enum_from!(ByteLevel, DecoderWrapper, ByteLevel); impl_enum_from!(ByteFallback, DecoderWrapper, ByteFallback); impl_enum_from!(Fuse, DecoderWrapper, Fuse); impl_enum_from!(Strip, DecoderWrapper, Strip); impl_enum_from!(Metaspace, DecoderWrapper, Metaspace); impl_enum_from!(WordPiece, DecoderWrapper, WordPiece); impl_enum_from!(CTC, DecoderWrapper, CTC); impl_enum_from!(Sequence, DecoderWrapper, Sequence); impl_enum_from!(Replace, DecoderWrapper, Replace); #[cfg(test)] mod tests { use super::*; #[test] fn decoder_serialization() { let oldjson = r#"{"type":"Sequence","decoders":[{"type":"ByteFallback"},{"type":"Metaspace","replacement":"▁","add_prefix_space":true,"prepend_scheme":"always"}]}"#; let olddecoder: DecoderWrapper = serde_json::from_str(oldjson).unwrap(); let oldserialized = serde_json::to_string(&olddecoder).unwrap(); let json = r#"{"type":"Sequence","decoders":[{"type":"ByteFallback"},{"type":"Metaspace","replacement":"▁","prepend_scheme":"always","split":true}]}"#; assert_eq!(oldserialized, json); let decoder: DecoderWrapper = serde_json::from_str(json).unwrap(); let serialized = serde_json::to_string(&decoder).unwrap(); assert_eq!(serialized, json); } #[test] fn decoder_serialization_other_no_arg() { let json = r#"{"type":"Sequence","decoders":[{"type":"Fuse"},{"type":"Metaspace","replacement":"▁","prepend_scheme":"always","split":true}]}"#; let decoder: DecoderWrapper = serde_json::from_str(json).unwrap(); let serialized = serde_json::to_string(&decoder).unwrap(); assert_eq!(serialized, json); } #[test] fn decoder_serialization_no_decode() { let json = r#"{"type":"Sequence","decoders":[{},{"type":"Metaspace","replacement":"▁","prepend_scheme":"always"}]}"#; let parse = serde_json::from_str::(json); match parse { Err(err) => assert_eq!( format!("{err}"), "data did not match any variant of untagged enum DecoderUntagged" ), _ => panic!("Expected error"), } let json = r#"{"replacement":"▁","prepend_scheme":"always"}"#; let parse = serde_json::from_str::(json); match parse { Err(err) => assert_eq!( format!("{err}"), "data did not match any variant of untagged enum DecoderUntagged" ), _ => panic!("Expected error"), } let json = r#"{"type":"Sequence","prepend_scheme":"always"}"#; let parse = serde_json::from_str::(json); match parse { Err(err) => assert_eq!(format!("{err}"), "missing field `decoders`"), _ => panic!("Expected error"), } } } ================================================ FILE: tokenizers/src/decoders/sequence.rs ================================================ use crate::decoders::DecoderWrapper; use crate::tokenizer::{Decoder, Result}; use crate::utils::macro_rules_attribute; use serde::{Deserialize, Serialize}; #[derive(Clone, Debug)] #[macro_rules_attribute(impl_serde_type!)] pub struct Sequence { decoders: Vec, } impl Sequence { pub fn new(decoders: Vec) -> Self { Self { decoders } } pub fn get_decoders(&self) -> &[DecoderWrapper] { &self.decoders } pub fn get_decoders_mut(&mut self) -> &mut [DecoderWrapper] { &mut self.decoders } } impl Decoder for Sequence { fn decode_chain(&self, mut tokens: Vec) -> Result> { for decoder in &self.decoders { tokens = decoder.decode_chain(tokens)?; } Ok(tokens) } } #[cfg(test)] mod tests { use super::*; use crate::decoders::ctc::CTC; use crate::pre_tokenizers::metaspace::Metaspace; #[test] fn sequence_basic() { let decoders = vec![ DecoderWrapper::CTC(CTC::default()), DecoderWrapper::Metaspace(Metaspace::default()), ]; let decoder = Sequence::new(decoders); let tokens: Vec = vec!["▁", "▁", "H", "H", "i", "i", "▁", "y", "o", "u"] .into_iter() .map(|s| s.to_string()) .collect(); let out_tokens = decoder.decode(tokens).unwrap(); assert_eq!(out_tokens, "Hi you"); } } ================================================ FILE: tokenizers/src/decoders/strip.rs ================================================ use crate::tokenizer::{Decoder, Result}; use serde::{Deserialize, Serialize}; #[derive(Deserialize, Clone, Debug, Serialize, Default)] /// Strip is a simple trick which converts tokens looking like `<0x61>` /// to pure bytes, and attempts to make them into a string. If the tokens /// cannot be decoded you will get � instead for each inconvertible byte token #[serde(tag = "type")] #[non_exhaustive] pub struct Strip { pub content: char, pub start: usize, pub stop: usize, } impl Strip { pub fn new(content: char, start: usize, stop: usize) -> Self { Self { content, start, stop, } } } impl Decoder for Strip { fn decode_chain(&self, tokens: Vec) -> Result> { Ok(tokens .into_iter() .map(|token| { let chars: Vec = token.chars().collect(); let mut start_cut = 0; for (i, &c) in chars.iter().enumerate().take(self.start) { if c == self.content { start_cut = i + 1; continue; } else { break; } } let mut stop_cut = chars.len(); for i in 0..self.stop { let index = chars.len() - i - 1; if chars[index] == self.content { stop_cut = index; continue; } else { break; } } let new_token: String = chars[start_cut..stop_cut].iter().collect(); new_token }) .collect()) } } #[cfg(test)] mod tests { use super::*; #[test] fn decode() { let decoder = Strip::new('H', 1, 0); let res = decoder .decode_chain(vec!["Hey".into(), " friend!".into(), "HHH".into()]) .unwrap(); assert_eq!(res, vec!["ey", " friend!", "HH"]); let decoder = Strip::new('y', 0, 1); let res = decoder .decode_chain(vec!["Hey".into(), " friend!".into()]) .unwrap(); assert_eq!(res, vec!["He", " friend!"]); } } ================================================ FILE: tokenizers/src/decoders/wordpiece.rs ================================================ use crate::tokenizer::{Decoder, Result}; use serde::{Deserialize, Serialize}; #[derive(Deserialize, Clone, Debug, Serialize)] /// The WordPiece decoder takes care of decoding a list of wordpiece tokens /// back into a readable string. #[serde(tag = "type")] #[non_exhaustive] pub struct WordPiece { /// The prefix to be used for continuing subwords pub prefix: String, /// Whether to cleanup some tokenization artifacts (spaces before punctuation, ...) pub cleanup: bool, } impl WordPiece { pub fn new(prefix: String, cleanup: bool) -> Self { Self { prefix, cleanup } } } impl Default for WordPiece { fn default() -> Self { Self { prefix: "##".to_owned(), cleanup: true, } } } pub fn cleanup(dirty_input: &str) -> String { dirty_input .replace(" .", ".") .replace(" ?", "?") .replace(" !", "!") .replace(" ,", ",") .replace(" ' ", "'") .replace(" n't", "n't") .replace(" 'm", "'m") .replace(" do not", " don't") .replace(" 's", "'s") .replace(" 've", "'ve") .replace(" 're", "'re") } impl Decoder for WordPiece { fn decode_chain(&self, mut tokens: Vec) -> Result> { for (i, token) in tokens.iter_mut().enumerate() { if i != 0 { if let Some(tk) = token.strip_prefix(&self.prefix) { *token = tk.to_string(); } else { *token = format!(" {token}"); } } if self.cleanup { *token = cleanup(token); } } Ok(tokens) } } #[cfg(test)] mod tests { use super::*; #[test] fn wordpiece_decoder() { let decoder = WordPiece::new("##".to_string(), false); assert_eq!( decoder .decode(vec![ "##uelo".to_string(), "Ara".to_string(), "##új".to_string(), "##o".to_string(), "No".to_string(), "##guera".to_string() ]) .unwrap(), "##uelo Araújo Noguera" ); } } ================================================ FILE: tokenizers/src/lib.rs ================================================ #![cfg_attr(docsrs, feature(doc_cfg))] #![warn(clippy::all)] #![allow(clippy::upper_case_acronyms)] #![doc(html_favicon_url = "https://huggingface.co/favicon.ico")] #![doc(html_logo_url = "https://huggingface.co/landing/assets/huggingface_logo.svg")] //! The core of `tokenizers`, written in Rust. //! Provides an implementation of today's most used tokenizers, with a focus on performance and //! versatility. //! //! # What is a Tokenizer //! //! A Tokenizer works as a pipeline, it processes some raw text as input and outputs an `Encoding`. //! The various steps of the pipeline are: //! //! 1. The `Normalizer`: in charge of normalizing the text. Common examples of normalization are //! the [unicode normalization standards](https://unicode.org/reports/tr15/#Norm_Forms), such as `NFD` or `NFKC`. //! More details about how to use the `Normalizers` are available on the //! [Hugging Face blog](https://huggingface.co/docs/tokenizers/components#normalizers) //! 2. The `PreTokenizer`: in charge of creating initial words splits in the text. The most common way of //! splitting text is simply on whitespace. //! 3. The `Model`: in charge of doing the actual tokenization. An example of a `Model` would be //! `BPE` or `WordPiece`. //! 4. The `PostProcessor`: in charge of post-processing the `Encoding` to add anything relevant //! that, for example, a language model would need, such as special tokens. //! //! ## Loading a pretrained tokenizer from the Hub //! ``` //! use tokenizers::tokenizer::{Result, Tokenizer}; //! //! fn main() -> Result<()> { //! # #[cfg(feature = "http")] //! # { //! // needs http feature enabled //! let tokenizer = Tokenizer::from_pretrained("bert-base-cased", None)?; //! //! let encoding = tokenizer.encode("Hey there!", false)?; //! println!("{:?}", encoding.get_tokens()); //! # } //! Ok(()) //! } //! ``` //! //! ## Deserialization and tokenization example //! //! ```no_run //! use tokenizers::tokenizer::{Result, Tokenizer, EncodeInput}; //! use tokenizers::models::bpe::BPE; //! //! fn main() -> Result<()> { //! let bpe_builder = BPE::from_file("./path/to/vocab.json", "./path/to/merges.txt"); //! let bpe = bpe_builder //! .dropout(0.1) //! .unk_token("[UNK]".into()) //! .build()?; //! //! let mut tokenizer = Tokenizer::new(bpe); //! //! let encoding = tokenizer.encode("Hey there!", false)?; //! println!("{:?}", encoding.get_tokens()); //! //! Ok(()) //! } //! ``` //! //! ## Training and serialization example //! //! ```no_run //! use tokenizers::decoders::DecoderWrapper; //! use tokenizers::models::bpe::{BpeTrainerBuilder, BPE}; //! use tokenizers::normalizers::{strip::Strip, unicode::NFC, utils::Sequence, NormalizerWrapper}; //! use tokenizers::pre_tokenizers::byte_level::ByteLevel; //! use tokenizers::pre_tokenizers::PreTokenizerWrapper; //! use tokenizers::processors::PostProcessorWrapper; //! use tokenizers::{AddedToken, Model, Result, TokenizerBuilder}; //! //! use std::path::Path; //! //! fn main() -> Result<()> { //! let vocab_size: usize = 100; //! //! let mut trainer = BpeTrainerBuilder::new() //! .show_progress(true) //! .vocab_size(vocab_size) //! .min_frequency(0) //! .special_tokens(vec![ //! AddedToken::from(String::from(""), true), //! AddedToken::from(String::from(""), true), //! AddedToken::from(String::from(""), true), //! AddedToken::from(String::from(""), true), //! AddedToken::from(String::from(""), true), //! ]) //! .build(); //! //! let mut tokenizer = TokenizerBuilder::new() //! .with_model(BPE::default()) //! .with_normalizer(Some(Sequence::new(vec![ //! Strip::new(true, true).into(), //! NFC.into(), //! ]))) //! .with_pre_tokenizer(Some(ByteLevel::default())) //! .with_post_processor(Some(ByteLevel::default())) //! .with_decoder(Some(ByteLevel::default())) //! .build()?; //! //! let pretty = false; //! tokenizer //! .train_from_files( //! &mut trainer, //! vec!["path/to/vocab.txt".to_string()], //! )? //! .save("tokenizer.json", pretty)?; //! //! Ok(()) //! } //! ``` //! //! # Additional information //! //! - tokenizers is designed to leverage CPU parallelism when possible. The level of parallelism is determined //! by the total number of core/threads your CPU provides but this can be tuned by setting the `RAYON_RS_NUM_THREADS` //! environment variable. As an example setting `RAYON_RS_NUM_THREADS=4` will allocate a maximum of 4 threads. //! **_Please note this behavior may evolve in the future_** //! //! # Features //! //! - **progressbar**: The progress bar visualization is enabled by default. It might be disabled if //! compilation for certain targets is not supported by the [termios](https://crates.io/crates/termios) //! dependency of the [indicatif](https://crates.io/crates/indicatif) progress bar. //! //! - **http**: This feature enables downloading the tokenizer via HTTP. It is disabled by default. //! With this feature enabled, `Tokenizer::from_pretrained` becomes accessible. #[macro_use] extern crate log; #[macro_use] extern crate derive_builder; #[macro_use] pub mod utils; pub mod decoders; pub mod models; pub mod normalizers; pub mod pre_tokenizers; pub mod processors; pub mod tokenizer; // Re-export from tokenizer pub use tokenizer::*; // Re-export also parallelism utils pub use utils::parallelism; // Re-export for from_pretrained #[cfg(feature = "http")] pub use utils::from_pretrained::FromPretrainedParameters; ================================================ FILE: tokenizers/src/models/bpe/mod.rs ================================================ //! [Byte Pair Encoding](https://www.aclweb.org/anthology/P16-1162/) model. use std::{iter, mem}; mod model; mod serialization; pub mod trainer; mod word; type Pair = (u32, u32); /// Errors that can be encountered while using or constructing a `BPE` model. #[derive(thiserror::Error, Debug)] pub enum Error { /// An error encountered while reading files mainly. #[error("IoError: {0}")] Io(#[from] std::io::Error), /// An error forwarded from Serde, while parsing JSON #[error("JsonError: {0}")] JsonError(#[from] serde_json::Error), /// When the vocab.json file is in the wrong format #[error("Bad vocabulary json file")] BadVocabulary, /// When the merges.txt file is in the wrong format. This error holds the line /// number of the line that caused the error. #[error("Merges text file invalid at line {0}")] BadMerges(usize), /// If a token found in merges, is not in the vocab #[error("Token `{0}` out of vocabulary")] MergeTokenOutOfVocabulary(String), /// If the provided unk token is out of vocabulary #[error("Unk token `{0}` not found in the vocabulary")] UnkTokenOutOfVocabulary(String), /// Dropout not between 0 and 1. #[error("Dropout should be between 0 and 1, inclusive")] InvalidDropout, } /// Provides access to the `FirstLastIterator` to any Iterator pub(crate) trait WithFirstLastIterator: Iterator + Sized { fn with_first_and_last(self) -> FirstLastIterator; } impl WithFirstLastIterator for I where I: Iterator, { fn with_first_and_last(self) -> FirstLastIterator { FirstLastIterator { first: true, iter: self.peekable(), } } } /// Provides information about whether an item is the first and/or the last of the iterator pub(crate) struct FirstLastIterator where I: Iterator, { first: bool, iter: iter::Peekable, } impl Iterator for FirstLastIterator where I: Iterator, { /// (is_first, is_last, item) type Item = (bool, bool, I::Item); fn next(&mut self) -> Option { let first = mem::replace(&mut self.first, false); self.iter .next() .map(|e| (first, self.iter.peek().is_none(), e)) } } // Re-export pub use model::*; pub use trainer::*; use word::*; ================================================ FILE: tokenizers/src/models/bpe/model.rs ================================================ use super::{super::OrderedVocabIter, trainer::BpeTrainer, Error, Pair, Word}; use crate::tokenizer::{Model, Result, Token}; use crate::utils::cache::{Cache, DEFAULT_CACHE_CAPACITY, MAX_LENGTH}; use crate::utils::iter::ResultShunt; use ahash::AHashMap; use serde_json::Value; use std::borrow::Cow; use std::collections::HashMap; use std::{ fs::File, io::prelude::*, io::{BufRead, BufReader}, path::{Path, PathBuf}, }; pub type Vocab = AHashMap; type VocabR = AHashMap; pub type MergeMap = AHashMap; pub type Merges = Vec<(String, String)>; struct Config { files: Option<(String, String)>, vocab: Vocab, merges: Merges, cache_capacity: usize, dropout: Option, unk_token: Option, continuing_subword_prefix: Option, end_of_word_suffix: Option, fuse_unk: bool, byte_fallback: bool, ignore_merges: bool, } /// A `BpeBuilder` can be used to create a `BPE` model with a custom configuration. pub struct BpeBuilder { config: Config, } impl Default for BpeBuilder { fn default() -> Self { Self { config: Config { files: None, vocab: AHashMap::new(), merges: vec![], cache_capacity: DEFAULT_CACHE_CAPACITY, dropout: None, unk_token: None, continuing_subword_prefix: None, end_of_word_suffix: None, fuse_unk: false, byte_fallback: false, ignore_merges: false, }, } } } impl BpeBuilder { /// Constructs a new `BpeBuilder`. pub fn new() -> Self { Self::default() } /// Set the input files. #[must_use] pub fn files(mut self, vocab: String, merges: String) -> Self { self.config.files = Some((vocab, merges)); self } /// Set the vocab (token -> ID) and merges mappings. #[must_use] pub fn vocab_and_merges>>( mut self, vocab: V, merges: Merges, ) -> Self { self.config.vocab = vocab.into(); self.config.merges = merges; self } /// Set the cache's capacity. Set to 0 if you want to disable caching. #[must_use] pub fn cache_capacity(mut self, capacity: usize) -> Self { self.config.cache_capacity = capacity; self } /// Use [dropout](https://arxiv.org/abs/1910.13267) with the model. #[must_use] pub fn dropout(mut self, dropout: f32) -> Self { self.config.dropout = Some(dropout); self } /// Set the `UNK` token for the vocab. #[must_use] pub fn unk_token(mut self, unk_token: String) -> Self { self.config.unk_token = Some(unk_token); self } /// Set the `continuing_subword_prefix` option. #[must_use] pub fn continuing_subword_prefix(mut self, prefix: String) -> Self { self.config.continuing_subword_prefix = Some(prefix); self } /// Set the `end_of_word_suffix` option. #[must_use] pub fn end_of_word_suffix(mut self, prefix: String) -> Self { self.config.end_of_word_suffix = Some(prefix); self } /// Set the `fuse_unk` option. #[must_use] pub fn fuse_unk(mut self, fuse_unk: bool) -> Self { self.config.fuse_unk = fuse_unk; self } /// Set the `byte_fallback` option. #[must_use] pub fn byte_fallback(mut self, byte_fallback: bool) -> Self { self.config.byte_fallback = byte_fallback; self } /// Set the `ignore_merges` option. #[must_use] pub fn ignore_merges(mut self, ignore_merges: bool) -> Self { self.config.ignore_merges = ignore_merges; self } /// Returns a `BPE` model that uses the `BpeBuilder`'s configuration. pub fn build(mut self) -> Result { // Validate dropout. if let Some(p) = self.config.dropout { if !(0.0..=1.0).contains(&p) { return Err(Error::InvalidDropout.into()); } } // Read files if necessary if let Some((vocab, merges)) = self.config.files { let (v, m) = BPE::read_file(&vocab, &merges)?; self.config.vocab = v; self.config.merges = m; } let vocab_r = self .config .vocab .iter() .map(|(key, val)| (*val, key.to_owned())) .collect(); let cache = match self.config.cache_capacity { 0 => None, capacity => Some(Cache::new(capacity)), }; let vocab = self.config.vocab; let prefix_len = if let Some(prefix) = &self.config.continuing_subword_prefix { prefix.len() } else { 0 }; let merge_map: MergeMap = self .config .merges .into_iter() .enumerate() .map(|(i, (a, b))| -> Result<(Pair, (u32, u32))> { let a_id = vocab .get(&a) .ok_or_else(|| Error::MergeTokenOutOfVocabulary(a.to_owned()))?; let b_id = vocab .get(&b) .ok_or_else(|| Error::MergeTokenOutOfVocabulary(b.to_owned()))?; let new_token = format!("{}{}", a, &b[prefix_len..]); let new_id = vocab .get(&new_token) .ok_or(Error::MergeTokenOutOfVocabulary(new_token))?; Ok(((*a_id, *b_id), (i as u32, *new_id))) }) .collect::>()?; // merges.insert(pair, (rank as u32, *new_id)); Ok(BPE { vocab, vocab_r, merges: merge_map, cache, dropout: self.config.dropout, unk_token: self.config.unk_token, continuing_subword_prefix: self.config.continuing_subword_prefix, end_of_word_suffix: self.config.end_of_word_suffix, fuse_unk: self.config.fuse_unk, byte_fallback: self.config.byte_fallback, ignore_merges: self.config.ignore_merges, }) } } /// A [Byte Pair Encoding](https://www.aclweb.org/anthology/P16-1162/) model. #[derive(PartialEq)] pub struct BPE { /// The vocabulary assigns a number to each token. pub(crate) vocab: Vocab, /// Reversed vocabulary, to rebuild sentences. pub(crate) vocab_r: VocabR, /// Contains the mapping between Pairs and their (rank, new_id). pub(crate) merges: MergeMap, /// Contains the cache for optimizing the encoding step. cache: Option>, /// Dropout probability for merges. 0.0 = no dropout is the default. At 1.0, tokenization will /// perform no merges, so the result will just be characters. pub dropout: Option, /// The unknown token to be used when we encounter an unknown char pub unk_token: Option, /// An optional prefix to use on any subword that exist only behind another one pub continuing_subword_prefix: Option, /// An optional suffix to characterize and end-of-word subword pub end_of_word_suffix: Option, /// Do multiple unk tokens get fused pub fuse_unk: bool, /// Byte fallback from sentence pieces, instead of UNK, uses `"<0x00>"` /// for each byte in the unk token pub byte_fallback: bool, /// Whether or not to direct output words if they are part of the vocab. pub ignore_merges: bool, } impl std::fmt::Debug for BPE { fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { fmt.debug_struct("BPE") .field("dropout", &self.dropout) .field("unk_token", &self.unk_token) .field("continuing_subword_prefix", &self.continuing_subword_prefix) .field("end_of_word_suffix", &self.end_of_word_suffix) .field("fuse_unk", &self.fuse_unk) .field("byte_fallback", &self.byte_fallback) .field("vocab", &self.vocab.len()) .field("merges", &self.merges.len()) .field("ignore_merges", &self.ignore_merges) .finish() } } impl Default for BPE { fn default() -> Self { Self::builder().build().unwrap() } } impl Clone for BPE { // `Clone` can't be derive because it's not implemented for `Cache`. // To keep things simple when we clone, the new BPE will start with a fresh cache. fn clone(&self) -> Self { let fresh_cache = self.cache.as_ref().map(|cache| cache.fresh()); Self { vocab: self.vocab.clone(), vocab_r: self.vocab_r.clone(), merges: self.merges.clone(), cache: fresh_cache, dropout: self.dropout, unk_token: self.unk_token.clone(), continuing_subword_prefix: self.continuing_subword_prefix.clone(), end_of_word_suffix: self.end_of_word_suffix.clone(), fuse_unk: self.fuse_unk, byte_fallback: self.byte_fallback, ignore_merges: self.ignore_merges, } } } /// Converts the merges strings (for example from `merges.txt` file) with the format /// "{pair_a} {pair_b}" into the format expected by the BPE struct pub(crate) fn convert_merges_to_hashmap>( iter: I, _vocab: &Vocab, ) -> Result { let mut merges = vec![]; let lines = iter.filter(|l| !l.starts_with("#version")); for (rank, line) in lines.enumerate() { let parts = line.split(' ').collect::>(); if parts.len() != 2 { return Err(Error::BadMerges(rank + 1).into()); } merges.push((parts[0].to_string(), parts[1].to_string())); } Ok(merges) } impl BPE { /// Initialize a `BpeBuilder`. pub fn builder() -> BpeBuilder { BpeBuilder::new() } /// Create a new BPE model with the given vocab and merges. pub fn new(vocab: Vocab, merges: Merges) -> Self { Self::builder() .vocab_and_merges(vocab, merges) .build() .unwrap() } /// Initialize a BpeBuilder model from vocab and merges files pub fn from_file(vocab: &str, merges: &str) -> BpeBuilder { Self::builder().files(vocab.to_owned(), merges.to_owned()) } /// Read the given files to extract the vocab and merges pub fn read_file(vocab: &str, merges: &str) -> Result<(Vocab, Merges)> { // Read vocab.json let vocab_file = File::open(vocab)?; let mut vocab_file = BufReader::new(vocab_file); let mut buffer = String::new(); vocab_file.read_to_string(&mut buffer)?; let json: Value = serde_json::from_str(&buffer)?; let mut vocab = AHashMap::new(); match json { Value::Object(m) => { for (token, id) in m { if let Value::Number(id) = id { let id = id.as_u64().ok_or(Error::BadVocabulary)? as u32; vocab.insert(token, id); } } } _ => return Err(Box::new(Error::BadVocabulary)), }; // Read merges file let merge_file = File::open(merges)?; let merge_file = BufReader::new(merge_file); let merges = ResultShunt::process(merge_file.lines(), |iter| { convert_merges_to_hashmap(iter, &vocab) })??; Ok((vocab, merges)) } /// Reset the cache. pub fn clear_cache(&self) { if let Some(ref cache) = self.cache { cache.clear() } } /// Resize the cache pub fn resize_cache(&mut self, capacity: usize) { if let Some(ref mut cache) = self.cache { cache.resize(capacity); } } pub fn get_vocab(&self) -> HashMap { self.vocab.clone().into_iter().collect() } pub fn get_unk_token(&self) -> &Option { &self.unk_token } pub fn get_continuing_subword_prefix(&self) -> &Option { &self.continuing_subword_prefix } fn merge_word(&self, w: &str) -> Result { let mut indices = w.char_indices().map(|(idx, _)| idx).peekable(); let mut word = Word::with_capacity(w.len()); let mut unk: Option<(u32, usize)> = None; while let Some(i) = indices.next() { let end = indices.peek(); let is_first = i == 0; let is_last = end.is_none(); let mut s = if let Some(e) = end { Cow::Borrowed(&w[i..*e]) } else { Cow::Borrowed(&w[i..]) }; let byte_len = s.len(); // Add the `continuing_subword_prefix` if relevant if !is_first { if let Some(ref prefix) = self.continuing_subword_prefix { s = format!("{prefix}{s}").into() } } // Add the `end_of_word_suffix` if relevant if is_last { if let Some(ref suffix) = self.end_of_word_suffix { s = format!("{s}{suffix}").into() } } if let Some(id) = self.vocab.get(s.as_ref()) { if let Some((unk_id, unk_len)) = unk { word.add(unk_id, unk_len); unk = None; } word.add(*id, byte_len); } else { if self.byte_fallback { let tokens: Option> = s .bytes() .map(|b| -> Option<&u32> { let code = format!("<{b:#04X}>"); self.vocab.get(&code) }) .collect(); if let Some(tokens) = tokens { for t in tokens { word.add(*t, 1); } continue; } } if let Some(unk_token) = &self.unk_token { unk = match (unk, self.fuse_unk) { (Some((unk_id, unk_len)), true) => { // Fuse unk Some((unk_id, unk_len + byte_len)) } (Some((unk_id, unk_len)), false) => { // Do not fuse unk, add the previous one word.add(unk_id, unk_len); Some(( *self.vocab.get(unk_token).ok_or_else(|| { Error::UnkTokenOutOfVocabulary(unk_token.to_owned()) })?, byte_len, )) } _ => Some(( *self.vocab.get(unk_token).ok_or_else(|| { Error::UnkTokenOutOfVocabulary(unk_token.to_owned()) })?, byte_len, )), }; } } } if let Some((unk_id, unk_len)) = unk { word.add(unk_id, unk_len); } word.merge_all(&self.merges, self.dropout); Ok(word) } fn word_to_tokens<'a>(&'a self, word: &'a Word) -> impl Iterator + 'a { word.get_chars_iter() .zip(word.get_offsets_iter()) .map(move |(id, offsets)| Token::new(id, self.vocab_r[&id].clone(), offsets)) } fn tokenize_with_cache(&self, sequence: &str) -> Result> { if self.ignore_merges { if let Some(id) = self.vocab.get(sequence) { return Ok(vec![Token::new( *id, sequence.to_string(), (0, sequence.len()), )]); } } if let Some(ref hit) = self.cache.as_ref().and_then(|c| c.get(sequence)) { return Ok(self.word_to_tokens(hit).collect()); } let word = self.merge_word(sequence)?; let ret = self.word_to_tokens(&word).collect(); if let Some(ref cache) = self.cache { if sequence.len() < MAX_LENGTH { cache.set(sequence.to_owned(), word); } } Ok(ret) } } impl Model for BPE { type Trainer = BpeTrainer; fn get_vocab(&self) -> HashMap { self.vocab.clone().into_iter().collect() } fn get_vocab_size(&self) -> usize { self.vocab.len() } fn tokenize(&self, sequence: &str) -> Result> { if sequence.is_empty() { return Ok(vec![]); } if self.dropout.is_none() || self.dropout == Some(0.0) { self.tokenize_with_cache(sequence) } else { let word = self.merge_word(sequence)?; Ok(self.word_to_tokens(&word).collect()) } } fn token_to_id(&self, token: &str) -> Option { self.vocab.get(token).copied() } fn id_to_token(&self, id: u32) -> Option { self.vocab_r.get(&id).cloned() } fn save(&self, folder: &Path, name: Option<&str>) -> Result> { let vocab_file_name = match name { Some(name) => format!("{name}-vocab.json"), None => "vocab.json".to_string(), }; // Write vocab.json let vocab_path: PathBuf = [folder, Path::new(vocab_file_name.as_str())] .iter() .collect(); let mut vocab_file = File::create(&vocab_path)?; let order_vocab_iter = OrderedVocabIter::new(&self.vocab_r); let serialized = serde_json::to_string(&order_vocab_iter)?; vocab_file.write_all(serialized.as_bytes())?; // Write merges.txt let merges_file_name = match name { Some(name) => format!("{name}-merges.txt"), None => "merges.txt".to_string(), }; let merges_path: PathBuf = [folder, Path::new(merges_file_name.as_str())] .iter() .collect(); let mut merges_file = File::create(&merges_path)?; let mut merges: Vec<(&Pair, &u32)> = self .merges .iter() .map(|(pair, (rank, _))| (pair, rank)) .collect(); merges.sort_unstable_by_key(|k| *k.1); merges_file.write_all(b"#version: 0.2\n")?; merges_file.write_all( &merges .into_iter() .flat_map(|(pair, _)| { format!("{} {}\n", self.vocab_r[&pair.0], self.vocab_r[&pair.1]).into_bytes() }) .collect::>()[..], )?; Ok(vec![vocab_path, merges_path]) } fn get_trainer(&self) -> BpeTrainer { BpeTrainer::default() } } #[cfg(test)] mod tests { use super::*; use tempfile::NamedTempFile; #[test] fn test_ordered_vocab_iter() { let vocab_r: VocabR = [ (0, "a".into()), (1, "b".into()), (2, "c".into()), (3, "ab".into()), ] .iter() .cloned() .collect(); let order_vocab_iter = OrderedVocabIter::new(&vocab_r); let serialized = serde_json::to_string(&order_vocab_iter).unwrap(); assert_eq!(serialized, "{\"a\":0,\"b\":1,\"c\":2,\"ab\":3}"); } #[test] fn test_unk_not_fused() { let vocab: Vocab = [("".into(), 0), ("a".into(), 1), ("b".into(), 2)] .iter() .cloned() .collect(); let bpe = BpeBuilder::default() .vocab_and_merges(vocab, vec![]) .unk_token("".to_string()) .build() .unwrap(); let tokens = bpe.tokenize("c").unwrap(); assert_eq!(tokens, vec![Token::new(0u32, "".into(), (0, 1)),]); let tokens = bpe.tokenize("cc").unwrap(); assert_eq!( tokens, vec![ Token::new(0u32, "".into(), (0, 1)), Token::new(0u32, "".into(), (1, 2)), ] ); let tokens = bpe.tokenize("accb").unwrap(); assert_eq!( tokens, vec![ Token::new(1u32, "a".into(), (0, 1)), Token::new(0u32, "".into(), (1, 2)), Token::new(0u32, "".into(), (2, 3)), Token::new(2u32, "b".into(), (3, 4)), ] ); } #[test] fn test_unk_get_fused() { let vocab: Vocab = [("".into(), 0), ("a".into(), 1), ("b".into(), 2)] .iter() .cloned() .collect(); let bpe = BpeBuilder::default() .vocab_and_merges(vocab, vec![]) .unk_token("".to_string()) .fuse_unk(true) .build() .unwrap(); let tokens = bpe.tokenize("c").unwrap(); assert_eq!(tokens, vec![Token::new(0u32, "".into(), (0, 1)),]); let tokens = bpe.tokenize("cc").unwrap(); assert_eq!(tokens, vec![Token::new(0u32, "".into(), (0, 2)),]); let tokens = bpe.tokenize("accb").unwrap(); assert_eq!( tokens, vec![ Token::new(1u32, "a".into(), (0, 1)), Token::new(0u32, "".into(), (1, 3)), Token::new(2u32, "b".into(), (3, 4)), ] ); } #[test] // Test tokenization. With dropout set to 0 tokenization is deterministic, // so we know exactly what the result should be. // // To test this, we'll build a simple model to tokenize the word 'unrelated'. fn test_tokenize_with_and_without_dropout() { let vocab: Vocab = [ ("u".into(), 0), ("n".into(), 1), ("r".into(), 2), ("e".into(), 3), ("l".into(), 4), ("a".into(), 5), ("t".into(), 6), ("d".into(), 7), ("re".into(), 8), ("at".into(), 9), ("ed".into(), 10), ("un".into(), 11), ("ated".into(), 12), ("rel".into(), 13), ("related".into(), 14), ("unrelated".into(), 15), ] .iter() .cloned() .collect(); let merges: Merges = vec![ ("r".to_string(), "e".to_string()), ("a".to_string(), "t".to_string()), ("e".to_string(), "d".to_string()), ("u".to_string(), "n".to_string()), ("at".to_string(), "ed".to_string()), ("re".to_string(), "l".to_string()), ("rel".to_string(), "ated".to_string()), ("un".to_string(), "related".to_string()), ]; let mut bpe = BPE::new(vocab, merges); // With no dropout: let tokens = bpe.tokenize("unrelated").unwrap(); assert_eq!(tokens, vec![Token::new(15u32, "unrelated".into(), (0, 9))]); // With dropout = 0.0 (equivalent to dropout == none) bpe.dropout = Some(0.0); let tokens = bpe.tokenize("unrelated").unwrap(); assert_eq!(tokens, vec![Token::new(15u32, "unrelated".into(), (0, 9))]); // Now set dropout to 1.0. Result should be no merges performed. bpe.dropout = Some(1.0); let tokens = bpe.tokenize("unrelated").unwrap(); assert_eq!( tokens, vec![ Token::new(0u32, "u".into(), (0, 1)), Token::new(1u32, "n".into(), (1, 2)), Token::new(2u32, "r".into(), (2, 3)), Token::new(3u32, "e".into(), (3, 4)), Token::new(4u32, "l".into(), (4, 5)), Token::new(5u32, "a".into(), (5, 6)), Token::new(6u32, "t".into(), (6, 7)), Token::new(3u32, "e".into(), (7, 8)), Token::new(7u32, "d".into(), (8, 9)), ] ); // Now try with dropout between 0 and 1. bpe.dropout = Some(0.5); let tokens = bpe.tokenize("unrelated").unwrap(); assert!(!tokens.is_empty() && tokens.len() <= 9); } #[test] // Ensure `BPE::from_file` works as expected. fn test_bpe_from_file() { // Set up vocab file. let mut vocab_file = NamedTempFile::new().unwrap(); vocab_file .write_all(b"{\"a\": 0, \"b\": 1, \"c\": 2, \"ab\": 3}") .unwrap(); // Set up merges file. let mut merges_file = NamedTempFile::new().unwrap(); merges_file.write_all(b"#version: 0.2\na b").unwrap(); // Make sure we can instantiate a BPE model from the files. let builder = BPE::from_file( vocab_file.path().to_str().unwrap(), merges_file.path().to_str().unwrap(), ); let bpe = builder.build().unwrap(); // Check merges. assert_eq!(bpe.merges.get(&(0, 1)).unwrap(), &(0u32, 3u32)); // Check vocab. assert_eq!(bpe.vocab.get("a").unwrap(), &0u32); assert_eq!(bpe.vocab.get("b").unwrap(), &1u32); assert_eq!(bpe.vocab.get("c").unwrap(), &2u32); assert_eq!(bpe.vocab.get("ab").unwrap(), &3u32); } #[test] // Ensure BPEBuilder with dropout = 0.0 doesn't error fn test_bpe_with_dropout_0() { let bpe = BPE::builder().dropout(0.0).build().unwrap(); assert_eq!(bpe.dropout, Some(0.0)); } #[test] // Ensure `BPE::from_file` works as expected. fn test_bpe_with_continuing_subword_prefix() { let vocab: Vocab = vec![ ("a".to_string(), 0), ("##b".to_string(), 1), ("##c".to_string(), 2), ("ab".to_string(), 3), ("abc".to_string(), 4), ] .into_iter() .collect(); let merges = vec![ ("a".to_string(), "##b".to_string()), ("ab".to_string(), "##c".to_string()), ]; let bpe = BPE::builder() .vocab_and_merges(vocab, merges) .unk_token("[UNK]".to_string()) .continuing_subword_prefix("##".to_string()) .build() .unwrap(); let res = bpe.tokenize("ab"); assert_eq!( res.unwrap(), vec![Token { id: 3, value: "ab".to_string(), offsets: (0, 2) }] ); let res = bpe.tokenize("abc"); assert_eq!( res.unwrap(), vec![Token { id: 4, value: "abc".to_string(), offsets: (0, 3) }] ); } #[test] // Ensure `MergeTokenOutOfVocabulary` error is returned when it should be. fn test_bpe_from_file_merge_token_oov() { // Set up vocab file. let mut vocab_file = NamedTempFile::new().unwrap(); vocab_file .write_all(b"{\"a\": 0, \"b\": 1, \"c\": 2, \"ab\": 3}") .unwrap(); // Set up merges file. let mut merges_file = NamedTempFile::new().unwrap(); merges_file.write_all(b"#version: 0.2\na b\na d").unwrap(); // Ensure the result of BPE::from_file is a MergeTokenOutOfVocabulary error. match BPE::from_file( vocab_file.path().to_str().unwrap(), merges_file.path().to_str().unwrap(), ) .build() { Ok(_) => unreachable!(), Err(err) => match err.downcast_ref::() { Some(Error::MergeTokenOutOfVocabulary(token)) => { assert_eq!(*token, String::from("d")) } _ => unreachable!(), }, } } #[test] // Ensure `BadMerges` error is returned when there is an invalid line in the // merges.txt file. fn test_bpe_from_file_bad_merges() { // Set up vocab file. let mut vocab_file = NamedTempFile::new().unwrap(); vocab_file .write_all("{\"a\": 0, \"b\": 1, \"c\": 2, \"ab\": 3}".as_bytes()) .unwrap(); // Set up merges file with a bad line. let mut merges_file = NamedTempFile::new().unwrap(); merges_file.write_all(b"#version: 0.2\na b\nc").unwrap(); // Ensure the result of BPE::from_file is a BadMerges error. match BPE::from_file( vocab_file.path().to_str().unwrap(), merges_file.path().to_str().unwrap(), ) .build() { Ok(_) => unreachable!(), Err(err) => match err.downcast_ref::() { Some(Error::BadMerges(line)) => assert_eq!(*line, 2), _ => unreachable!(), }, } } #[test] fn test_bpe_byte_fallback() { // 0x61 == 'a' in bytes let vocab: Vocab = [("".into(), 0), ("<0x61>".into(), 1)] .iter() .cloned() .collect(); let bpe = BpeBuilder::default() .vocab_and_merges(vocab, vec![]) .unk_token("".to_string()) .byte_fallback(true) .build() .unwrap(); let tokens = bpe.tokenize("c").unwrap(); assert_eq!(tokens, vec![Token::new(0u32, "".into(), (0, 1)),]); let tokens = bpe.tokenize("a").unwrap(); assert_eq!(tokens, vec![Token::new(1u32, "<0x61>".into(), (0, 1)),]); } #[test] fn test_bpe_byte_fallback_newline() { // 0x0A == '\n' in bytes let vocab: Vocab = [("".into(), 0), ("<0x0A>".into(), 1)] .iter() .cloned() .collect(); let bpe = BpeBuilder::default() .vocab_and_merges(vocab, vec![]) .unk_token("".to_string()) .byte_fallback(true) .build() .unwrap(); let tokens = bpe.tokenize("\n").unwrap(); assert_eq!(tokens, vec![Token::new(1u32, "<0x0A>".into(), (0, 1)),]); } #[test] fn test_ignore_merges() { // 0x0A == '\n' in bytes let vocab: Vocab = [ (".:.:".into(), 0), ("Ġbelirtilen".into(), 1), (".".into(), 2), (":".into(), 3), ("bel".into(), 4), ("irtilen".into(), 5), ("Ġ".into(), 6), (".:".into(), 7), ("belirtilen".into(), 8), (".:.".into(), 9), ("be".into(), 10), ("l".into(), 11), ("ir".into(), 12), ("ti".into(), 13), ("en".into(), 14), ("irtil".into(), 15), ("irti".into(), 16), ("i".into(), 17), ("r".into(), 18), ("t".into(), 19), ("b".into(), 20), ("e".into(), 21), ("n".into(), 22), ] .iter() .cloned() .collect(); let mut bpe = BpeBuilder::default() .vocab_and_merges( vocab, vec![ (".".into(), ":".into()), ("b".into(), "e".into()), ("be".into(), "l".into()), ("i".into(), "r".into()), ("t".into(), "i".into()), ("ir".into(), "ti".into()), ("e".into(), "n".into()), ("irti".into(), "l".into()), ], ) .ignore_merges(true) .build() .unwrap(); let tokens = bpe.tokenize(".:.:").unwrap(); assert_eq!(tokens, vec![Token::new(0u32, ".:.:".into(), (0, 4))]); let tokens = bpe.tokenize("Ġbelirtilen").unwrap(); assert_eq!( tokens, vec![Token::new(1u32, "Ġbelirtilen".into(), (0, 12))] ); bpe.ignore_merges = false; let tokens = bpe.tokenize(".:.:").unwrap(); assert_eq!( tokens, vec![ Token::new(7u32, ".:".into(), (0, 2)), Token::new(7u32, ".:".into(), (2, 4)) ] ); let tokens = bpe.tokenize("Ġbelirtilen").unwrap(); assert_eq!( tokens, vec![ Token { id: 6, value: "Ġ".into(), offsets: (0, 2) }, Token { id: 4, value: "bel".into(), offsets: (2, 5) }, Token { id: 15, value: "irtil".into(), offsets: (5, 10) }, Token { id: 14, value: "en".into(), offsets: (10, 12) } ] ) } } ================================================ FILE: tokenizers/src/models/bpe/serialization.rs ================================================ use super::{super::OrderedVocabIter, convert_merges_to_hashmap, BpeBuilder, Pair, BPE}; use ahash::AHashMap; use serde::{ de::{Error, MapAccess, Visitor}, ser::SerializeStruct, Deserialize, Deserializer, Serialize, Serializer, }; impl Serialize for BPE { fn serialize(&self, serializer: S) -> Result where S: Serializer, { let mut model = serializer.serialize_struct("BPE", 8)?; // Start by small fields model.serialize_field("type", "BPE")?; model.serialize_field("dropout", &self.dropout)?; model.serialize_field("unk_token", &self.unk_token)?; model.serialize_field("continuing_subword_prefix", &self.continuing_subword_prefix)?; model.serialize_field("end_of_word_suffix", &self.end_of_word_suffix)?; model.serialize_field("fuse_unk", &self.fuse_unk)?; model.serialize_field("byte_fallback", &self.byte_fallback)?; model.serialize_field("ignore_merges", &self.ignore_merges)?; // Then the large ones let mut merges: Vec<(&Pair, &u32)> = self .merges .iter() .map(|(pair, (rank, _))| (pair, rank)) .collect(); merges.sort_unstable_by_key(|k| *k.1); let merges = merges .into_iter() .map(|(pair, _)| (self.vocab_r[&pair.0].clone(), self.vocab_r[&pair.1].clone())) .collect::>(); let ordered_vocab = OrderedVocabIter::new(&self.vocab_r); model.serialize_field("vocab", &ordered_vocab)?; model.serialize_field("merges", &merges)?; model.end() } } impl<'de> Deserialize<'de> for BPE { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { deserializer.deserialize_struct( "BPE", &[ "type", "dropout", "unk_token", "continuing_subword_prefix", "end_of_word_suffix", "fuse_unk", "byte_fallback", "ignore_merges", "vocab", "merges", ], BPEVisitor, ) } } struct BPEVisitor; impl<'de> Visitor<'de> for BPEVisitor { type Value = BPE; fn expecting(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { write!(fmt, "struct BPE") } fn visit_map(self, mut map: V) -> std::result::Result where V: MapAccess<'de>, { let mut builder = BpeBuilder::new(); let mut vocab: Option> = None; #[derive(Debug, Deserialize)] #[serde(untagged)] enum MergeType { Tuple(Vec<(String, String)>), Legacy(Vec), } let mut merges: Option = None; while let Some(key) = map.next_key::()? { match key.as_ref() { "dropout" => { if let Some(dropout) = map.next_value()? { builder = builder.dropout(dropout); } } "unk_token" => { if let Some(unk) = map.next_value()? { builder = builder.unk_token(unk); } } "continuing_subword_prefix" => { if let Some(prefix) = map.next_value()? { builder = builder.continuing_subword_prefix(prefix); } } "end_of_word_suffix" => { if let Some(suffix) = map.next_value()? { builder = builder.end_of_word_suffix(suffix); } } "fuse_unk" => { if let Some(suffix) = map.next_value()? { builder = builder.fuse_unk(suffix); } } "byte_fallback" => { if let Some(suffix) = map.next_value()? { builder = builder.byte_fallback(suffix); } } "ignore_merges" => { if let Some(suffix) = map.next_value()? { builder = builder.ignore_merges(suffix); } } "vocab" => vocab = Some(map.next_value()?), "merges" => merges = Some(map.next_value()?), "type" => match map.next_value()? { "BPE" => {} u => { return Err(serde::de::Error::invalid_value( serde::de::Unexpected::Str(u), &"BPE", )) } }, _ => {} } } if let (Some(vocab), Some(merges)) = (vocab, merges) { let merges = match merges { MergeType::Tuple(merges) => merges, MergeType::Legacy(merges) => { convert_merges_to_hashmap(merges.into_iter(), &vocab).map_err(Error::custom)? } }; builder = builder.vocab_and_merges(vocab, merges); Ok(builder.build().map_err(Error::custom)?) } else { Err(Error::custom("Missing vocab/merges")) } } } #[cfg(test)] mod test { use super::*; use crate::models::bpe::Vocab; #[test] fn test_serialization() { let vocab: Vocab = [ ("".into(), 0), ("a".into(), 1), ("b".into(), 2), ("ab".into(), 3), ] .iter() .cloned() .collect(); let bpe = BpeBuilder::default() .vocab_and_merges(vocab, vec![("a".to_string(), "b".to_string())]) .unk_token("".to_string()) .ignore_merges(true) .build() .unwrap(); let legacy = r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b":2,"ab":3},"merges":["a b"]}"#; let legacy = serde_json::from_str(legacy).unwrap(); assert_eq!(bpe, legacy); let data = serde_json::to_string(&bpe).unwrap(); assert_eq!( data, r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b":2,"ab":3},"merges":[["a","b"]]}"# ); let reconstructed = serde_json::from_str(&data).unwrap(); assert_eq!(bpe, reconstructed); // With a space in the token let vocab: Vocab = [ ("".into(), 0), ("a".into(), 1), ("b c d".into(), 2), ("ab c d".into(), 3), ] .iter() .cloned() .collect(); let bpe = BpeBuilder::default() .vocab_and_merges(vocab, vec![("a".to_string(), "b c d".to_string())]) .unk_token("".to_string()) .ignore_merges(true) .build() .unwrap(); let data = serde_json::to_string(&bpe).unwrap(); assert_eq!( data, r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b c d":2,"ab c d":3},"merges":[["a","b c d"]]}"# ); let reconstructed = serde_json::from_str(&data).unwrap(); assert_eq!(bpe, reconstructed); } #[test] fn test_serialization_ignore_merges() { let vocab: Vocab = [("".into(), 0), ("a".into(), 1), ("b".into(), 2)] .iter() .cloned() .collect(); let mut bpe = BpeBuilder::default() .vocab_and_merges(vocab, vec![]) .unk_token("".to_string()) .ignore_merges(true) .build() .unwrap(); let bpe_string = r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b":2},"merges":[]}"#; assert_eq!(serde_json::from_str::(bpe_string).unwrap(), bpe); bpe.ignore_merges = false; let bpe_string = r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"vocab":{"":0,"a":1,"b":2},"merges":[]}"#; assert_eq!(serde_json::from_str::(bpe_string).unwrap(), bpe); } } ================================================ FILE: tokenizers/src/models/bpe/trainer.rs ================================================ #![allow(clippy::map_entry)] use super::{Pair, WithFirstLastIterator, Word, BPE}; use crate::parallelism::*; use crate::tokenizer::{AddedToken, Result, Trainer}; use crate::utils::progress::{ProgressBar, ProgressStyle}; use ahash::{AHashMap, AHashSet}; use compact_str::CompactString; use dary_heap::OctonaryHeap; use serde::{Deserialize, Serialize}; use std::cmp::Ordering; use std::collections::HashSet; #[derive(Debug, Eq)] struct Merge { pair: Pair, count: u64, pos: AHashSet, } impl PartialEq for Merge { fn eq(&self, other: &Self) -> bool { self.count == other.count && self.pair == other.pair } } impl PartialOrd for Merge { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for Merge { fn cmp(&self, other: &Self) -> Ordering { if self.count != other.count { self.count.cmp(&other.count) } else { // Here we want ascending order other.pair.cmp(&self.pair) } } } struct Config { min_frequency: u64, vocab_size: usize, show_progress: bool, special_tokens: Vec, limit_alphabet: Option, initial_alphabet: AHashSet, continuing_subword_prefix: Option, end_of_word_suffix: Option, max_token_length: Option, } /// A `BpeTrainerBuilder` can be used to create a `BpeTrainer` with a custom /// configuration. pub struct BpeTrainerBuilder { config: Config, } impl Default for BpeTrainerBuilder { fn default() -> Self { Self { config: Config { min_frequency: 0, vocab_size: 30000, show_progress: true, special_tokens: vec![], limit_alphabet: None, initial_alphabet: AHashSet::new(), continuing_subword_prefix: None, end_of_word_suffix: None, max_token_length: None, }, } } } impl BpeTrainerBuilder { /// Constructs a new `BpeTrainerBuilder` pub fn new() -> Self { Self::default() } /// Set the expected minimum frequency #[must_use] pub fn min_frequency(mut self, frequency: u64) -> Self { self.config.min_frequency = frequency; self } /// Set the vocabulary size #[must_use] pub fn vocab_size(mut self, size: usize) -> Self { self.config.vocab_size = size; self } /// Set whether to show progress #[must_use] pub fn show_progress(mut self, show: bool) -> Self { self.config.show_progress = show; self } /// Set the special tokens #[must_use] pub fn special_tokens(mut self, tokens: Vec) -> Self { self.config.special_tokens = tokens; self } /// Set whether to limit the alphabet #[must_use] pub fn limit_alphabet(mut self, limit: usize) -> Self { self.config.limit_alphabet = Some(limit); self } /// Set the initial alphabet #[must_use] pub fn initial_alphabet(mut self, alphabet: HashSet) -> Self { let mut initial_alphabet = AHashSet::with_capacity(alphabet.len()); initial_alphabet.extend(alphabet); self.config.initial_alphabet = initial_alphabet; self } /// Set the continuing_subword_prefix #[must_use] pub fn continuing_subword_prefix(mut self, prefix: String) -> Self { self.config.continuing_subword_prefix = Some(prefix); self } /// Set the end_of_word_suffix #[must_use] pub fn end_of_word_suffix(mut self, suffix: String) -> Self { self.config.end_of_word_suffix = Some(suffix); self } /// Set max_token_length #[must_use] pub fn max_token_length(mut self, max_token_length: Option) -> Self { self.config.max_token_length = max_token_length; self } /// Constructs the final BpeTrainer pub fn build(self) -> BpeTrainer { BpeTrainer { min_frequency: self.config.min_frequency, vocab_size: self.config.vocab_size, show_progress: self.config.show_progress, special_tokens: self.config.special_tokens, limit_alphabet: self.config.limit_alphabet, initial_alphabet: self.config.initial_alphabet, continuing_subword_prefix: self.config.continuing_subword_prefix, end_of_word_suffix: self.config.end_of_word_suffix, max_token_length: self.config.max_token_length, words: AHashMap::new(), } } } /// In charge of training a `BPE` model /// /// # Examples /// /// ``` /// use tokenizers::tokenizer::Trainer; /// use tokenizers::models::bpe::{BPE, BpeTrainer}; /// /// let sequences = vec![ "Hello", "World" ]; /// /// let mut trainer = BpeTrainer::default(); /// trainer.feed(sequences.iter(), |s| Ok(vec![s.to_owned()])); /// /// let mut model = BPE::default(); /// let special_tokens = trainer.train(&mut model).unwrap(); /// ``` #[non_exhaustive] #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Eq)] pub struct BpeTrainer { /// The minimum frequency a pair must have to produce a merge operation pub min_frequency: u64, /// The target vocabulary size pub vocab_size: usize, /// Whether to show progress while training pub show_progress: bool, /// A list of special tokens that the model should know of pub special_tokens: Vec, /// Whether to limit the number of initial tokens that can be kept before computing merges pub limit_alphabet: Option, /// The initial alphabet we want absolutely to include. This allows to cover /// some characters that are not necessarily in the training set pub initial_alphabet: AHashSet, /// An optional prefix to use on any subword that exist only behind another one pub continuing_subword_prefix: Option, /// An optional suffix to characterize and end-of-word subword pub end_of_word_suffix: Option, /// An optional parameter to limit the max length of any single token pub max_token_length: Option, words: AHashMap, } impl Default for BpeTrainer { fn default() -> Self { Self::builder().build() } } impl BpeTrainer { pub fn new(min_frequency: u64, vocab_size: usize) -> Self { Self { min_frequency, vocab_size, ..Default::default() } } pub fn builder() -> BpeTrainerBuilder { BpeTrainerBuilder::new() } /// Setup a progress bar if asked to show progress fn setup_progress(&self) -> Option { if self.show_progress { let p = ProgressBar::new(0); p.set_style( ProgressStyle::default_bar() .template("[{elapsed_precise}] {msg:<30!} {wide_bar} {pos:<9!}/{len:>9!}") .expect("Invalid progress template"), ); Some(p) } else { None } } /// Set the progress bar in the finish state fn finalize_progress(&self, p: &Option, final_len: usize) { if let Some(p) = p { p.set_length(final_len as u64); p.finish(); println!(); } } /// Update the progress bar with the new provided length and message fn update_progress(&self, p: &Option, len: usize, message: &'static str) { if let Some(p) = p { p.set_message(message); p.set_length(len as u64); p.reset(); } } /// Add the provided special tokens to the initial vocabulary fn add_special_tokens( &self, w2id: &mut AHashMap, id2w: &mut Vec, ) { for token in &self.special_tokens { // get hash of content if !w2id.contains_key(&CompactString::from(&token.content)) { id2w.push(CompactString::from(&token.content)); w2id.insert(CompactString::from(&token.content), (id2w.len() - 1) as u32); } } } /// Compute the initial alphabet and limit it if relevant fn compute_alphabet( &self, wc: &AHashMap, w2id: &mut AHashMap, id2w: &mut Vec, ) { // Compute the alphabet from seen words let mut alphabet: AHashMap = AHashMap::new(); for (word, count) in wc { for c in word.chars() { *alphabet.entry(c).or_default() += *count as usize; } } // Also include anything from the provided initial alphabet for c in &self.initial_alphabet { *alphabet.entry(*c).or_default() = usize::MAX; } let mut kept = alphabet.iter().collect::>(); // Compute the number of chars to remove from the alphabet // If `limit_alphabet < initial_alphabet.len()`, some of these initial characters // will be removed let to_remove = self .limit_alphabet .map(|limit| alphabet.len().saturating_sub(limit)) .unwrap_or(0); // Remove the unwanted chars if to_remove > 0 { kept.sort_unstable_by_key(|k| *k.1); kept.drain(..to_remove); } // Keep the initial alphabet (sorted for determinism) kept.sort_unstable_by_key(|k| *k.0 as u32); kept.into_iter().for_each(|(c, _)| { let s = c.to_string(); /* if !w2id.contains_key(&s) { id2w.push(s.clone()); w2id.insert(s, (id2w.len() - 1) as u32); } */ // u64 hash version if !w2id.contains_key(&CompactString::from(&s)) { id2w.push(CompactString::from(&s)); w2id.insert(CompactString::from(&s), (id2w.len() - 1) as u32); } }); } /// Tokenize words and add subwords to the vocabulary when relevant fn tokenize_words( &self, wc: &AHashMap, w2id: &mut AHashMap, id2w: &mut Vec, p: &Option, ) -> (Vec, Vec) { let mut words: Vec = Vec::with_capacity(wc.len()); let mut counts: Vec = Vec::with_capacity(wc.len()); for (word, count) in wc { let mut current_word = Word::new(); counts.push(*count); for (is_first, is_last, c) in word.chars().with_first_and_last() { let mut s = c.to_string(); if w2id.contains_key(&CompactString::from(&s)) { // Found the initial char in the authorized alphabet // Add the `continuing_subword_prefix` if relevant if !is_first { if let Some(prefix) = &self.continuing_subword_prefix { s.insert_str(0, prefix); } } // Add the `end_of_word_suffix` if relevant if is_last { if let Some(suffix) = &self.end_of_word_suffix { s.push_str(suffix); } } // Insert the new formed string if necessary if !w2id.contains_key(&CompactString::from(&s)) { id2w.push(CompactString::from(&s)); w2id.insert(CompactString::from(&s), (id2w.len() - 1) as u32); } current_word.add(w2id[&CompactString::from(&s)], 1); // We do not care about the len here } } words.push(current_word); if let Some(p) = p { p.inc(1); } } (words, counts) } fn count_pairs( &self, words: &[Word], counts: &[u64], p: &Option, ) -> (AHashMap, AHashMap>) { words .maybe_par_iter() .enumerate() .map(|(i, word)| { let mut pair_counts = AHashMap::new(); let mut where_to_update: AHashMap> = AHashMap::new(); for window in word.get_chars().windows(2) { let cur_pair: Pair = (window[0], window[1]); // Initialize pair_counts and where_to_update for this pair if we just saw it // Then update counts *pair_counts.entry(cur_pair).or_default() += counts[i] as i32; where_to_update.entry(cur_pair).or_default().insert(i); } if let Some(p) = &p { p.inc(1); } (pair_counts, where_to_update) }) .reduce( || (AHashMap::new(), AHashMap::new()), |(mut pair_counts, mut where_to_update), (pc, wtu)| { for (k, v) in pc { *pair_counts.entry(k).or_default() += v; } for (k, v) in wtu { where_to_update.entry(k).or_default().extend(v); } (pair_counts, where_to_update) }, ) } pub fn do_train( &self, word_counts: &AHashMap, model: &mut BPE, ) -> Result> { let mut word_to_id: AHashMap = AHashMap::with_capacity(self.vocab_size); let mut id_to_word: Vec = Vec::with_capacity(self.vocab_size); let max_token_length: usize = self.max_token_length.unwrap_or(usize::MAX); let progress = self.setup_progress(); // // 1. Add all special tokens to the vocabulary // self.add_special_tokens(&mut word_to_id, &mut id_to_word); // // 2. Compute the initial alphabet // self.compute_alphabet(word_counts, &mut word_to_id, &mut id_to_word); // // 3. Tokenize words // self.update_progress(&progress, word_counts.len(), "Tokenize words"); let (mut words, counts) = self.tokenize_words(word_counts, &mut word_to_id, &mut id_to_word, &progress); self.finalize_progress(&progress, words.len()); // // 4. Count pairs in words // self.update_progress(&progress, words.len(), "Count pairs"); let (mut pair_counts, mut where_to_update) = self.count_pairs(&words, &counts, &progress); // Insert them in the queue let mut queue = OctonaryHeap::with_capacity(pair_counts.len()); where_to_update.drain().for_each(|(pair, pos)| { let count = pair_counts[&pair]; if count > 0 { queue.push(Merge { pair, count: count as u64, pos, }); } }); self.finalize_progress(&progress, words.len()); // // 5. Do merges // self.update_progress(&progress, self.vocab_size, "Compute merges"); let mut merges: Vec<(Pair, u32)> = vec![]; loop { // Stop as soon as we have a big enough vocabulary if word_to_id.len() >= self.vocab_size { break; } let Some(mut top) = queue.pop() else { break; }; if top.count != pair_counts[&top.pair] as u64 { top.count = pair_counts[&top.pair] as u64; queue.push(top); continue; } if top.count < 1 || self.min_frequency > top.count { break; } let part_a = &id_to_word[top.pair.0 as usize]; let mut part_b = id_to_word[top.pair.1 as usize].as_str(); // Build new token if let Some(prefix) = &self.continuing_subword_prefix { if let Some(rest) = part_b.strip_prefix(prefix) { part_b = rest; } } // Insert new token if it does not already exist let new_token = format!("{part_a}{part_b}"); let new_token_id = word_to_id .get(&CompactString::from(&new_token)) .copied() .unwrap_or(id_to_word.len() as u32); if !word_to_id.contains_key(&CompactString::from(&new_token)) { id_to_word.push(CompactString::from(&new_token)); word_to_id.insert(CompactString::from(&new_token), new_token_id); } merges.push((top.pair, new_token_id)); // Merge the new pair in every words // Safety: This is just a type assertion, the code below may no longer be safe // if the type of `pos` changes let pos: &AHashSet = &top.pos; let words_len = words.len(); struct WordPtr(*mut Word); // Safety: We do not actually use this for concurrent access to the same memory, // only to different chunks within the same allocation. unsafe impl Sync for WordPtr {} let word_start = WordPtr(words.as_mut_ptr()); let changes = pos .maybe_par_iter() .flat_map(|&i| { // We can merge each of these words in parallel here because each position // can be there only once (AHashSet). So this is safe. unsafe { assert!(i < words_len); // This is words[i], but avoids needing to go through &T (which triggers UB) let word = word_start.0.add(i); // let word: &mut Word = &mut (*word); (*word) .merge(top.pair.0, top.pair.1, new_token_id, max_token_length) .into_iter() .map(|c| (c, i)) .collect::>() } }) .collect::>(); // Introduce new formed pairs for ((pair, change), iw) in changes { let count = change * counts[iw] as i32; *pair_counts.entry(pair).or_default() += count; if change > 0 { where_to_update.entry(pair).or_default().insert(iw); } } where_to_update.drain().for_each(|(pair, pos)| { let count = pair_counts[&pair]; if count > 0 { queue.push(Merge { pair, count: count as u64, pos, }); } }); if let Some(p) = &progress { p.inc(1); } } self.finalize_progress(&progress, merges.len()); // Transfer new vocab & options to model //model.vocab = word_to_id; model.vocab = word_to_id .into_iter() // we have to look up the string in id_to_word because the key in word_to_id is a hash .map(|(_key, val)| (id_to_word[val as usize].to_string(), val)) .collect(); model.vocab_r = model .vocab .iter() .map(|(key, val)| (*val, key.to_owned())) .collect(); model.merges = merges .into_iter() .enumerate() .map(|(i, (pair, new_token_id))| (pair, (i as u32, new_token_id))) .collect(); model.continuing_subword_prefix = self.continuing_subword_prefix.clone(); model.end_of_word_suffix = self.end_of_word_suffix.clone(); Ok(self.special_tokens.clone()) } } impl Trainer for BpeTrainer { type Model = BPE; /// Train a BPE model fn train(&self, model: &mut BPE) -> Result> { self.do_train(&self.words, model) } /// Whether we should show progress fn should_show_progress(&self) -> bool { self.show_progress } fn feed(&mut self, iterator: I, process: F) -> Result<()> where I: Iterator + Send, S: AsRef + Send, F: Fn(&str) -> Result> + Sync, { let words: Result> = iterator .maybe_par_bridge() .map(|sequence| { let words = process(sequence.as_ref())?; let mut map = AHashMap::new(); for word in words { *map.entry(CompactString::from(word)).or_default() += 1; } Ok(map) }) .reduce( || Ok(AHashMap::new()), |acc, ws| { let mut acc = acc?; for (k, v) in ws? { *acc.entry(k).or_default() += v; } Ok(acc) }, ); self.words = words?; Ok(()) } } #[cfg(test)] mod tests { use super::{BpeTrainer, Pair, BPE}; use ahash::AHashMap; use compact_str::CompactString; #[test] fn test_train() { let word_counts: AHashMap = [ ("roses".into(), 1), ("are".into(), 2), ("red".into(), 1), ("voilets".into(), 1), ("blue".into(), 1), ("BERT".into(), 1), ("is".into(), 2), ("big".into(), 1), ("and".into(), 1), ("so".into(), 1), ("GPT-2".into(), 1), ] .iter() .cloned() .collect(); let trainer = BpeTrainer::builder() .show_progress(false) .min_frequency(2) .build(); let mut model = BPE::default(); trainer.do_train(&word_counts, &mut model).unwrap(); // Vocab should contain all of the characters from the `word_counts` mapping // as well as three merges: 're', 'are', and 'is'. let expected_vocab: AHashMap = [ ("-".into(), 0), ("2".into(), 1), ("B".into(), 2), ("E".into(), 3), ("G".into(), 4), ("P".into(), 5), ("R".into(), 6), ("T".into(), 7), ("a".into(), 8), ("b".into(), 9), ("d".into(), 10), ("e".into(), 11), ("g".into(), 12), ("i".into(), 13), ("l".into(), 14), ("n".into(), 15), ("o".into(), 16), ("r".into(), 17), ("s".into(), 18), ("t".into(), 19), ("u".into(), 20), ("v".into(), 21), ("re".into(), 22), ("are".into(), 23), ("is".into(), 24), ] .iter() .cloned() .collect(); assert_eq!(model.vocab, expected_vocab); // The keys in `merges` are pairs of symbols, the values are tuples of (rank, id), // where 'rank' determines the order in which this merge will be applied during // tokenization, and 'id' is the vocab id of the symbol resulting from merging // the pair of symbols in the corresponding key. let expected_merges: AHashMap = [ ((17, 11), (0, 22)), // 'r' + 'e' -> 're' ((8, 22), (1, 23)), // 'a' + 're' -> 'are' ((13, 18), (2, 24)), // 'i' + 's' -> 'is' ] .iter() .cloned() .collect(); assert_eq!(model.merges, expected_merges); } #[test] fn bpe_test_max_token_length_16() { /* bpe_test_max_token_length series of tests test the max_token_length flag of bpetrainer // this is the more robust version that only tests max length of learned tokens // (pre) tokenizer settings or vocab can be easily modified when necessary */ let max_token_length = 16; let long_word_counts: AHashMap = [ ("singlelongtokenwithoutcasechange", 2), ("singleLongTokenWithCamelCaseChange", 2), ("Longsingletokenwithpunctu@t!onwithin", 2), ("Anotherlongsingletokenwithnumberw1th1n", 2), ("짧은한글문자열짧은한", 2), // korean 10 char ("긴한글문자열긴한글문자열긴한글문", 2), // korean 16 char ("短字符串短字符串短字", 2), //simplified chinese 10 char ("长字符串长字符串长字符串长字符串", 2), // simp. chinese 16 char ("短い文字列短い文字列", 2), // japanese 10 char ("長い文字列長い文字列長い文字列長", 2), // japanese 16 char ("so", 2), ("GPT-2", 2), ] .iter() .map(|(key, value)| (CompactString::from(key.to_string()), *value)) .collect(); let trainer = BpeTrainer::builder() .max_token_length(Some(max_token_length)) .show_progress(false) .min_frequency(0) .build(); let mut model = BPE::default(); trainer.do_train(&long_word_counts, &mut model).unwrap(); let vocab = model.get_vocab(); for token in vocab.keys() { assert!( token.chars().count() <= max_token_length, "token too long : {} , chars().count() = {}", token, token.chars().count() ) } } #[test] fn bpe_test_max_token_length_direct_assert() { /* more direct version of bpe_test_max_token_length test // directly compares tokens with known expected values. // maybe unstable depending on specific settings or changes. */ let long_word_counts: AHashMap = [ ("sin", 2), ("Sin", 2), ("Lon", 2), ("Ano", 2), ("짧은한", 2), ("긴한글", 2), ("短字符", 2), ("长字符", 2), ("短い文", 2), ("長い文", 2), ("so", 2), ("GP", 2), ] .iter() .map(|(key, value)| (CompactString::from(key.to_string()), *value)) .collect(); let trainer = BpeTrainer::builder() .max_token_length(Some(2)) .show_progress(false) .min_frequency(0) .build(); let mut model = BPE::default(); trainer.do_train(&long_word_counts, &mut model).unwrap(); let trained_vocab: AHashMap = model.get_vocab().into_iter().collect(); let expected_vocab: AHashMap = [ ("短", 12), ("n", 6), ("i", 5), ("s", 8), ("字符", 23), ("長", 14), ("긴", 17), ("い文", 22), ("L", 2), ("in", 21), ("o", 7), ("은한", 29), ("S", 4), ("P", 3), ("so", 27), ("符", 13), ("文", 11), ("字", 10), ("짧", 19), ("GP", 25), ("글", 16), ("G", 1), ("An", 24), ("长", 15), ("A", 0), ("Lo", 26), ("긴한", 28), ("い", 9), ("한", 20), ("은", 18), ] .iter() .cloned() .map(|(k, v)| (k.to_string(), v)) .collect(); assert_eq!(trained_vocab, expected_vocab) } } ================================================ FILE: tokenizers/src/models/bpe/word.rs ================================================ use super::Pair; use ahash::AHashMap; use dary_heap::QuaternaryHeap; use rand::{rng, Rng}; use std::cmp::Ordering; #[derive(Debug, Eq)] struct Merge { pos: usize, rank: u32, new_id: u32, } impl PartialEq for Merge { fn eq(&self, other: &Self) -> bool { self.rank == other.rank && self.pos == other.pos } } impl PartialOrd for Merge { fn partial_cmp(&self, other: &Self) -> Option { // By manually implementing this, we make the containing BinaryHeap a // min-heap ordered first on the rank, and the pos otherwise Some(self.cmp(other)) } } impl Ord for Merge { fn cmp(&self, other: &Self) -> Ordering { if self.rank != other.rank { other.rank.cmp(&self.rank) } else { other.pos.cmp(&self.pos) } } } #[derive(Debug, Clone, Copy)] struct Symbol { c: u32, prev: isize, next: isize, len: usize, } impl Symbol { /// Merges the current Symbol with the other one. /// In order to update prev/next, we consider Self to be the Symbol on the left, /// and other to be the next one on the right. pub fn merge_with(&mut self, other: &Self, new_c: u32) { self.c = new_c; self.len += other.len; self.next = other.next; } } #[derive(Clone, Default)] pub(super) struct Word { symbols: Vec, } impl std::fmt::Debug for Word { fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { fmt.debug_struct("Word") .field( "chars", &self .symbols .iter() .map(|s| s.c.to_string()) .collect::>() .join(" "), ) .field("symbols", &self.symbols) .finish() } } impl Word { pub(super) fn new() -> Self { Word { symbols: vec![] } } pub(super) fn with_capacity(capacity: usize) -> Self { Self { symbols: Vec::with_capacity(capacity), } } pub(super) fn add(&mut self, c: u32, byte_len: usize) { let (prev, next) = { let len = self.symbols.len() as isize; if let Some(last) = self.symbols.last_mut() { // Update `next` on the previous one last.next = len; (len - 1, -1) } else { (-1, -1) } }; self.symbols.push(Symbol { c, prev, next, len: byte_len, }); } pub(super) fn merge( &mut self, c1: u32, c2: u32, replacement: u32, max_length: usize, ) -> Vec<(Pair, i32)> { let mut changes: Vec<(Pair, i32)> = vec![]; let mut i = 0; loop { if i >= self.symbols.len() { break; } // Found a pair if self.symbols[i].c == c1 && i + 1 < self.symbols.len() && self.symbols[i + 1].c == c2 { let first = self.symbols[i]; let second = self.symbols[i + 1]; // Remove in place let new_s = Symbol { c: replacement, prev: first.prev, next: second.next, len: first.len + second.len, }; // If there are other characters before the pair if i > 0 { changes.push(((self.symbols[i - 1].c, first.c), -1)); if self.symbols[i - 1].len + new_s.len < max_length { changes.push(((self.symbols[i - 1].c, replacement), 1)); } } self.symbols.insert(i, new_s); // Insert replacement before first char of pair self.symbols.remove(i + 1); // Remove first char of pair self.symbols.remove(i + 1); // And then the second // If there are other characters after the pair if i < self.symbols.len() - 1 { changes.push(((second.c, self.symbols[i + 1].c), -1)); if self.symbols[i + 1].len + new_s.len < max_length { changes.push(((replacement, self.symbols[i + 1].c), 1)); } } } i += 1; } changes } pub(super) fn merge_all(&mut self, merges: &AHashMap, dropout: Option) { let mut queue = QuaternaryHeap::with_capacity(self.symbols.len()); let mut skip = Vec::with_capacity(queue.len()); queue.extend( self.symbols .windows(2) .enumerate() .filter_map(|(index, window)| { let pair = (window[0].c, window[1].c); merges.get(&pair).map(|m| Merge { pos: index, rank: m.0, new_id: m.1, }) }), ); while let Some(top) = queue.pop() { if dropout.map(|d| rng().random::() < d).unwrap_or(false) { skip.push(top); } else { // Re-insert the skipped elements queue.extend(skip.drain(..)); if self.symbols[top.pos].len == 0 { continue; } // Do nothing if we are the last symbol if self.symbols[top.pos].next == -1 { continue; } let next_pos = self.symbols[top.pos].next as usize; let right = self.symbols[next_pos]; // Make sure we are not processing an expired queue entry let target_new_pair = (self.symbols[top.pos].c, right.c); if merges .get(&target_new_pair) .is_none_or(|(_, new_id)| *new_id != top.new_id) { continue; } // Otherwise, let's merge self.symbols[top.pos].merge_with(&right, top.new_id); // Tag the right part as removed self.symbols[next_pos].len = 0; // Update `prev` on the new `next` to the current pos if right.next > -1 && (right.next as usize) < self.symbols.len() { self.symbols[right.next as usize].prev = top.pos as isize; } // Insert the new pair formed with the previous symbol let current = &self.symbols[top.pos]; if current.prev >= 0 { let prev = current.prev as usize; let prev_symbol = self.symbols[prev]; let new_pair = (prev_symbol.c, current.c); if let Some((rank, new_id)) = merges.get(&new_pair) { queue.push(Merge { pos: current.prev as usize, rank: *rank, new_id: *new_id, }); } } // Insert the new pair formed with the next symbol let next = current.next as usize; if next < self.symbols.len() { let next_symbol = self.symbols[next]; let new_pair = (current.c, next_symbol.c); if let Some((rank, new_id)) = merges.get(&new_pair) { queue.push(Merge { pos: top.pos, rank: *rank, new_id: *new_id, }); } } } } // Filter out the removed symbols self.symbols.retain(|s| s.len != 0); } pub(super) fn get_chars(&self) -> Vec { self.symbols.iter().map(|s| s.c).collect() } pub(super) fn get_chars_iter(&self) -> impl Iterator + '_ { self.symbols.iter().map(|s| s.c) } pub(super) fn get_offsets_iter(&self) -> impl Iterator + '_ { let mut pos = 0; self.symbols.iter().map(move |symbol| { let new_pos = pos + symbol.len; let offset = (pos, new_pos); pos = new_pos; offset }) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_merge() { // Let's say we have the word 'hello' and a word-to-id vocab that looks // like this: {'h': 0, 'e': 1, 'l': 2, 'o': 3}. let mut word = Word::new(); word.add(0, 1); // 'h' word.add(1, 1); // 'e' word.add(2, 1); // 'l' word.add(2, 1); // 'l' word.add(3, 1); // 'o' // We're going to perform a merge on the pair ('l', 'l') ~= (2, 2). Let's // say that 'll' has the ID of 4 in the updated word-to-id vocab. let changes = word.merge(2, 2, 4, usize::MAX); // So the word should now look like this: assert_eq!( word.get_chars(), &[ 0u32, // 'h' 1u32, // 'e' 4u32, // 'll' 3u32, // 'o' ] ); // The return value `changes` will be used to update the pair counts during // training. This merge affects the counts for the pairs // ('e', 'l') ~= (1, 2), // ('e', 'll') ~= (1, 4), // ('l', 'o') ~= (2, 3), and // ('ll', 'o') ~= (4, 3). // So the changes should reflect that: assert_eq!( changes, &[ ((1u32, 2u32), -1i32), // count for ('e', 'l') should be decreased by 1. ((1u32, 4u32), 1i32), // count for ('e', 'll') should be increased by 1. ((2u32, 3u32), -1i32), // count for ('l', 'o') should be decreased by 1. ((4u32, 3u32), 1i32), // count for ('ll', 'o') should be increased by 1. ] ); } #[test] fn test_merge_max_length() { // Let's say we have the word 'hello' and a word-to-id vocab that looks // like this: {'h': 0, 'e': 1, 'l': 2, 'o': 3}. let mut word = Word::new(); word.add(0, 1); // 'h' word.add(1, 1); // 'e' word.add(2, 1); // 'l' word.add(2, 1); // 'l' word.add(3, 1); // 'o' // We're going to perform a merge on the pair ('l', 'l') ~= (2, 2). Let's // say that 'll' has the ID of 4 in the updated word-to-id vocab. let changes = word.merge(2, 2, 4, 2); assert_eq!( word.get_chars(), &[ 0u32, // 'h' 1u32, // 'e' 4u32, // 'll' 3u32, // 'o' ] ); assert_eq!( changes, &[ ((1u32, 2u32), -1i32), // count for ('e', 'l') should be decreased by 1. // ((1u32, 4u32), 1i32), Missing since this would be larger than 2 ((2u32, 3u32), -1i32), // count for ('l', 'o') should be decreased by 1. // ((4u32, 3u32), 1i32), Missing since this would be larger than 2 ] ); } } ================================================ FILE: tokenizers/src/models/mod.rs ================================================ //! Popular tokenizer models. pub mod bpe; pub mod unigram; pub mod wordlevel; pub mod wordpiece; use ahash::AHashMap; use std::collections::HashMap; use std::path::{Path, PathBuf}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::models::bpe::{BpeTrainer, BPE}; use crate::models::unigram::{Unigram, UnigramTrainer}; use crate::models::wordlevel::{WordLevel, WordLevelTrainer}; use crate::models::wordpiece::{WordPiece, WordPieceTrainer}; use crate::{AddedToken, Model, Result, Token, Trainer}; /// Wraps a vocab mapping (ID -> token) to a struct that will be serialized in order /// of token ID, smallest to largest. struct OrderedVocabIter<'a> { vocab_r: &'a AHashMap, } impl<'a> OrderedVocabIter<'a> { fn new(vocab_r: &'a AHashMap) -> Self { Self { vocab_r } } } impl Serialize for OrderedVocabIter<'_> { fn serialize(&self, serializer: S) -> std::result::Result where S: Serializer, { // There could be holes so max + 1 is more correct than vocab_r.len() let mut holes = vec![]; let result = if let Some(max) = self.vocab_r.keys().max() { let iter = (0..*max + 1).filter_map(|i| { if let Some(token) = self.vocab_r.get(&i) { Some((token, i)) } else { holes.push(i); None } }); serializer.collect_map(iter) } else { serializer.collect_map(std::iter::empty::<(&str, u32)>()) }; if !holes.is_empty() { warn!("The OrderedVocab you are attempting to serialize contains holes for indices {holes:?}, your vocabulary could be corrupted!"); } result } } #[derive(Serialize, Debug, PartialEq, Clone)] #[serde(untagged)] pub enum ModelWrapper { BPE(BPE), // WordPiece must stay before WordLevel here for deserialization (for retrocompatibility // with the versions not including the "type"), since WordLevel is a subset of WordPiece WordPiece(WordPiece), WordLevel(WordLevel), Unigram(Unigram), } impl<'de> Deserialize<'de> for ModelWrapper { fn deserialize(deserializer: D) -> std::result::Result where D: Deserializer<'de>, { #[derive(Deserialize)] pub struct Tagged { #[serde(rename = "type")] variant: EnumType, #[serde(flatten)] rest: serde_json::Value, } #[derive(Deserialize)] pub enum EnumType { BPE, WordPiece, WordLevel, Unigram, } #[derive(Deserialize)] #[serde(untagged)] pub enum ModelHelper { Tagged(Tagged), Legacy(serde_json::Value), } #[derive(Deserialize)] #[serde(untagged)] pub enum ModelUntagged { BPE(BPE), // WordPiece must stay before WordLevel here for deserialization (for retrocompatibility // with the versions not including the "type"), since WordLevel is a subset of WordPiece WordPiece(WordPiece), WordLevel(WordLevel), Unigram(Unigram), } let helper = ModelHelper::deserialize(deserializer)?; Ok(match helper { ModelHelper::Tagged(model) => match model.variant { EnumType::BPE => ModelWrapper::BPE( serde_json::from_value(model.rest).map_err(serde::de::Error::custom)?, ), EnumType::WordPiece => ModelWrapper::WordPiece( serde_json::from_value(model.rest).map_err(serde::de::Error::custom)?, ), EnumType::WordLevel => ModelWrapper::WordLevel( serde_json::from_value(model.rest).map_err(serde::de::Error::custom)?, ), EnumType::Unigram => ModelWrapper::Unigram( serde_json::from_value(model.rest).map_err(serde::de::Error::custom)?, ), }, ModelHelper::Legacy(value) => { let untagged = serde_json::from_value(value).map_err(serde::de::Error::custom)?; match untagged { ModelUntagged::BPE(bpe) => ModelWrapper::BPE(bpe), ModelUntagged::WordPiece(bpe) => ModelWrapper::WordPiece(bpe), ModelUntagged::WordLevel(bpe) => ModelWrapper::WordLevel(bpe), ModelUntagged::Unigram(bpe) => ModelWrapper::Unigram(bpe), } } }) } } impl_enum_from!(WordLevel, ModelWrapper, WordLevel); impl_enum_from!(WordPiece, ModelWrapper, WordPiece); impl_enum_from!(BPE, ModelWrapper, BPE); impl_enum_from!(Unigram, ModelWrapper, Unigram); impl Model for ModelWrapper { type Trainer = TrainerWrapper; fn tokenize(&self, tokens: &str) -> Result> { match self { Self::WordLevel(t) => t.tokenize(tokens), Self::WordPiece(t) => t.tokenize(tokens), Self::BPE(t) => t.tokenize(tokens), Self::Unigram(t) => t.tokenize(tokens), } } fn token_to_id(&self, token: &str) -> Option { match self { Self::WordLevel(t) => t.token_to_id(token), Self::WordPiece(t) => t.token_to_id(token), Self::BPE(t) => t.token_to_id(token), Self::Unigram(t) => t.token_to_id(token), } } fn id_to_token(&self, id: u32) -> Option { match self { Self::WordLevel(t) => t.id_to_token(id), Self::WordPiece(t) => t.id_to_token(id), Self::BPE(t) => t.id_to_token(id), Self::Unigram(t) => t.id_to_token(id), } } fn get_vocab(&self) -> HashMap { match self { Self::WordLevel(t) => t.get_vocab(), Self::WordPiece(t) => t.get_vocab(), Self::BPE(t) => t.get_vocab(), Self::Unigram(t) => t.get_vocab(), } } fn get_vocab_size(&self) -> usize { match self { Self::WordLevel(t) => t.get_vocab_size(), Self::WordPiece(t) => t.get_vocab_size(), Self::BPE(t) => t.get_vocab_size(), Self::Unigram(t) => t.get_vocab_size(), } } fn save(&self, folder: &Path, name: Option<&str>) -> Result> { match self { Self::WordLevel(t) => t.save(folder, name), Self::WordPiece(t) => t.save(folder, name), Self::BPE(t) => t.save(folder, name), Self::Unigram(t) => t.save(folder, name), } } fn get_trainer(&self) -> Self::Trainer { match self { Self::WordLevel(t) => t.get_trainer().into(), Self::WordPiece(t) => t.get_trainer().into(), Self::BPE(t) => t.get_trainer().into(), Self::Unigram(t) => t.get_trainer().into(), } } } impl ModelWrapper { pub fn clear_cache(&mut self) { match self { Self::Unigram(model) => model.clear_cache(), Self::BPE(model) => model.clear_cache(), _ => (), } } pub fn resize_cache(&mut self, capacity: usize) { match self { Self::Unigram(model) => model.resize_cache(capacity), Self::BPE(model) => model.resize_cache(capacity), _ => (), } } } #[derive(Clone, Serialize, Deserialize)] pub enum TrainerWrapper { BpeTrainer(BpeTrainer), WordPieceTrainer(WordPieceTrainer), WordLevelTrainer(WordLevelTrainer), UnigramTrainer(UnigramTrainer), } impl Trainer for TrainerWrapper { type Model = ModelWrapper; fn should_show_progress(&self) -> bool { match self { Self::BpeTrainer(bpe) => bpe.should_show_progress(), Self::WordPieceTrainer(wpt) => wpt.should_show_progress(), Self::WordLevelTrainer(wpt) => wpt.should_show_progress(), Self::UnigramTrainer(wpt) => wpt.should_show_progress(), } } fn train(&self, model: &mut ModelWrapper) -> Result> { match self { Self::BpeTrainer(t) => match model { ModelWrapper::BPE(bpe) => t.train(bpe), _ => Err("BpeTrainer can only train a BPE".into()), }, Self::WordPieceTrainer(t) => match model { ModelWrapper::WordPiece(wp) => t.train(wp), _ => Err("WordPieceTrainer can only train a WordPiece".into()), }, Self::WordLevelTrainer(t) => match model { ModelWrapper::WordLevel(wl) => t.train(wl), _ => Err("WordLevelTrainer can only train a WordLevel".into()), }, Self::UnigramTrainer(t) => match model { ModelWrapper::Unigram(u) => t.train(u), _ => Err("UnigramTrainer can only train a Unigram".into()), }, } } fn feed(&mut self, iterator: I, process: F) -> Result<()> where I: Iterator + Send, S: AsRef + Send, F: Fn(&str) -> Result> + Sync, { match self { Self::BpeTrainer(bpe) => bpe.feed(iterator, process), Self::WordPieceTrainer(wpt) => wpt.feed(iterator, process), Self::WordLevelTrainer(wpt) => wpt.feed(iterator, process), Self::UnigramTrainer(wpt) => wpt.feed(iterator, process), } } } impl_enum_from!(BpeTrainer, TrainerWrapper, BpeTrainer); impl_enum_from!(WordPieceTrainer, TrainerWrapper, WordPieceTrainer); impl_enum_from!(UnigramTrainer, TrainerWrapper, UnigramTrainer); impl_enum_from!(WordLevelTrainer, TrainerWrapper, WordLevelTrainer); #[cfg(test)] mod tests { use super::*; use crate::models::bpe::{BpeBuilder, Vocab}; #[test] fn trainer_wrapper_train_model_wrapper() { let trainer = TrainerWrapper::BpeTrainer(BpeTrainer::default()); let mut model = ModelWrapper::Unigram(Unigram::default()); let result = trainer.train(&mut model); assert!(result.is_err()); } #[test] fn incomplete_ordered_vocab() { let vocab_r: AHashMap = AHashMap::from([(0, "Hi".to_string()), (2, "There".to_string())]); let ordered = OrderedVocabIter::new(&vocab_r); let serialized = serde_json::to_string(&ordered).unwrap(); assert_eq!(serialized, "{\"Hi\":0,\"There\":2}"); } #[test] fn serialization() { let vocab: Vocab = [ ("".into(), 0), ("a".into(), 1), ("b".into(), 2), ("ab".into(), 3), ] .iter() .cloned() .collect(); let bpe = BpeBuilder::default() .vocab_and_merges(vocab, vec![("a".to_string(), "b".to_string())]) .unk_token("".to_string()) .ignore_merges(true) .build() .unwrap(); let model = ModelWrapper::BPE(bpe); let legacy = r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b":2,"ab":3},"merges":["a b"]}"#; let legacy = serde_json::from_str(legacy).unwrap(); assert_eq!(model, legacy); let data = serde_json::to_string(&model).unwrap(); assert_eq!( data, r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b":2,"ab":3},"merges":[["a","b"]]}"# ); let reconstructed = serde_json::from_str(&data).unwrap(); assert_eq!(model, reconstructed); // Legacy check, type is not necessary. let legacy = r#"{"dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b":2,"ab":3},"merges":["a b"]}"#; let reconstructed = serde_json::from_str(legacy).unwrap(); assert_eq!(model, reconstructed); let invalid = r#"{"type":"BPE","dropout":null,"unk_token":"","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"byte_fallback":false,"ignore_merges":true,"vocab":{"":0,"a":1,"b":2,"ab":3},"merges":["a b c"]}"#; let reconstructed: std::result::Result = serde_json::from_str(invalid); match reconstructed { Err(err) => assert_eq!(err.to_string(), "Merges text file invalid at line 1"), _ => panic!("Expected an error here"), } } } ================================================ FILE: tokenizers/src/models/unigram/lattice.rs ================================================ use dary_heap::QuaternaryHeap; use rand::distr::weighted::WeightedIndex; use rand::{prelude::*, rng}; use std::cell::RefCell; use std::cmp::{min, Ordering}; use std::rc::Rc; type NodeRef = Rc>; type HypothesisRef = Rc>; type Agenda = QuaternaryHeap; struct Hypothesis { node_ref: NodeRef, next: Option, fx: f64, gx: f64, } impl Hypothesis { pub fn new(node_ref: NodeRef, next: Option, fx: f64, gx: f64) -> Self { Self { node_ref, next, fx, gx, } } } impl PartialEq for Hypothesis { fn eq(&self, other: &Self) -> bool { self.fx == other.fx } } impl Eq for Hypothesis {} impl PartialOrd for Hypothesis { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } // TODO Maybe use Ordered Floats (https://docs.rs/ordered-float/1.0.2/ordered_float/) impl Ord for Hypothesis { fn cmp(&self, other: &Self) -> Ordering { if self.fx < other.fx { Ordering::Less } else { Ordering::Greater } } } /// Structure to implement Viterbi algorithm to find the best encoding, or sample /// from all possible encodings of a given sentence. #[derive(Debug)] pub struct Lattice<'a> { pub(super) sentence: &'a str, len: usize, nodes: Vec, pub(super) begin_nodes: Vec>, pub(super) end_nodes: Vec>, _bos_id: usize, _eos_id: usize, } impl std::fmt::Display for Lattice<'_> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { let display_pieces = |nodes: &Vec>| { nodes .iter() .map(|l| { l.iter() .map(|n| self.piece(&n.borrow())) .collect::>() }) .collect::>() }; f.debug_struct("Lattice") .field("sentence", &self.sentence) .field("begin_nodes", &display_pieces(&self.begin_nodes)) .field("end_nodes", &display_pieces(&self.end_nodes)) .finish() } } /// A node from the lattice, that helps reconstruct the underlying `String` #[derive(Debug, Clone)] pub struct Node { // Vocabulary id pub(super) id: usize, // Local lattice identifier pub(super) node_id: usize, pos: usize, length: usize, prev: Option, backtrace_score: f64, score: f64, } impl PartialEq for Node { fn eq(&self, other: &Node) -> bool { self.id == other.id } } impl Node { pub fn new(id: usize, node_id: usize, pos: usize, length: usize, score: f64) -> Self { Self { id, node_id, pos, length, prev: None, score, backtrace_score: 0.0, } } } /// Returns log(exp(x) + exp(y)). /// if init_mode is true, returns log(exp(y)) == y. /// log(\sum_i exp(a[i])) can be computed as /// for (int i = 0; i < a.size(); ++i) /// x = LogSumExp(x, a[i], i == 0); fn log_sum_exp(x: f64, y: f64, init_mode: bool) -> f64 { if init_mode { y } else { let (vmin, vmax) = if x > y { (y, x) } else { (x, y) }; let k_minus_log_epsilon = 50.0; if vmax > vmin + k_minus_log_epsilon { vmax } else { vmax + ((vmin - vmax).exp() + 1.0).ln() } } } impl<'a> Lattice<'a> { pub fn from(sentence: &'a str, bos_id: usize, eos_id: usize) -> Self { let len = sentence.len(); let k_reserved_node_size = 16; // We are adding 2 tokens, bos and eos let mut nodes: Vec = Vec::with_capacity(k_reserved_node_size); let mut begin_nodes = vec![Vec::with_capacity(k_reserved_node_size); len + 1]; let mut end_nodes = vec![Vec::with_capacity(k_reserved_node_size); len + 1]; let bos = Rc::new(RefCell::new(Node::new(bos_id, 0, 0, 0, 0.0))); let eos = Rc::new(RefCell::new(Node::new(eos_id, 1, len, 0, 0.0))); begin_nodes[len].push(Rc::clone(&eos)); end_nodes[0].push(Rc::clone(&bos)); nodes.push(bos); nodes.push(eos); Self { sentence, len, nodes, begin_nodes, end_nodes, _bos_id: bos_id, _eos_id: eos_id, } } pub fn insert(&mut self, pos: usize, length: usize, score: f64, id: usize) { let node_id = self.nodes.len(); let node = Rc::new(RefCell::new(Node::new(id, node_id, pos, length, score))); self.begin_nodes[pos].push(Rc::clone(&node)); self.end_nodes[pos + length].push(Rc::clone(&node)); self.nodes.push(node); } pub fn viterbi(&mut self) -> Vec { let len = self.len; let mut pos = 0; while pos <= len { if self.begin_nodes[pos].is_empty() { return vec![]; } for rnode in &self.begin_nodes[pos] { rnode.borrow_mut().prev = None; let mut best_score = 0.0; let mut best_node: Option = None; for lnode in &self.end_nodes[pos] { let score = lnode.borrow().backtrace_score + rnode.borrow().score; if best_node.is_none() || score > best_score { // TODO can we remove this clone ? best_node = Some(lnode.clone()); best_score = score } } match best_node { Some(bnode) => { rnode.borrow_mut().prev = Some(Rc::clone(&bnode)); rnode.borrow_mut().backtrace_score = best_score; } None => return vec![], } } if let Some(c) = self.sentence[pos..].chars().next() { pos += c.len_utf8(); } else { break; } } let mut results: Vec = vec![]; let root = self.begin_nodes[len][0].borrow(); let prev = root.prev.as_ref(); if prev.is_none() { return vec![]; } let mut node: NodeRef = prev.unwrap().clone(); while node.borrow().prev.is_some() { results.push(node.clone()); let n = node.borrow().clone(); node = n.prev.as_ref().unwrap().clone(); } results.reverse(); results } pub fn piece(&self, node: &Node) -> String { self.sentence[node.pos..node.pos + node.length].to_owned() } pub fn tokens(&mut self) -> Vec { self.viterbi() .iter() .map(|node| self.piece(&node.borrow())) .collect() } pub fn nbest(&mut self, n: usize) -> Vec> { match n { 0 => vec![], 1 => vec![self.viterbi()], _ => { // let k_reserved_hypothesis_size = 512; let mut agenda: Agenda = QuaternaryHeap::new(); let mut hypotheses: Vec> = vec![]; let eos = self.eos_node(); let score = eos.borrow().score; let hypo = Hypothesis::new(eos, None, score, score); agenda.push(hypo); // Fill backtrace scores self.viterbi(); while !agenda.is_empty() { let top = Rc::new(RefCell::new(agenda.pop().unwrap())); let node = Rc::clone(&top.borrow().node_ref); if node.borrow().id == self.bos_node().borrow().id { let mut hypothesis = vec![]; let mut next: HypothesisRef = Rc::clone(top.borrow().next.as_ref().unwrap()); while next.borrow().next.is_some() { hypothesis.push(next.borrow().node_ref.clone()); let c: HypothesisRef = next.clone(); // let c: Ref = next.clone().borrow(); next = Rc::clone(c.borrow().next.as_ref().unwrap()); } hypotheses.push(hypothesis); if hypotheses.len() == n { return hypotheses; } } else { for lnode in &self.end_nodes[node.borrow().pos] { let top_gx = top.borrow().gx; let fx = lnode.borrow().backtrace_score + top_gx; let gx = lnode.borrow().score + top_gx; let hyp = Hypothesis::new(Rc::clone(lnode), Some(Rc::clone(&top)), fx, gx); agenda.push(hyp); } // When the input is too long or contains duplicated phrases, // `agenda` will get extremely big. Here we avoid this case by // dynamically shrinking the agenda. let k_max_agenda_size = 100_000; let k_min_agenda_size = 512; if agenda.len() > k_max_agenda_size { let mut new_agenda = QuaternaryHeap::new(); let len = min(k_min_agenda_size, n * 10); for _i in 0..len { new_agenda.push(agenda.pop().unwrap()); } agenda = new_agenda; } } } hypotheses } } } pub fn nbest_tokens(&mut self, n: usize) -> Vec> { self.nbest(n) .iter() .map(|v| v.iter().map(|node| self.piece(&node.borrow())).collect()) .collect() } pub fn len(&self) -> usize { self.len } pub fn is_empty(&self) -> bool { self.len == 0 } pub fn bos_node(&self) -> NodeRef { Rc::clone(&self.end_nodes[0][0]) } pub fn eos_node(&self) -> NodeRef { Rc::clone(&self.begin_nodes[self.len][0]) } pub fn surface(&self, n: usize) -> &str { match self.sentence.char_indices().nth(n) { Some((pos, _)) => &self.sentence[pos..], None => "", } } pub fn sentence(&self) -> &str { self.sentence } pub fn populate_marginal(&self, freq: f64, expected: &mut [f64]) -> f64 { let len = self.len(); let n_nodes = self.nodes.len(); let mut alpha = vec![0.0; n_nodes]; let mut beta = vec![0.0; n_nodes]; for pos in 0..=len { for rnode in &self.begin_nodes[pos] { for lnode in &self.end_nodes[pos] { let lid = lnode.borrow().node_id; let rid = rnode.borrow().node_id; alpha[rid] = log_sum_exp( alpha[rid], lnode.borrow().score + alpha[lid], *lnode == self.end_nodes[pos][0], ); } } } for pos in (0..=len).rev() { // let rpos = len - pos; for lnode in &self.end_nodes[pos] { for rnode in &self.begin_nodes[pos] { let lid = lnode.borrow().node_id; let rid = rnode.borrow().node_id; beta[lid] = log_sum_exp( beta[lid], rnode.borrow().score + beta[rid], *rnode == self.begin_nodes[pos][0], ); } } } let eos_id = self.begin_nodes[len][0].borrow().node_id; let z = alpha[eos_id]; for pos in 0..len { for node in &self.begin_nodes[pos] { let node_id = node.borrow().node_id; let id = node.borrow().id; let a = alpha[node_id]; let b = beta[node_id]; let total = a + node.borrow().score + b - z; let update = freq * total.exp(); expected[id] += update; } } freq * z } pub fn sample(&self, theta: f64) -> Vec { let len = self.len(); if len == 0 { return vec![]; } let mut alpha = vec![0.0; self.nodes.len()]; for pos in 0..=len { for rnode in &self.begin_nodes[pos] { for lnode in &self.end_nodes[pos] { let lid = lnode.borrow().node_id; let rid = rnode.borrow().node_id; alpha[rid] = log_sum_exp( alpha[rid], theta * (lnode.borrow().score + alpha[lid]), *lnode == self.end_nodes[pos][0], ); } } } let mut rng = rng(); let mut results: Vec = vec![]; let mut probs: Vec = vec![]; let mut z = alpha[self.eos_node().borrow().node_id]; let mut node = self.eos_node(); loop { probs.clear(); let pos = node.borrow().pos; for lnode in &self.end_nodes[pos] { let lid = lnode.borrow().node_id; probs.push((alpha[lid] + theta * lnode.borrow().score - z).exp()) } let dist = WeightedIndex::new(&probs).unwrap(); let index = dist.sample(&mut rng); node = Rc::clone(&self.end_nodes[pos][index]); if node == self.bos_node() { break; } z = alpha[node.borrow().node_id]; results.push(Rc::clone(&node)); } results.reverse(); results } pub fn sample_token(&self, theta: f64) -> Vec { self.sample(theta) .iter() .map(|node| self.piece(&node.borrow())) .collect() } } #[cfg(test)] mod tests { use super::*; use assert_approx_eq::assert_approx_eq; #[test] fn set_sentence() { let lattice = Lattice::from("", 1, 2); assert_eq!(lattice.len(), 0); let lattice = Lattice::from("", 1, 2); assert_eq!(lattice.len(), 0); assert_eq!(lattice.sentence(), ""); assert_eq!(lattice.surface(0), ""); let lattice = Lattice::from("test", 1, 2); assert_eq!(lattice.len(), 4); assert_eq!(lattice.sentence(), "test"); assert_eq!(lattice.surface(0), "test"); assert_eq!(lattice.surface(1), "est"); assert_eq!(lattice.surface(2), "st"); assert_eq!(lattice.surface(3), "t"); let bos = lattice.bos_node(); let eos = lattice.eos_node(); assert_eq!(bos.borrow().id, 1); assert_eq!(eos.borrow().id, 2); assert_eq!( lattice.end_nodes[0].first().unwrap().borrow().id, bos.borrow().id ); assert_eq!( lattice.begin_nodes[4].first().unwrap().borrow().id, eos.borrow().id ); let lattice = Lattice::from("テストab", 1, 2); assert_eq!(lattice.len(), 11); assert_eq!(lattice.sentence(), "テストab"); assert_eq!(lattice.surface(0), "テストab"); assert_eq!(lattice.surface(1), "ストab"); assert_eq!(lattice.surface(2), "トab"); assert_eq!(lattice.surface(3), "ab"); assert_eq!(lattice.surface(4), "b"); } #[test] fn insert_test() { let mut lattice = Lattice::from("ABあい", 1, 2); lattice.insert(0, 1, 0.0, 3); lattice.insert(1, 1, 0.0, 4); lattice.insert(2, 3, 0.0, 5); lattice.insert(5, 3, 0.0, 6); lattice.insert(0, 2, 0.0, 7); lattice.insert(1, 4, 0.0, 8); lattice.insert(2, 6, 0.0, 9); // 0 & 1 are bos and eos let node0 = lattice.nodes[2].borrow(); let node1 = lattice.nodes[3].borrow(); let node2 = lattice.nodes[4].borrow(); let node3 = lattice.nodes[5].borrow(); let node4 = lattice.nodes[6].borrow(); let node5 = lattice.nodes[7].borrow(); let node6 = lattice.nodes[8].borrow(); assert_eq!(lattice.piece(&node0), "A"); assert_eq!(lattice.piece(&node1), "B"); assert_eq!(lattice.piece(&node2), "あ"); assert_eq!(lattice.piece(&node3), "い"); assert_eq!(lattice.piece(&node4), "AB"); assert_eq!(lattice.piece(&node5), "Bあ"); assert_eq!(lattice.piece(&node6), "あい"); assert_eq!(node0.pos, 0); assert_eq!(node1.pos, 1); assert_eq!(node2.pos, 2); assert_eq!(node3.pos, 5); assert_eq!(node4.pos, 0); assert_eq!(node5.pos, 1); assert_eq!(node6.pos, 2); assert_eq!(node0.length, 1); assert_eq!(node1.length, 1); assert_eq!(node2.length, 3); assert_eq!(node3.length, 3); assert_eq!(node4.length, 2); assert_eq!(node5.length, 4); assert_eq!(node6.length, 6); assert_eq!(lattice.bos_node().borrow().id, 1); assert_eq!(lattice.eos_node().borrow().id, 2); assert_eq!(node0.id, 3); assert_eq!(node1.id, 4); assert_eq!(node2.id, 5); assert_eq!(node3.id, 6); assert_eq!(node4.id, 7); assert_eq!(node5.id, 8); assert_eq!(node6.id, 9); assert_eq!(lattice.begin_nodes[0].len(), 2); assert_eq!(lattice.begin_nodes[1].len(), 2); assert_eq!(lattice.begin_nodes[2].len(), 2); assert_eq!(lattice.begin_nodes[5].len(), 1); assert_eq!(lattice.begin_nodes[8].len(), 1); assert_eq!(lattice.end_nodes[0].len(), 1); assert_eq!(lattice.end_nodes[1].len(), 1); assert_eq!(lattice.end_nodes[2].len(), 2); assert_eq!(lattice.end_nodes[5].len(), 2); assert_eq!(lattice.end_nodes[8].len(), 2); assert_eq!(lattice.begin_nodes[0][0].borrow().id, node0.id); assert_eq!(lattice.begin_nodes[0][1].borrow().id, node4.id); assert_eq!(lattice.begin_nodes[1][0].borrow().id, node1.id); assert_eq!(lattice.begin_nodes[1][1].borrow().id, node5.id); assert_eq!(lattice.begin_nodes[2][0].borrow().id, node2.id); assert_eq!(lattice.begin_nodes[2][1].borrow().id, node6.id); assert_eq!(lattice.begin_nodes[5][0].borrow().id, node3.id); assert_eq!( lattice.eos_node().borrow().id, lattice.begin_nodes[8][0].borrow().id ); assert_eq!( lattice.bos_node().borrow().id, lattice.end_nodes[0][0].borrow().id ); assert_eq!(node0.id, lattice.end_nodes[1][0].borrow().id); assert_eq!(node1.id, lattice.end_nodes[2][0].borrow().id); assert_eq!(node4.id, lattice.end_nodes[2][1].borrow().id); assert_eq!(node2.id, lattice.end_nodes[5][0].borrow().id); assert_eq!(node5.id, lattice.end_nodes[5][1].borrow().id); assert_eq!(node3.id, lattice.end_nodes[8][0].borrow().id); assert_eq!(node6.id, lattice.end_nodes[8][1].borrow().id); } #[test] fn test_viterbi() { let mut lattice = Lattice::from("ABC", 1, 2); assert_eq!(lattice.viterbi(), vec![]); // Still incomplete lattice.insert(0, 1, 0.0, 3); assert_eq!(lattice.viterbi(), vec![]); lattice.insert(1, 1, 0.0, 4); lattice.insert(2, 1, 0.0, 5); // XXX: In sentence piece this is not tested, still incomplete ? assert_eq!(lattice.viterbi().len(), 3); } #[test] fn test_viterbi2() { let mut lattice = Lattice::from("ABC", 1, 2); lattice.insert(0, 1, 0.0, 3); lattice.insert(1, 1, 0.0, 4); lattice.insert(2, 1, 0.0, 5); assert_eq!(lattice.tokens(), ["A", "B", "C"]); lattice.insert(0, 2, 2.0, 6); assert_eq!(lattice.tokens(), ["AB", "C"]); lattice.insert(1, 2, 5.0, 7); assert_eq!(lattice.tokens(), ["A", "BC"]); lattice.insert(0, 3, 10.0, 8); assert_eq!(lattice.tokens(), ["ABC"]); } #[test] fn test_nbest() { let mut lattice = Lattice::from("ABC", 1, 2); lattice.insert(0, 1, 0.0, 3); lattice.insert(1, 1, 0.0, 4); lattice.insert(2, 1, 0.0, 5); lattice.insert(0, 2, 2.0, 6); lattice.insert(1, 2, 5.0, 7); lattice.insert(0, 3, 10.0, 8); let nbests = lattice.nbest_tokens(10); assert_eq!( nbests, vec![ vec!["ABC"], vec!["A", "BC"], vec!["AB", "C"], vec!["A", "B", "C"] ] ); assert!(lattice.nbest_tokens(0).is_empty()); assert_eq!(lattice.nbest_tokens(1), vec![vec!["ABC"]]); } #[test] fn test_log_sum_exp() { let mut x = 0.0; let v: Vec = vec![1.0, 2.0, 3.0]; for (i, y) in v.iter().enumerate() { x = log_sum_exp(x, *y, i == 0); } assert_approx_eq!(x, v.iter().map(|n| n.exp()).sum::().ln(), 0.001); } #[test] fn test_populate() { let mut lattice = Lattice::from("ABC", 1, 2); lattice.insert(0, 1, 1.0, 3); // A lattice.insert(1, 1, 1.2, 4); // B lattice.insert(2, 1, 2.5, 5); // C lattice.insert(0, 2, 3.0, 6); // AB lattice.insert(1, 2, 4.0, 7); // BC lattice.insert(0, 3, 2.0, 8); // ABC let mut probs = vec![0.0; 9]; let p1 = (1.0_f64 + 1.2 + 2.5).exp(); let p2 = (3.0_f64 + 2.5).exp(); let p3 = (1.0_f64 + 4.0).exp(); let p4 = 2.0_f64.exp(); let z = p1 + p2 + p3 + p4; let log_z = lattice.populate_marginal(1.0, &mut probs); assert_approx_eq!(log_z, z.ln(), 0.001); assert_approx_eq!(probs[0], 0.0, 0.001); assert_approx_eq!(probs[1], 0.0, 0.001); assert_approx_eq!(probs[2], 0.0, 0.001); assert_approx_eq!(probs[3], (p1 + p3) / z, 0.001); assert_approx_eq!(probs[4], (p1) / z, 0.001); assert_approx_eq!(probs[5], (p1 + p2) / z, 0.001); assert_approx_eq!(probs[6], (p2) / z, 0.001); assert_approx_eq!(probs[7], (p3) / z, 0.001); assert_approx_eq!(probs[8], (p4) / z, 0.001); } } ================================================ FILE: tokenizers/src/models/unigram/mod.rs ================================================ //! [Unigram](https://arxiv.org/abs/1804.10959) model. mod lattice; mod model; mod serialization; mod trainer; mod trie; pub use lattice::*; pub use model::*; pub use trainer::*; ================================================ FILE: tokenizers/src/models/unigram/model.rs ================================================ use super::{ lattice::Lattice, trainer::UnigramTrainer, trie::{Trie, TrieBuilder}, }; use crate::tokenizer::{Model, Result, Token}; use crate::utils::cache::{Cache, MAX_LENGTH}; use std::collections::HashMap; use ahash::AHashMap; use std::convert::TryInto; use std::fs::read_to_string; use std::path::{Path, PathBuf}; type TokenMap = AHashMap; type Vocab = Vec<(String, f64)>; /// A `Unigram` model to encode sentences. pub struct Unigram { token_to_ids: TokenMap, pub(crate) vocab: Vocab, cache: Cache>, trie: Trie, pub min_score: f64, pub(super) unk_id: Option, pub(super) bos_id: usize, pub(super) eos_id: usize, fuse_unk: bool, is_optimized: bool, byte_fallback: bool, } impl PartialEq for Unigram { fn eq(&self, other: &Self) -> bool { self.unk_id == other.unk_id && self.vocab == other.vocab } } impl Clone for Unigram { // `Clone` can't be derive because it's not implemented for `Cache`. // To keep things simple when we clone, the new Unigram will start with a fresh cache. fn clone(&self) -> Self { let fresh_cache = self.cache.fresh(); Self { vocab: self.vocab.clone(), cache: fresh_cache, token_to_ids: self.token_to_ids.clone(), trie: self.trie.clone(), min_score: self.min_score, unk_id: self.unk_id, bos_id: self.bos_id, eos_id: self.eos_id, fuse_unk: self.fuse_unk, is_optimized: self.is_optimized, byte_fallback: self.byte_fallback, } } } impl std::fmt::Debug for Unigram { fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { fmt.debug_struct("Unigram") .field("vocab", &self.vocab.len()) .field("unk_id", &self.unk_id) .field("byte_fallback", &self.byte_fallback) .finish() } } static K_UNK_PENALTY: f64 = 10.0; #[derive(thiserror::Error, Debug)] pub enum UnigramError { #[error("The vocabulary is empty but at least is needed")] EmptyVocabulary, #[error("The `unk_id` is larger than vocabulary size")] UnkIdNotInVocabulary, #[error("Encountered an unknown token but `unk_id` is missing")] MissingUnkId, } impl Default for Unigram { fn default() -> Self { let vocab = vec![("".to_string(), 0.0)]; Self::from(vocab, Some(0), false).unwrap() } } impl Unigram { /// Create a `Unigram` model from a given vocabulary. /// Vocabulary are the various tokens and their associated score which is a sort of a logprob of /// their frequency, which will enable tokenization and sampling. /// unk_id, is the index within the vocabulary. /// For now `Unigram` *requires* at least `unk` because we might find a never seen char. /// Further versions might allow that part to be hidden. pub fn from( vocab: Vec<(String, f64)>, unk_id: Option, byte_fallback: bool, ) -> Result { let n = vocab.len(); let mut token_to_ids: TokenMap = AHashMap::new(); let mut builder = TrieBuilder::default(); if let Some(unk_id) = unk_id { if vocab.is_empty() { return Err(Box::new(UnigramError::EmptyVocabulary)); } if unk_id >= vocab.len() { return Err(Box::new(UnigramError::UnkIdNotInVocabulary)); } } let bos_id = n + 1; let eos_id = n + 2; let mut min_score = f64::INFINITY; for (id, (token, score)) in vocab.iter().enumerate() { token_to_ids.insert(token.to_string(), id as u32); builder.push(token.as_bytes()); if score < &min_score { min_score = *score; } } let trie = builder.build(); let fuse_unk = true; let is_optimized = true; Ok(Self { vocab, token_to_ids, trie, min_score, bos_id, eos_id, unk_id, fuse_unk, cache: Cache::default(), is_optimized, byte_fallback, }) } #[cfg(test)] pub(super) fn set_fuse_unk(&mut self, fuse_unk: bool) { self.fuse_unk = fuse_unk; self.cache = self.cache.fresh(); } #[cfg(test)] pub(super) fn set_optimized(&mut self, is_optimized: bool) { self.is_optimized = is_optimized; } pub fn byte_fallback(&self) -> bool { self.byte_fallback } pub(super) fn len(&self) -> usize { self.vocab.len() } pub(super) fn populate_nodes(&self, lattice: &mut Lattice) { let unk_score = self.min_score - K_UNK_PENALTY; let len = lattice.len(); let mut begin_pos = 0; while begin_pos < len { let mblen = lattice.sentence[begin_pos..] .chars() .next() .unwrap() .len_utf8(); let mut has_single_node = false; for bytes in self .trie .common_prefix_search(lattice.sentence.bytes().skip(begin_pos)) { let n = bytes.len(); let tok = String::from_utf8(bytes).unwrap(); let id = *self.token_to_ids.get(&tok).unwrap(); let item = &self.vocab[id as usize]; assert_eq!(item.0, tok); let score: f64 = item.1; lattice.insert(begin_pos, n, score, id.try_into().unwrap()); if !has_single_node && n == mblen { has_single_node = true; } } if !has_single_node { if let Some(unk_id) = self.unk_id { lattice.insert(begin_pos, mblen, unk_score, unk_id); } } begin_pos += mblen } } /// This functions take a String, and will encode it in a Vec of Strings, /// of the best tokenization available to the current model. /// ``` /// use tokenizers::models::unigram::Unigram; /// /// let pieces = vec![ /// ("".to_string(), 0.0), /// ("a".to_string(), 0.0), /// ("b".to_string(), 0.0), /// ("c".to_string(), 0.0), /// ("d".to_string(), 0.0), /// ("cd".to_string(), 1.0), /// ("ab".to_string(), 2.0), /// ("abc".to_string(), 5.0), /// ("abcd".to_string(), 10.0), /// ]; /// let model = Unigram::from(pieces, Some(0), false).unwrap(); /// let result = model.encode("abcdacdxx").unwrap(); /// assert_eq!(result, vec!["abcd", "a", "cd", "xx"]); /// ``` pub fn encode(&self, sentence: &str) -> Result> { if sentence.is_empty() { return Ok(vec![]); } if let Some(result) = self.cache.get(sentence) { Ok(result.to_vec()) } else { let result = if self.is_optimized { self.encode_optimized(sentence)? } else { self.encode_unoptimized(sentence)? }; if sentence.len() < MAX_LENGTH { self.cache.set(sentence.to_owned(), result.clone()); } Ok(result) } } fn encode_optimized(&self, sentence: &str) -> Result> { // https://github.com/google/sentencepiece/blob/d48247191a6d50e469ed1a4a36e877befffd1851/src/unigram_model.cc#L600 #[derive(Debug, Clone)] struct BestPathNode { /// The vocab id. (maybe UNK) id: usize, /// The total score of the best path ending at this node. best_path_score: f64, /// The starting position (in utf-8) of this node. The entire best /// path can be constructed by backtracking along this link. starts_at: Option, } impl Default for BestPathNode { fn default() -> Self { Self { id: 0, best_path_score: 0.0, starts_at: None, } } } let size = sentence.len(); let unk_score = self.min_score - K_UNK_PENALTY; let mut best_path_ends_at = vec![BestPathNode::default(); size + 1]; let mut starts_at = 0; while starts_at < size { let best_path_score_till_here = best_path_ends_at[starts_at].best_path_score; let mut has_single_node = false; let mblen = sentence[starts_at..].chars().next().unwrap().len_utf8(); for tok_bytes in self .trie .common_prefix_search(sentence.bytes().skip(starts_at)) { let key_pos = starts_at + tok_bytes.len(); let token: String = String::from_utf8(tok_bytes).unwrap(); let target_node = &mut best_path_ends_at[key_pos]; let length = key_pos - starts_at; let id = self.token_to_ids.get(&token).unwrap(); let score = self.vocab.get(*id as usize).unwrap().1; let candidate_best_path_score = score + best_path_score_till_here; if target_node.starts_at.is_none() || candidate_best_path_score > target_node.best_path_score { target_node.best_path_score = candidate_best_path_score; target_node.starts_at = Some(starts_at); target_node.id = *id as usize; } if !has_single_node && length == mblen { has_single_node = true; } } if !has_single_node { let target_node = &mut best_path_ends_at[starts_at + mblen]; let candidate_best_path_score = unk_score + best_path_score_till_here; if target_node.starts_at.is_none() || candidate_best_path_score > target_node.best_path_score { target_node.best_path_score = candidate_best_path_score; target_node.starts_at = Some(starts_at); target_node.id = self.unk_id.ok_or(UnigramError::MissingUnkId)?; } } starts_at += mblen } let mut ends_at = size; let mut results: Vec = vec![]; let mut token = vec![]; while ends_at > 0 { let node = &best_path_ends_at[ends_at]; let starts_at = node.starts_at.unwrap(); if self.fuse_unk && Some(node.id) == self.unk_id { token.push(sentence[starts_at..ends_at].to_string()); } else { if !token.is_empty() { token.reverse(); results.push(token.concat()); token = vec![]; } results.push(sentence[starts_at..ends_at].to_string()); } ends_at = starts_at; } if !token.is_empty() { token.reverse(); results.push(token.concat()); } results.reverse(); Ok(results) } fn encode_unoptimized(&self, sentence: &str) -> Result> { let mut lattice = Lattice::from(sentence, self.bos_id, self.eos_id); self.populate_nodes(&mut lattice); if self.fuse_unk { let mut results = vec![]; let mut token = String::new(); for node in lattice.viterbi().iter() { let item = lattice.piece(&node.borrow()); if node.borrow().id == self.unk_id.ok_or(UnigramError::MissingUnkId)? { token.push_str(&item); } else { if !token.is_empty() { results.push(token); token = String::new(); } results.push(item); } } if !token.is_empty() { results.push(token); } Ok(results) } else { Ok(lattice.tokens()) } } /// Iterate of vocabulary of the model as a pair of `(token, score)`. pub fn iter(&self) -> UnigramIterator<'_> { UnigramIterator { model: self, i: 0 } } /// Loads a SentencePiece output model after being trained by tokenizers. /// After that you can use the model with tokenizers library. /// ```no_run /// use tokenizers::models::unigram::Unigram; /// use std::path::Path; /// /// let model = Unigram::load("mymodel-unigram.json").unwrap(); /// ``` pub fn load>(path: P) -> Result { let string = read_to_string(path)?; Ok(serde_json::from_str(&string)?) } /// Clears the internal cache pub fn clear_cache(&mut self) { self.cache.clear(); } /// Resize the cache pub fn resize_cache(&mut self, capacity: usize) { self.cache.resize(capacity); } } /// Iterator to iterate of vocabulary of the model, and their relative score. pub struct UnigramIterator<'a> { model: &'a Unigram, i: usize, } impl<'a> Iterator for UnigramIterator<'a> { type Item = &'a (String, f64); fn next(&mut self) -> Option { let i = self.i; if i < self.model.len() { let r = Some(&self.model.vocab[i]); self.i += 1; r } else { None } } } impl Model for Unigram { type Trainer = UnigramTrainer; fn get_vocab(&self) -> HashMap { self.token_to_ids.clone().into_iter().collect() } fn get_vocab_size(&self) -> usize { self.vocab.len() } fn tokenize(&self, sentence: &str) -> Result> { let str_tokens = self.encode(sentence)?; let mut offset = 0; let mut tokens = Vec::with_capacity(str_tokens.len()); for string in str_tokens { let len = string.len(); let offsets = (offset, offset + len); let id: u32 = match self.token_to_ids.get(&string) { Some(id) => *id, None => { if self.byte_fallback { let byte_tokens: Option> = string .bytes() .map(|byte| -> Option { let byte_string = format!("<0x{byte:02X}>"); let id = self.token_to_ids.get(&byte_string); id.map(|id| Token::new(*id, byte_string, (offset, offset + len))) }) .collect(); if let Some(byte_tokens) = byte_tokens { for token in byte_tokens { tokens.push(token); } offset += len; continue; } } self.unk_id.ok_or(UnigramError::MissingUnkId)? as u32 } }; offset += len; tokens.push(Token::new(id, string, offsets)); } Ok(tokens) } fn token_to_id(&self, token: &str) -> Option { self.token_to_ids.get(token).copied() } fn id_to_token(&self, id: u32) -> Option { self.vocab.get(id as usize).map(|item| item.0.clone()) } fn save(&self, folder: &Path, name: Option<&str>) -> Result> { let name = match name { Some(name) => format!("{name}-unigram.json"), None => "unigram.json".to_string(), }; let mut fullpath = PathBuf::new(); fullpath.push(folder); fullpath.push(name); let string = serde_json::to_string_pretty(self)?; std::fs::write(&fullpath, string)?; Ok(vec![fullpath]) } fn get_trainer(&self) -> Self::Trainer { UnigramTrainer::default() } } #[cfg(test)] mod tests { use super::*; #[test] fn test_populate_nodes_unk() { let pieces = vec![("".to_string(), 0.0)]; let model = Unigram::from(pieces, Some(0), false).unwrap(); let mut lattice = Lattice::from("abc", model.bos_id, model.eos_id); model.populate_nodes(&mut lattice); assert_eq!(lattice.begin_nodes[0].len(), 1); assert_eq!(lattice.begin_nodes[1].len(), 1); assert_eq!(lattice.begin_nodes[2].len(), 1); assert_eq!(lattice.begin_nodes[0][0].borrow().id, 0); assert_eq!(lattice.begin_nodes[1][0].borrow().id, 0); assert_eq!(lattice.begin_nodes[2][0].borrow().id, 0); assert_eq!(lattice.begin_nodes[0][0].borrow().node_id, 2); assert_eq!(lattice.begin_nodes[1][0].borrow().node_id, 3); assert_eq!(lattice.begin_nodes[2][0].borrow().node_id, 4); } #[test] fn test_populate_nodes() { let pieces = vec![ ("".to_string(), 0.0), ("a".to_string(), 0.1), ("b".to_string(), 0.2), ("ab".to_string(), 0.3), ("bc".to_string(), 0.4), ]; let model = Unigram::from(pieces, Some(0), false).unwrap(); let mut lattice = Lattice::from("abc", model.bos_id, model.eos_id); model.populate_nodes(&mut lattice); assert_eq!(lattice.begin_nodes[0].len(), 2); // a, ab assert_eq!(lattice.begin_nodes[1].len(), 2); // b, bc assert_eq!(lattice.begin_nodes[2].len(), 1); // c(unk) // Id is the vocabulary id from Unigram model // node_id is simply the rank of the given node in the lattice. assert_eq!(lattice.begin_nodes[0][0].borrow().id, 1); assert_eq!(lattice.begin_nodes[0][1].borrow().id, 3); assert_eq!(lattice.begin_nodes[1][0].borrow().id, 2); assert_eq!(lattice.begin_nodes[1][1].borrow().id, 4); assert_eq!(lattice.begin_nodes[2][0].borrow().id, 0); assert_eq!(lattice.begin_nodes[0][0].borrow().node_id, 2); assert_eq!(lattice.begin_nodes[0][1].borrow().node_id, 3); assert_eq!(lattice.begin_nodes[1][0].borrow().node_id, 4); assert_eq!(lattice.begin_nodes[1][1].borrow().node_id, 5); assert_eq!(lattice.begin_nodes[2][0].borrow().node_id, 6); } #[test] fn test_encode() { let sentencepieces = vec![ ("".to_string(), 0.0), ("a".to_string(), 0.0), ("b".to_string(), 0.0), ("c".to_string(), 0.0), ("d".to_string(), 0.0), ("cd".to_string(), 1.0), ("ab".to_string(), 2.0), ("abc".to_string(), 5.0), ("abcd".to_string(), 10.0), ]; let model = Unigram::from(sentencepieces, Some(0), false).unwrap(); let result = model.encode("abcd").unwrap(); assert_eq!(result, vec!["abcd"]); } #[test] fn test_encode2() { let sentencepieces = vec![ ("".to_string(), 0.0), ("ab".to_string(), 0.0), ("cd".to_string(), -0.1), ("abc".to_string(), -0.2), ("a".to_string(), -0.3), ("b".to_string(), -0.4), ("c".to_string(), -0.5), ("ABC".to_string(), -0.5), ("abcdabcd".to_string(), 20.0), // User defined just max the scores. ("q".to_string(), 20.5), ("r".to_string(), 20.5), ("qr".to_string(), -0.5), ]; let mut model = Unigram::from(sentencepieces, Some(0), false).unwrap(); for is_optimized in &[true, false] { model.set_optimized(*is_optimized); println!("IsOptimized {is_optimized:?}"); assert_eq!(model.encode("abc").unwrap(), vec!["abc"]); assert_eq!(model.encode("AB").unwrap(), vec!["AB"]); model.set_fuse_unk(false); assert_eq!(model.encode("AB").unwrap(), vec!["A", "B"]); model.set_fuse_unk(true); assert_eq!(model.encode("AB").unwrap(), vec!["AB"]); assert_eq!(model.encode("abcd").unwrap(), vec!["ab", "cd"]); assert_eq!(model.encode("abcc").unwrap(), vec!["abc", "c"]); assert_eq!( model.encode("xabcabaabcdd").unwrap(), vec!["x", "abc", "ab", "a", "ab", "cd", "d"] ); model.set_fuse_unk(false); assert_eq!( model.encode("xyz東京").unwrap(), vec!["x", "y", "z", "東", "京"] ); model.set_fuse_unk(true); assert_eq!(model.encode("xyz東京").unwrap(), vec!["xyz東京"]); // User encoded in original version assert_eq!(model.encode("ABC").unwrap(), vec!["ABC"]); assert_eq!(model.encode("abABCcd").unwrap(), vec!["ab", "ABC", "cd"]); assert_eq!( model.encode("ababcdabcdcd").unwrap(), vec!["ab", "abcdabcd", "cd"] ); assert_eq!(model.encode("abqrcd").unwrap(), vec!["ab", "q", "r", "cd"]); } } #[test] fn test_unigram_bytefallback() { // In [97]: processor.encode_as_pieces("⅐⅛⅑ ") // Out[97]: ['▁', '<0xE2>', '<0x85>', '<0x90>', '⅛', '<0xE2>', '<0x85>', '<0x91>', '▁'] let sentencepieces = vec![ ("".to_string(), 0.0), ("<0xC3>".to_string(), -0.01), ("<0xA9>".to_string(), -0.03), ]; let unigram = Unigram::from(sentencepieces, Some(0), true).unwrap(); let tokens: Vec = unigram.tokenize("é").unwrap(); assert_eq!( tokens, [ Token { id: 1, value: "<0xC3>".to_string(), offsets: (0, 2) }, Token { id: 2, value: "<0xA9>".to_string(), offsets: (0, 2) } ] ); let tokens = unigram.tokenize("?é").unwrap(); assert_eq!(tokens[0].id, 0); } } ================================================ FILE: tokenizers/src/models/unigram/serialization.rs ================================================ use super::model::Unigram; use serde::{ de::{Error, MapAccess, Visitor}, ser::SerializeStruct, Deserialize, Deserializer, Serialize, Serializer, }; impl Serialize for Unigram { fn serialize(&self, serializer: S) -> Result where S: Serializer, { let mut model = serializer.serialize_struct("Unigram", 3)?; model.serialize_field("type", "Unigram")?; model.serialize_field("unk_id", &self.unk_id)?; model.serialize_field("vocab", &self.vocab)?; model.serialize_field("byte_fallback", &self.byte_fallback())?; model.end() } } impl<'de> Deserialize<'de> for Unigram { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { deserializer.deserialize_struct( "Unigram", &["type", "vocab", "unk_id", "byte_fallback"], UnigramVisitor, ) } } struct UnigramVisitor; impl<'de> Visitor<'de> for UnigramVisitor { type Value = Unigram; fn expecting(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { write!(fmt, "struct Unigram") } fn visit_map(self, mut map: V) -> std::result::Result where V: MapAccess<'de>, { let mut vocab: Option> = None; let mut unk_id: Option = None; let mut byte_fallback: bool = false; while let Some(key) = map.next_key::()? { match key.as_ref() { "unk_id" => { unk_id = map.next_value()?; } "byte_fallback" => byte_fallback = map.next_value()?, "vocab" => vocab = Some(map.next_value()?), "type" => match map.next_value()? { "Unigram" => {} u => { return Err(serde::de::Error::invalid_value( serde::de::Unexpected::Str(u), &"Unigram", )) } }, _ => (), } } match (vocab, unk_id, byte_fallback) { (Some(vocab), unk_id, byte_fallback) => Ok(Unigram::from(vocab, unk_id, byte_fallback) .map_err(|err| Error::custom(format!("Unable to load vocab {err:?}")))?), (None, _, _) => Err(Error::custom("Missing vocab")), } } } #[cfg(test)] mod test { use super::*; #[test] fn test_serialization() { let vocab = vec![("".to_string(), 0.0), ("a".to_string(), -0.5)]; let model = Unigram::from(vocab, Some(0), false).unwrap(); let data = serde_json::to_string(&model).unwrap(); let reconstructed = serde_json::from_str(&data).unwrap(); assert_eq!(model, reconstructed); } #[test] fn test_serialization_unk_id_not_zero() { let vocab = vec![("a".to_string(), -0.5), ("".to_string(), 0.0)]; let model = Unigram::from(vocab, Some(1), false).unwrap(); let data = serde_json::to_string(&model).unwrap(); let reconstructed = serde_json::from_str(&data).unwrap(); assert_eq!(model, reconstructed); } #[test] fn test_serialization_no_unk_id() { let vocab = vec![("a".to_string(), -0.5)]; let model = Unigram::from(vocab, None, false).unwrap(); let data = serde_json::to_string(&model).unwrap(); let reconstructed = serde_json::from_str(&data).unwrap(); assert_eq!(model, reconstructed); } } ================================================ FILE: tokenizers/src/models/unigram/trainer.rs ================================================ use crate::models::unigram::{lattice::Lattice, model::Unigram}; use crate::tokenizer::{AddedToken, Result, Trainer}; use crate::utils::parallelism::*; use crate::utils::progress::{ProgressBar, ProgressStyle}; use ahash::{AHashMap, AHashSet}; use log::debug; use serde::{Deserialize, Serialize}; use std::cmp::Reverse; use std::convert::TryInto; // A token and a score type SentencePiece = (String, f64); // A full sentence or word + it's count within the dataset type Sentence = (String, u32); fn digamma(mut x: f64) -> f64 { let mut result = 0.0; while x < 7.0 { result -= 1.0 / x; x += 1.0; } x -= 1.0 / 2.0; let xx = 1.0 / x; let xx2 = xx * xx; let xx4 = xx2 * xx2; result += x.ln() + (1.0 / 24.0) * xx2 - 7.0 / 960.0 * xx4 + (31.0 / 8064.0) * xx4 * xx2 - (127.0 / 30720.0) * xx4 * xx4; result } #[derive(thiserror::Error, Debug)] pub enum UnigramTrainerError { #[error("The vocabulary is not large enough to contain all chars")] VocabularyTooSmall, } fn to_log_prob(pieces: &mut [SentencePiece]) { let sum: f64 = pieces.iter().map(|(_, score)| score).sum(); let logsum = sum.ln(); for (_, score) in pieces.iter_mut() { *score = score.ln() - logsum; } } /// A `UnigramTrainer` can train a `Unigram` model from `word_counts`. #[non_exhaustive] #[derive(Builder, Debug, Clone, Serialize, Deserialize)] pub struct UnigramTrainer { #[builder(default = "true")] pub show_progress: bool, #[builder(default = "8000")] pub vocab_size: u32, #[builder(default = "2")] pub n_sub_iterations: u32, #[builder(default = "0.75")] pub shrinking_factor: f64, #[builder(default = "vec![]")] pub special_tokens: Vec, #[builder(default = "AHashSet::new()")] pub initial_alphabet: AHashSet, #[builder(default = "None")] pub unk_token: Option, #[builder(default = "16")] pub max_piece_length: usize, #[builder(default = "1_000_000")] seed_size: usize, #[builder(default = "AHashMap::new()")] words: AHashMap, } impl Default for UnigramTrainer { fn default() -> Self { Self::builder().build().unwrap() } } impl UnigramTrainer { pub fn builder() -> UnigramTrainerBuilder { UnigramTrainerBuilder::default() } /// Setup a progress bar if asked to show progress fn setup_progress(&self) -> Option { if self.show_progress { let p = ProgressBar::new(0); p.set_style( ProgressStyle::default_bar() .template("[{elapsed_precise}] {msg:<30!} {wide_bar} {pos:<9!}/{len:>9!}") .expect("Invalid progress template"), ); Some(p) } else { None } } fn is_valid_sentencepiece(&self, char_string: &[char]) -> bool { // Checks string length // Space not in the substring, numbers, hiragana and more should be taken // care of within pre_tokenizers. // https://github.com/google/sentencepiece/blob/26be9516cd81d5315ee31c48d2438018e0eab879/src/trainer_interface.cc#L203 let n = char_string.len(); if char_string.is_empty() || n > self.max_piece_length { return false; } true } fn finalize(&self, model: Unigram, required_chars: AHashSet) -> Result { let mut min_score_penalty = 0.0; let min_score_penalty_delta = 0.0001; let mut pieces: Vec<(String, f64)> = vec![]; let mut inserted: AHashSet = AHashSet::new(); // We don't want to include the that was used to train inserted.insert("".into()); let existing_pieces: AHashMap = model.iter().cloned().collect(); for c in required_chars { if let Some(t) = existing_pieces.get(&c) { inserted.insert(c.clone()); pieces.push((c, *t)); } else { let score = model.min_score + min_score_penalty; inserted.insert(c.clone()); pieces.push((c, score)); min_score_penalty += min_score_penalty_delta; } } let (unk_id, need_add_unk) = if let Some(ref unk) = self.unk_token { let unk_id = self.special_tokens.iter().enumerate().find_map(|(i, t)| { if t.content == *unk { Some(i) } else { None } }); match unk_id { Some(id) => (Some(id), false), None => (Some(0), true), } } else { (None, false) }; let vocab_size_without_special_tokens = if need_add_unk { self.vocab_size as usize - self.special_tokens.len() - 1 } else { self.vocab_size as usize - self.special_tokens.len() }; for (token, score) in model.iter() { if inserted.contains::(token) { continue; } inserted.insert(token.to_string()); pieces.push((token.to_string(), if score.is_nan() { 0.0 } else { *score })); if pieces.len() == vocab_size_without_special_tokens { break; } } pieces.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap()); // Insert the necessary tokens let mut special_tokens = self .special_tokens .iter() .map(|t| (t.content.clone(), 0.0)) .collect::>(); if need_add_unk { special_tokens.insert(0, (self.unk_token.clone().unwrap(), 0.0)); } Unigram::from( special_tokens.into_iter().chain(pieces).collect(), unk_id, model.byte_fallback(), ) } fn required_chars(&self, word_counts: &[Sentence]) -> AHashSet { word_counts .iter() .flat_map(|(s, _count)| s.chars()) .chain(self.initial_alphabet.iter().copied()) .map(|c| c.to_string()) .collect() } fn make_seed_sentence_pieces( &self, sentences: &[Sentence], _progress: &Option, ) -> Vec { // Put all sentences in a string, separated by \0 let total: usize = sentences .iter() .map(|(s, _)| s.chars().count()) .sum::() + sentences.len(); let mut flat_string = String::with_capacity(total); let mut all_chars: AHashMap = AHashMap::new(); let c_sentence_boundary = '\0'; let k_sentence_boundary = '\0'.to_string(); for (string, n) in sentences { if string.is_empty() { continue; } flat_string.push_str(string); // XXX // Comment suggests we add sentence boundary, but it seems to be missing from actual // code in spm. flat_string.push_str(&k_sentence_boundary); for c in string.chars() { if c != c_sentence_boundary { *all_chars.entry(c).or_insert(0) += n; } } } flat_string.shrink_to_fit(); #[cfg(feature = "esaxx_fast")] let suffix = esaxx_rs::suffix(&flat_string).unwrap(); #[cfg(not(feature = "esaxx_fast"))] let suffix = esaxx_rs::suffix_rs(&flat_string).unwrap(); // Basic chars need to be in sentence pieces. let mut seed_sentencepieces: Vec = vec![]; let mut sall_chars: Vec<_> = all_chars.into_iter().map(|(a, b)| (b, a)).collect(); // Reversed order sall_chars.sort_by_key(|&a| Reverse(a)); let mut substr_index: Vec<_> = suffix .iter() .filter_map(|(string, freq)| { if string.len() <= 1 { return None; } if string.contains(&c_sentence_boundary) { return None; } if !self.is_valid_sentencepiece(string) { return None; } let score = freq * string.len() as u32; // if let Some(p) = &progress { // p.inc(1); // } Some((score, string)) }) .collect(); // Fill seed_sentencepieces for (count, character) in sall_chars { seed_sentencepieces.push((character.to_string(), count.into())); } // sort by decreasing score substr_index.sort_by_key(|&a| Reverse(a)); for (score, char_string) in substr_index { // Just in case assert!(self.is_valid_sentencepiece(char_string)); let string: String = char_string.iter().collect(); seed_sentencepieces.push((string, score.into())); if seed_sentencepieces.len() >= self.seed_size { break; } } to_log_prob(&mut seed_sentencepieces); seed_sentencepieces } fn prune_sentence_pieces( &self, model: &Unigram, pieces: &[SentencePiece], sentences: &[Sentence], ) -> Vec { let mut always_keep = vec![true; pieces.len()]; let mut alternatives: Vec> = vec![Vec::new(); pieces.len()]; let bos_id = pieces.len() + 1; let eos_id = pieces.len() + 2; // First, segments the current sentencepieces to know // how each sentencepiece is resegmented if this sentencepiece is removed // from the vocabulary. // To do so, we take the second best segmentation of sentencepiece[i]. // alternatives[i] stores the sequence of second best sentencepieces. for (id, (token, _score)) in pieces.iter().enumerate() { // Always keep unk. if id == 0 { always_keep[id] = false; continue; } let mut lattice = Lattice::from(token, bos_id, eos_id); model.populate_nodes(&mut lattice); let nbests = lattice.nbest(2); if nbests.len() == 1 { always_keep[id] = true; } else if nbests[0].len() >= 2 { always_keep[id] = false; } else if nbests[0].len() == 1 { always_keep[id] = true; for node in &nbests[1] { let alt_id = node.borrow().id; alternatives[id].push(alt_id); } } } // Second, segments all sentences to compute likelihood // with a unigram language model. inverted[i] stores // the set of sentence index where the sentencepieces[i] appears. let chunk_size = std::cmp::max(sentences.len() / current_num_threads(), 1); let indexed_sentences: Vec<(usize, &Sentence)> = sentences.iter().enumerate().collect(); let collected: (f64, Vec, Vec>) = indexed_sentences .maybe_par_chunks(chunk_size) .map(|enumerated_sentence_count_chunk| { let mut vsum = 0.0; let mut freq: Vec = vec![0.0; pieces.len()]; let mut inverted: Vec> = vec![Vec::new(); pieces.len()]; for (i, (sentence, count)) in enumerated_sentence_count_chunk { let mut lattice = Lattice::from(sentence, bos_id, eos_id); model.populate_nodes(&mut lattice); vsum += *count as f64; for node_ref in lattice.viterbi() { let id = node_ref.borrow().id; freq[id] += *count as f64; inverted[id].push(*i); } } (vsum, freq, inverted) }) .reduce( || (0.0, vec![0.0; pieces.len()], vec![Vec::new(); pieces.len()]), |(vsum, freq, inverted), (lvsum, lfreq, linverted)| { ( vsum + lvsum, freq.iter() .zip(lfreq) .map(|(global_el, local_el)| global_el + local_el) .collect(), inverted .iter() .zip(linverted) .map(|(global_el, local_el)| [&global_el[..], &local_el[..]].concat()) .collect(), ) }, ); let (vsum, freq, inverted) = collected; let sum: f64 = freq.iter().sum(); let logsum = sum.ln(); let mut candidates: Vec<(usize, f64)> = vec![]; let mut new_pieces: Vec = Vec::with_capacity(self.vocab_size as usize); new_pieces.push(pieces[0].clone()); // Finally, computes how likely the LM likelihood is reduced if // the sentencepiece[i] is removed from the vocabulary. // Since the exact computation of loss is difficult, we compute the // loss approximately by assuming that all sentencepiece[i] in the sentences // are replaced with alternatives[i] when sentencepiece[i] is removed. for (id, (token, score)) in pieces.iter().enumerate() { if id == 0 { continue; } if freq[id] == 0.0 && !always_keep[id] { // not found in Viterbi path. Can remove this entry safely. continue; } else if alternatives[id].is_empty() { // no alternatives. Keeps this entry. new_pieces.push((token.to_string(), *score)); } else { let mut f = 0.0; // the frequency of pieces[i]; for n in &inverted[id] { let score = sentences[*n].1 as f64; f += score; } // TODO: Temporary hack to avoid Nans. if f == 0.0 || f.is_nan() { // new_pieces.push((token.to_string(), *score)); continue; } f /= vsum; // normalizes by all sentence frequency. let logprob_sp = freq[id].ln() - logsum; // After removing the sentencepiece[i], its frequency freq[i] is // re-assigned to alternatives. // new_sum = current_sum - freq[i] + freq[i] * alternatives.size() // = current_sum + freq[i] (alternatives - 1) let logsum_alt = (sum + freq[id] * (alternatives.len() - 1) as f64).ln(); // The frequencies of alternatives are increased by freq[i]. let mut logprob_alt = 0.0; for n in &alternatives[id] { logprob_alt += (freq[*n] + freq[id]).ln() - logsum_alt; } // loss: the diff of likelihood after removing the sentencepieces[i]. let loss = f * (logprob_sp - logprob_alt); if loss.is_nan() { panic!(""); } candidates.push((id, loss)); } } let desired_vocab_size: usize = (self.vocab_size as usize * 11) / 10; // * 1.1 let pruned_size: usize = ((pieces.len() as f64) * self.shrinking_factor) as usize; let pruned_size = desired_vocab_size.max(pruned_size); candidates.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap()); for (id, _score) in candidates { if new_pieces.len() == pruned_size { break; } new_pieces.push(pieces[id].clone()); } new_pieces } /// Update the progress bar with the new provided length and message fn update_progress(&self, p: &Option, len: usize, message: &'static str) { if let Some(p) = p { p.set_message(message); p.set_length(len as u64); p.reset(); } } /// Set the progress bar in the finish state fn finalize_progress(&self, p: &Option, final_len: usize) { if let Some(p) = p { p.set_length(final_len as u64); p.finish(); println!(); } } fn run_e_step(&self, model: &Unigram, sentences: &[Sentence]) -> (f64, u32, Vec) { let all_sentence_freq: u32 = sentences.iter().map(|(_a, b)| *b).sum(); let chunk_size = std::cmp::max(sentences.len() / current_num_threads(), 1); let collected: (f64, u32, Vec) = sentences .maybe_par_chunks(chunk_size) .map(|sentences_chunk| { let mut expected: Vec = vec![0.0; model.len()]; let mut objs: f64 = 0.0; let mut ntokens: u32 = 0; for (string, freq) in sentences_chunk { let mut lattice = Lattice::from(string, model.bos_id, model.eos_id); model.populate_nodes(&mut lattice); let z: f64 = lattice.populate_marginal(*freq as f64, &mut expected); if z.is_nan() { panic!("likelihood is NAN. Input sentence may be too long."); } ntokens += lattice.viterbi().len() as u32; objs -= z / (all_sentence_freq as f64); } (objs, ntokens, expected) }) .reduce( || (0.0, 0, vec![0.0; model.len()]), |(objs, ntokens, expected), (lobjs, lntokens, lexpected)| { ( objs + lobjs, ntokens + lntokens, expected .iter() .zip(lexpected) .map(|(global_el, local_el)| global_el + local_el) .collect(), ) }, ); collected } fn run_m_step(&self, pieces: &[SentencePiece], expected: &[f64]) -> Vec { if pieces.len() != expected.len() { panic!( "Those two iterators are supposed to be the same length ({} vs {})", pieces.len(), expected.len() ); } let mut new_pieces: Vec = Vec::with_capacity(self.vocab_size.try_into().unwrap()); let mut sum = 0.0; let expected_frequency_threshold = 0.5; for (i, (freq, (piece, _score))) in expected.iter().zip(pieces).enumerate() { // Always keep unk. if i == 0 { new_pieces.push((piece.clone(), f64::NAN)); continue; } if *freq < expected_frequency_threshold { continue; } new_pieces.push((piece.clone(), *freq)); sum += freq; } // // Here we do not use the original EM, but use the // // Bayesianified/DPified EM algorithm. // // https://cs.stanford.edu/~pliang/papers/tutorial-acl2007-talk.pdf // // This modification will act as a sparse prior. let logsum = digamma(sum); let new_pieces: Vec<_> = new_pieces .into_iter() .map(|(s, c)| (s, digamma(c) - logsum)) .collect(); new_pieces } pub fn do_train( &self, sentences: Vec, model: &mut Unigram, ) -> Result> { let progress = self.setup_progress(); // // 1. Compute frequent substrings // TODO Should be able to upgrade to u64 when needed self.update_progress(&progress, sentences.len(), "Suffix array seeds"); let mut pieces: Vec = Vec::with_capacity(self.vocab_size.try_into().unwrap()); // We use a UNK token when training, whatever the `self.unk_token` pieces.push(("".into(), f64::NAN)); pieces.extend(self.make_seed_sentence_pieces(&sentences, &progress)); self.finalize_progress(&progress, sentences.len()); // Useful to check compatibility with spm. debug!( "Using {} pieces on {} sentences for EM training", pieces.len(), sentences.len() ); let desired_vocab_size: usize = (self.vocab_size as usize * 11) / 10; // * 1.1 // 2. Run E-M Loops to fine grain the pieces. // We will shrink the vocab by shrinking_factor every loop on average // Some other pieces are dropped if logprob is too small // V = N * (f)**k // k = log(V / N) / log(f) let expected_loops = (((desired_vocab_size as f64).ln() - (pieces.len() as f64).ln()) / self.shrinking_factor.ln()) as usize + 1; let expected_updates = expected_loops * self.n_sub_iterations as usize; self.update_progress(&progress, expected_updates, "EM training"); let required_chars = self.required_chars(&sentences); if required_chars.len() as u32 > self.vocab_size { return Err(Box::new(UnigramTrainerError::VocabularyTooSmall)); } let mut new_model = Unigram::from(pieces.clone(), Some(0), false)?; loop { // Sub-EM iteration. for _iter in 0..self.n_sub_iterations { // Executes E step let (_objective, _num_tokens, expected) = self.run_e_step(&new_model, &sentences); // Executes M step. pieces = self.run_m_step(&pieces, &expected); new_model = Unigram::from(pieces.clone(), Some(0), false)?; // Useful comment for checking compatibility with spm debug!( "Em iter={} size={} obj={} num_tokens={} num_tokens/piece={}", _iter, new_model.len(), _objective, _num_tokens, _num_tokens as f64 / model.len() as f64 ); if let Some(p) = &progress { p.inc(1); } } // end of Sub EM iteration // Stops the iteration when the size of sentences reaches to the // desired symbol size. if pieces.len() <= desired_vocab_size { break; } // Prunes pieces. pieces = self.prune_sentence_pieces(&new_model, &pieces, &sentences); new_model = Unigram::from(pieces.clone(), Some(0), false)?; } self.finalize_progress(&progress, expected_updates); // Finally, adjusts the size of sentencepices to be |vocab_size|. *model = self.finalize(new_model, required_chars)?; Ok(self.special_tokens.clone()) } } impl Trainer for UnigramTrainer { type Model = Unigram; /// Train a Unigram model fn train(&self, model: &mut Unigram) -> Result> { let sentences: Vec<_> = self.words.iter().map(|(s, i)| (s.to_owned(), *i)).collect(); self.do_train(sentences, model) } /// Whether we should show progress fn should_show_progress(&self) -> bool { self.show_progress } fn feed(&mut self, iterator: I, process: F) -> Result<()> where I: Iterator + Send, S: AsRef + Send, F: Fn(&str) -> Result> + Sync, { let words: Result> = iterator .maybe_par_bridge() .map(|sequence| { let words = process(sequence.as_ref())?; let mut map = AHashMap::new(); for word in words { *map.entry(word).or_default() += 1; } Ok(map) }) .reduce( || Ok(AHashMap::new()), |acc, ws| { let mut acc = acc?; for (k, v) in ws? { *acc.entry(k).or_default() += v; } Ok(acc) }, ); self.words = words?; Ok(()) } } #[cfg(test)] mod tests { use super::*; use assert_approx_eq::assert_approx_eq; use std::iter::FromIterator; #[test] fn test_unigram_chars() { let trainer = UnigramTrainerBuilder::default() .show_progress(false) .build() .unwrap(); let sentences = vec![ ("This is a".to_string(), 1), ("こんにちは友達".to_string(), 1), ]; let required_chars = trainer.required_chars(&sentences); assert_eq!(required_chars.len(), 13); let progress = None; let table = trainer.make_seed_sentence_pieces(&sentences, &progress); let target_strings = vec![ "s", "i", " ", "達", "友", "ん", "は", "に", "ち", "こ", "h", "a", "T", "is ", "s ", ]; let strings: Vec<_> = table.iter().map(|(string, _)| string).collect(); assert_eq!(strings, target_strings); let scores = table.iter().map(|(_, score)| score); let target_scores = vec![ -2.5649493574615367, // 2.0 -2.5649493574615367, // 2.0 -2.5649493574615367, // 2.0 -3.258096538021482, // 1.0 -3.258096538021482, // 1.0 -3.258096538021482, // 1.0 -3.258096538021482, // 1.0 -3.258096538021482, // 1.0 -3.258096538021482, // 1.0 -3.258096538021482, // 1.0 -3.258096538021482, // 1.0 -3.258096538021482, // 1.0 -3.258096538021482, // 1.0 -1.4663370687934272, // 6.0 -1.8718021769015916, // 4.0 ]; for (score, target_score) in scores.zip(target_scores) { assert_approx_eq!(*score, target_score, 0.01); } } #[test] fn test_initial_alphabet() { let trainer = UnigramTrainerBuilder::default() .show_progress(false) .initial_alphabet(AHashSet::from_iter(vec!['a', 'b', 'c', 'd', 'e', 'f'])) .build() .unwrap(); let sentences = vec![("こんにちは友達".to_string(), 1)]; let required_chars = trainer.required_chars(&sentences); assert_eq!( required_chars, vec!["こ", "ん", "に", "ち", "は", "友", "達", "a", "b", "c", "d", "e", "f"] .into_iter() .map(|s| s.to_owned()) .collect::>() ); } #[test] fn test_unk_token() { // 1. Should add `unk_token` as first special token let trainer = UnigramTrainerBuilder::default() .show_progress(false) .special_tokens(vec![ AddedToken::from("[SEP]", true), AddedToken::from("[CLS]", true), ]) .unk_token(Some("[UNK]".into())) .build() .unwrap(); let mut unigram = Unigram::default(); trainer .do_train(vec![("The".into(), 12), ("are".into(), 11)], &mut unigram) .unwrap(); let mut pieces = unigram.iter(); assert_eq!(pieces.next(), Some(&("[UNK]".into(), 0.0))); assert_eq!(pieces.next(), Some(&("[SEP]".into(), 0.0))); assert_eq!(pieces.next(), Some(&("[CLS]".into(), 0.0))); // 2. Let it where it is let trainer = UnigramTrainerBuilder::default() .show_progress(false) .special_tokens(vec![ AddedToken::from("[SEP]", true), AddedToken::from("[CLS]", true), AddedToken::from("[UNK]", true), ]) .unk_token(Some("[UNK]".into())) .build() .unwrap(); let mut unigram = Unigram::default(); trainer .do_train(vec![("The".into(), 12), ("are".into(), 11)], &mut unigram) .unwrap(); let mut pieces = unigram.iter(); assert_eq!(pieces.next(), Some(&("[SEP]".into(), 0.0))); assert_eq!(pieces.next(), Some(&("[CLS]".into(), 0.0))); assert_eq!(pieces.next(), Some(&("[UNK]".into(), 0.0))); // 3. Don't put it there if not needed let trainer = UnigramTrainerBuilder::default() .show_progress(false) .build() .unwrap(); let mut unigram = Unigram::default(); trainer .do_train(vec![("The".into(), 12), ("are".into(), 11)], &mut unigram) .unwrap(); let mut pieces = unigram.iter(); assert_eq!(pieces.next().unwrap().0, "e".to_string()); } #[test] fn test_special_tokens() { let trainer = UnigramTrainerBuilder::default() .show_progress(false) .special_tokens(vec![ AddedToken::from("[SEP]", true), AddedToken::from("[CLS]", true), ]) .build() .unwrap(); let mut unigram = Unigram::default(); trainer .do_train(vec![("The".into(), 12), ("are".into(), 11)], &mut unigram) .unwrap(); let mut pieces = unigram.iter(); assert_eq!(pieces.next(), Some(&("[SEP]".into(), 0.0))); assert_eq!(pieces.next(), Some(&("[CLS]".into(), 0.0))); } #[test] fn test_to_log_prob() { let mut a = vec![("".to_string(), 1.0), ("".to_string(), 2.0)]; to_log_prob(&mut a); let scores = a.iter().map(|(_, score)| *score).collect::>(); // ln(1) - ln(3) assert_approx_eq!(scores[0], -1.098, 0.01); // ln(2) - ln(3) assert_approx_eq!(scores[1], -0.405, 0.01); } } ================================================ FILE: tokenizers/src/models/unigram/trie.rs ================================================ use ahash::AHashMap; use std::hash::Hash; #[derive(Default)] pub struct TrieBuilder