Copy disabled (too large)
Download .txt
Showing preview only (44,096K chars total). Download the full file to get everything.
Repository: google/sentencepiece
Branch: master
Commit: 439fec3ade0a
Files: 261
Total size: 42.0 MB
Directory structure:
gitextract_h_6popb3/
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ └── feature_request.md
│ ├── dependabot.yml
│ ├── pull_request_template.md
│ └── workflows/
│ ├── cifuzz.yml
│ ├── cmake.yml
│ ├── cross_build.yml
│ ├── requirements/
│ │ ├── base.in
│ │ ├── base.txt
│ │ ├── cibuildwheel.in
│ │ └── cibuildwheel.txt
│ └── wheel.yml
├── .gitignore
├── CMakeLists.txt
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── VERSION.txt
├── cmake/
│ └── ios.toolchain.cmake
├── config.h.in
├── contrib/
│ └── docker/
│ ├── Dockerfile
│ └── README.md
├── data/
│ ├── Scripts.txt
│ ├── botchan.txt
│ ├── extract_headers.pl
│ ├── gen_spec_parser.pl
│ ├── gen_unicode_scripts_code.pl
│ ├── ids_denorm.tsv
│ ├── ids_norm.tsv
│ ├── nfc.tsv
│ ├── nfc_cf.tsv
│ ├── nfd.tsv
│ ├── nfd_cf.tsv
│ ├── nfkc.tsv
│ ├── nfkc_cf.tsv
│ ├── nfkd.tsv
│ ├── nfkd_cf.tsv
│ ├── nmt_nfkc.tsv
│ ├── nmt_nfkc_cf.tsv
│ └── wagahaiwa_nekodearu.txt
├── doc/
│ ├── api.md
│ ├── experiments.md
│ ├── normalization.md
│ ├── options.md
│ └── special_symbols.md
├── python/
│ ├── .gitignore
│ ├── MANIFEST.in
│ ├── README.md
│ ├── add_new_vocab.ipynb
│ ├── build_bundled.sh
│ ├── build_sdist.sh
│ ├── pyproject.toml
│ ├── sentencepiece_python_module_example.ipynb
│ ├── setup.cfg
│ ├── setup.py
│ ├── src/
│ │ └── sentencepiece/
│ │ ├── __init__.py
│ │ ├── _version.py
│ │ ├── sentencepiece.i
│ │ ├── sentencepiece_model_pb2.py
│ │ ├── sentencepiece_pb2.py
│ │ └── sentencepiece_wrap.cxx
│ └── test/
│ ├── __init__.py
│ ├── botchan.txt
│ ├── sentencepiece_test.py
│ ├── test_ja_model.model
│ └── test_model.model
├── sentencepiece.pc.in
├── src/
│ ├── CMakeLists.txt
│ ├── bpe_model.cc
│ ├── bpe_model.h
│ ├── bpe_model_test.cc
│ ├── bpe_model_trainer.cc
│ ├── bpe_model_trainer.h
│ ├── bpe_model_trainer_test.cc
│ ├── builder.cc
│ ├── builder.h
│ ├── builder_test.cc
│ ├── builtin_pb/
│ │ ├── sentencepiece.pb.cc
│ │ ├── sentencepiece.pb.h
│ │ ├── sentencepiece_model.pb.cc
│ │ └── sentencepiece_model.pb.h
│ ├── char_model.cc
│ ├── char_model.h
│ ├── char_model_test.cc
│ ├── char_model_trainer.cc
│ ├── char_model_trainer.h
│ ├── char_model_trainer_test.cc
│ ├── common.h
│ ├── compile_charsmap_main.cc
│ ├── error.cc
│ ├── filesystem.cc
│ ├── filesystem.h
│ ├── filesystem_test.cc
│ ├── freelist.h
│ ├── freelist_test.cc
│ ├── init.cc
│ ├── init.h
│ ├── init_test.cc
│ ├── model_factory.cc
│ ├── model_factory.h
│ ├── model_factory_test.cc
│ ├── model_interface.cc
│ ├── model_interface.h
│ ├── model_interface_test.cc
│ ├── normalization_rule.h
│ ├── normalizer.cc
│ ├── normalizer.h
│ ├── normalizer_test.cc
│ ├── pretokenizer_for_training.cc
│ ├── pretokenizer_for_training.h
│ ├── pretokenizer_for_training_test.cc
│ ├── sentencepiece.proto
│ ├── sentencepiece_model.proto
│ ├── sentencepiece_processor.cc
│ ├── sentencepiece_processor.h
│ ├── sentencepiece_processor_test.cc
│ ├── sentencepiece_trainer.cc
│ ├── sentencepiece_trainer.h
│ ├── sentencepiece_trainer_test.cc
│ ├── spec_parser.h
│ ├── spm_decode_main.cc
│ ├── spm_encode_main.cc
│ ├── spm_export_vocab_main.cc
│ ├── spm_normalize_main.cc
│ ├── spm_train_main.cc
│ ├── test_main.cc
│ ├── testharness.cc
│ ├── testharness.h
│ ├── trainer_factory.cc
│ ├── trainer_factory.h
│ ├── trainer_factory_test.cc
│ ├── trainer_interface.cc
│ ├── trainer_interface.h
│ ├── trainer_interface_test.cc
│ ├── unicode_script.cc
│ ├── unicode_script.h
│ ├── unicode_script_map.h
│ ├── unicode_script_test.cc
│ ├── unigram_model.cc
│ ├── unigram_model.h
│ ├── unigram_model_test.cc
│ ├── unigram_model_trainer.cc
│ ├── unigram_model_trainer.h
│ ├── unigram_model_trainer_test.cc
│ ├── util.cc
│ ├── util.h
│ ├── util_test.cc
│ ├── word_model.cc
│ ├── word_model.h
│ ├── word_model_test.cc
│ ├── word_model_trainer.cc
│ ├── word_model_trainer.h
│ └── word_model_trainer_test.cc
└── third_party/
├── CMakeLists.txt
├── absl/
│ ├── LICENSE
│ ├── container/
│ │ ├── btree_set.h
│ │ ├── flat_hash_map.h
│ │ └── flat_hash_set.h
│ ├── flags/
│ │ ├── flag.cc
│ │ ├── flag.h
│ │ ├── parse.h
│ │ ├── usage.h
│ │ └── usage_config.h
│ ├── log/
│ │ ├── check.h
│ │ ├── globals.h
│ │ ├── log.cc
│ │ └── log.h
│ └── strings/
│ ├── ascii.h
│ ├── match.h
│ ├── numbers.h
│ ├── str_cat.h
│ ├── str_format.h
│ ├── str_join.h
│ ├── str_replace.h
│ ├── str_split.h
│ ├── string_view.h
│ └── strip.h
├── darts_clone/
│ ├── LICENSE
│ └── darts.h
├── esaxx/
│ ├── LICENSE
│ ├── esa.hxx
│ └── sais.hxx
└── protobuf-lite/
├── LICENSE
├── arena.cc
├── arenastring.cc
├── bytestream.cc
├── coded_stream.cc
├── common.cc
├── extension_set.cc
├── generated_enum_util.cc
├── generated_message_table_driven_lite.cc
├── generated_message_util.cc
├── google/
│ └── protobuf/
│ ├── any.h
│ ├── arena.h
│ ├── arena_impl.h
│ ├── arenastring.h
│ ├── descriptor.h
│ ├── extension_set.h
│ ├── extension_set_inl.h
│ ├── generated_enum_reflection.h
│ ├── generated_enum_util.h
│ ├── generated_message_table_driven.h
│ ├── generated_message_table_driven_lite.h
│ ├── generated_message_util.h
│ ├── has_bits.h
│ ├── implicit_weak_message.h
│ ├── io/
│ │ ├── coded_stream.h
│ │ ├── io_win32.h
│ │ ├── zero_copy_stream.h
│ │ ├── zero_copy_stream_impl.h
│ │ └── zero_copy_stream_impl_lite.h
│ ├── map.h
│ ├── map_entry_lite.h
│ ├── map_field_lite.h
│ ├── map_type_handler.h
│ ├── message_lite.h
│ ├── metadata_lite.h
│ ├── parse_context.h
│ ├── port.h
│ ├── port_def.inc
│ ├── port_undef.inc
│ ├── repeated_field.h
│ ├── stubs/
│ │ ├── bytestream.h
│ │ ├── callback.h
│ │ ├── casts.h
│ │ ├── common.h
│ │ ├── hash.h
│ │ ├── int128.h
│ │ ├── logging.h
│ │ ├── macros.h
│ │ ├── map_util.h
│ │ ├── mutex.h
│ │ ├── once.h
│ │ ├── platform_macros.h
│ │ ├── port.h
│ │ ├── status.h
│ │ ├── statusor.h
│ │ ├── stl_util.h
│ │ ├── stringpiece.h
│ │ ├── stringprintf.h
│ │ ├── strutil.h
│ │ └── time.h
│ ├── unknown_field_set.h
│ └── wire_format_lite.h
├── implicit_weak_message.cc
├── int128.cc
├── io_win32.cc
├── message_lite.cc
├── parse_context.cc
├── repeated_field.cc
├── status.cc
├── statusor.cc
├── stringpiece.cc
├── stringprintf.cc
├── structurally_valid.cc
├── strutil.cc
├── time.cc
├── wire_format_lite.cc
├── zero_copy_stream.cc
├── zero_copy_stream_impl.cc
└── zero_copy_stream_impl_lite.cc
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: bug
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
Please do not report issues with separate modules that use SentencePiece as a library, such as those in Hugging Face and/or Conda.
**To Reproduce**
Kindly provide all information that allows us to reproduce this issue in our environment. Please avoid providing just an error message without any context. Please be aware that issues in non-standard environments may be challenging to resolve.
- Operating system, Python version.
- Actual command line or Python code to reproduce the bug.
- Training data and model files.
**Expected behavior**
A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
**Additional context**
Add any other context about the problem here.
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: feature request
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is.
- Please check existing issues to see if your problem has already been reported or resolved.
- When submitting a feature request, please explain the context behind why the feature is needed. Sometimes your use case may be solvable with existing functionality.
================================================
FILE: .github/dependabot.yml
================================================
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"
groups:
github-actions:
patterns:
- "*"
- package-ecosystem: "pip"
directory: "/.github/workflows/requirements"
schedule:
interval: "monthly"
groups:
build-time-deps:
patterns:
- "*"
================================================
FILE: .github/pull_request_template.md
================================================
## Thank you for your contribution
We sincerely appreciate your interest in contributing to this project. Our goal is to maintain a high-quality, stable, and secure codebase. Your cooperation in adhering to these guidelines helps us achieve that.
---
### Important Notes on the Review Process
Reviewing and validating code takes a significant amount of time and resources. To mitigate security risks and prevent regressions, all changes undergo a rigorous review process. Please understand the following:
* **Keep Pull Requests Small and Focused:** Each pull request should address a single, specific issue or feature. Avoid bundling multiple bug fixes, new features, or unrelated refactoring into a single PR. This makes the review process faster and more effective.
* **Not All Contributions Will Be Merged:** We cannot guarantee that every pull request will be merged. The decision to merge is based on various factors, including code quality, adherence to project style, performance implications, and alignment with the project's long-term vision.
---
### Before Submitting Your Pull Request
To ensure your contribution has the best chance of being accepted, please check the following:
* **New Features:** New feature requests are not accepted directly via pull requests. To propose a new feature, you must first create an **Issue** to discuss the idea with the project maintainers and community. This allows us to align on the design and necessity of the feature before any code is written.
* **Bug Fixes:** For bug fixes, please open a corresponding **Issue** first. Describe the bug, provide steps to reproduce it, and explain the expected behavior. This helps us confirm the bug and track its resolution. Once a bug is confirmed, you may submit a pull request referencing the issue.
* **Testing:** All code changes, especially bug fixes and new features, must be accompanied by appropriate unit tests. If your change is not covered by existing tests, please include new tests to demonstrate that your code works correctly and that the issue is resolved.
* **Code Style:** Please ensure your code follows the [Google C++ coding style](https://google.github.io/styleguide/cppguide.html) guidelines.
### Pull Request Details
* **Describe the change:** Clearly and concisely describe the purpose of your pull request. Explain what it does, why it's needed, and how it was implemented.
* **Link to a related Issue:** If this pull request resolves or is related to a specific issue, please link it here using the `Fixes #` or `Closes #` syntax (e.g., `Fixes #123`).
* **Testing Information:** Detail the steps you took to test your changes. Include any relevant test cases, commands, or scenarios you used.
================================================
FILE: .github/workflows/cifuzz.yml
================================================
name: CIFuzz
on: [pull_request]
permissions:
contents: read
jobs:
Fuzzing:
runs-on: ubuntu-latest
steps:
- name: Build Fuzzers
id: build
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
with:
oss-fuzz-project-name: 'sentencepiece'
dry-run: false
language: c++
- name: Run Fuzzers
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
with:
oss-fuzz-project-name: 'sentencepiece'
fuzz-seconds: 300
dry-run: false
language: c++
- name: Upload Crash
uses: actions/upload-artifact@v6
if: failure() && steps.build.outcome == 'success'
with:
name: artifacts
path: ./out/artifacts
================================================
FILE: .github/workflows/cmake.yml
================================================
name: CI for general build
on:
push:
branches: [ master ]
tags:
- 'v*'
pull_request:
branches: [ master ]
workflow_dispatch:
release:
types: [ created ]
permissions:
contents: read
jobs:
build:
strategy:
matrix:
os: [ ubuntu-latest, macos-latest ]
arch: [ x64, arm64 ]
absl_provider: [ module, internal ]
include:
- os: windows-latest
arch: Win32
absl_provider: internal
- os: windows-latest
arch: x64
absl_provider: internal
- os: windows-11-arm
arch: ARM64
absl_provider: internal
- os: windows-latest
arch: Win32
absl_provider: module
- os: windows-latest
arch: x64
absl_provider: module
- os: windows-11-arm
arch: ARM64
absl_provider: module
runs-on: ${{ matrix.os }}
permissions:
contents: write # svenstaro/upload-release-action
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Config for Windows
if: runner.os == 'Windows'
run: cmake -A ${{ matrix.arch }} -B build -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=build/root -DSPM_ABSL_PROVIDER=${{ matrix.absl_provider }}
- name: Config for Linux/MacOSX
if: runner.os != 'Windows'
run: cmake -B build -DSPM_BUILD_TEST=ON -DCMAKE_INSTALL_PREFIX=build/root -DSPM_ABSL_PROVIDER=${{ matrix.absl_provider }}
env:
CMAKE_OSX_ARCHITECTURES: arm64;x86_64
- name: Build
run: cmake --build build --config Release --target install --parallel 32
- name: Test
working-directory: build
run: ctest -C Release --output-on-failure
- name: Package
working-directory: build
run: cpack
- name: Upload artifacts
uses: actions/upload-artifact@v6
with:
name: artifacts-${{ matrix.os }}-${{ matrix.arch }}
path: ./build/*.7z
overwrite: true
- name: Upload Release Assets
if: startsWith(github.ref, 'refs/tags/')
uses: svenstaro/upload-release-action@6b7fa9f267e90b50a19fef07b3596790bb941741 # v2.11.3
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: ./build/*.7z
tag: ${{ github.ref }}
overwrite: true
prerelease: true
file_glob: true
body: "This is my release text"
================================================
FILE: .github/workflows/cross_build.yml
================================================
name: CrossBuild
on:
push:
branches: [ master ]
tags:
- 'v*'
pull_request:
branches: [ master ]
workflow_dispatch:
release:
types: [ created ]
permissions:
contents: read
jobs:
build:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
arch: [ i686, arm, aarch64, riscv64, powerpc, powerpc64, powerpc64le, s390x, sparc64, m68k, sh4, alpha ]
absl_provider: [ module, internal ]
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Install cross tools
run: |
sudo apt-get update
sudo apt-get install -y sudo qemu-user gdb zstd dwarfdump {gcc,g++}-10-{i686,aarch64,riscv64,powerpc,powerpc64,powerpc64le,s390x,sparc64,m68k,sh4,alpha}-linux-gnu {gcc,g++}-10-arm-linux-gnueabihf
sudo ln -sf /usr/bin/arm-linux-gnueabihf-gcc-10 /usr/bin/arm-linux-gnu-gcc-10
sudo ln -sf /usr/bin/arm-linux-gnueabihf-g++-10 /usr/bin/arm-linux-gnu-g++-10
sudo ln -sf /usr/arm-linux-gnueabihf /usr/arm-linux-gnu
- name: Build
run: |
mkdir -p build
cd build
env CXX=/usr/bin/${{matrix.arch}}-linux-gnu-g++-10 CC=/usr/bin/${{matrix.arch}}-linux-gnu-gcc-10 cmake .. -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=OFF -DCMAKE_FIND_ROOT_PATH=/usr/${{matrix.arch}}-linux-gnu -DSPM_CROSS_SYSTEM_PROCESSOR=${{matrix.arch}} -DSPM_ABSL_PROVIDER=${{ matrix.absl_provider }}
make -j$(nproc)
- name: Test on QEMU
if: matrix.arch != 'sparc64' && matrix.arch != 'm68k' && matrix.arch != 'sh4'
run: |
cd build
qemu_arch=`echo ${{matrix.arch}} | sed -e s/powerpc/ppc/ -e s/686/386/`
qemu-${qemu_arch} -L /usr/${{matrix.arch}}-linux-gnu src/spm_test
================================================
FILE: .github/workflows/requirements/base.in
================================================
pip
setuptools
wheel
twine
pytest
build
packaging >= 25.0
================================================
FILE: .github/workflows/requirements/base.txt
================================================
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile --allow-unsafe --generate-hashes base.in
#
backports-tarfile==1.2.0 \
--hash=sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34 \
--hash=sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991
# via jaraco-context
build==1.4.0 \
--hash=sha256:6a07c1b8eb6f2b311b96fcbdbce5dab5fe637ffda0fd83c9cac622e927501596 \
--hash=sha256:f1b91b925aa322be454f8330c6fb48b465da993d1e7e7e6fa35027ec49f3c936
# via -r base.in
certifi==2024.7.4 \
--hash=sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b \
--hash=sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90
# via requests
cffi==2.0.0 \
--hash=sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb \
--hash=sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b \
--hash=sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f \
--hash=sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9 \
--hash=sha256:0cf2d91ecc3fcc0625c2c530fe004f82c110405f101548512cce44322fa8ac44 \
--hash=sha256:0f6084a0ea23d05d20c3edcda20c3d006f9b6f3fefeac38f59262e10cef47ee2 \
--hash=sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c \
--hash=sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75 \
--hash=sha256:1cd13c99ce269b3ed80b417dcd591415d3372bcac067009b6e0f59c7d4015e65 \
--hash=sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e \
--hash=sha256:1f72fb8906754ac8a2cc3f9f5aaa298070652a0ffae577e0ea9bd480dc3c931a \
--hash=sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e \
--hash=sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25 \
--hash=sha256:2081580ebb843f759b9f617314a24ed5738c51d2aee65d31e02f6f7a2b97707a \
--hash=sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe \
--hash=sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b \
--hash=sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91 \
--hash=sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592 \
--hash=sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187 \
--hash=sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c \
--hash=sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1 \
--hash=sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94 \
--hash=sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba \
--hash=sha256:3e837e369566884707ddaf85fc1744b47575005c0a229de3327f8f9a20f4efeb \
--hash=sha256:3f4d46d8b35698056ec29bca21546e1551a205058ae1a181d871e278b0b28165 \
--hash=sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529 \
--hash=sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca \
--hash=sha256:4647afc2f90d1ddd33441e5b0e85b16b12ddec4fca55f0d9671fef036ecca27c \
--hash=sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6 \
--hash=sha256:53f77cbe57044e88bbd5ed26ac1d0514d2acf0591dd6bb02a3ae37f76811b80c \
--hash=sha256:5eda85d6d1879e692d546a078b44251cdd08dd1cfb98dfb77b670c97cee49ea0 \
--hash=sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743 \
--hash=sha256:61d028e90346df14fedc3d1e5441df818d095f3b87d286825dfcbd6459b7ef63 \
--hash=sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5 \
--hash=sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5 \
--hash=sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4 \
--hash=sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d \
--hash=sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b \
--hash=sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93 \
--hash=sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205 \
--hash=sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27 \
--hash=sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512 \
--hash=sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d \
--hash=sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c \
--hash=sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037 \
--hash=sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26 \
--hash=sha256:89472c9762729b5ae1ad974b777416bfda4ac5642423fa93bd57a09204712322 \
--hash=sha256:8ea985900c5c95ce9db1745f7933eeef5d314f0565b27625d9a10ec9881e1bfb \
--hash=sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c \
--hash=sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8 \
--hash=sha256:9332088d75dc3241c702d852d4671613136d90fa6881da7d770a483fd05248b4 \
--hash=sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414 \
--hash=sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9 \
--hash=sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664 \
--hash=sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9 \
--hash=sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775 \
--hash=sha256:b18a3ed7d5b3bd8d9ef7a8cb226502c6bf8308df1525e1cc676c3680e7176739 \
--hash=sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc \
--hash=sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062 \
--hash=sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe \
--hash=sha256:b882b3df248017dba09d6b16defe9b5c407fe32fc7c65a9c69798e6175601be9 \
--hash=sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92 \
--hash=sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5 \
--hash=sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13 \
--hash=sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d \
--hash=sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26 \
--hash=sha256:cb527a79772e5ef98fb1d700678fe031e353e765d1ca2d409c92263c6d43e09f \
--hash=sha256:cf364028c016c03078a23b503f02058f1814320a56ad535686f90565636a9495 \
--hash=sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b \
--hash=sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6 \
--hash=sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c \
--hash=sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef \
--hash=sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5 \
--hash=sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18 \
--hash=sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad \
--hash=sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3 \
--hash=sha256:de8dad4425a6ca6e4e5e297b27b5c824ecc7581910bf9aee86cb6835e6812aa7 \
--hash=sha256:e11e82b744887154b182fd3e7e8512418446501191994dbf9c9fc1f32cc8efd5 \
--hash=sha256:e6e73b9e02893c764e7e8d5bb5ce277f1a009cd5243f8228f75f842bf937c534 \
--hash=sha256:f73b96c41e3b2adedc34a7356e64c8eb96e03a3782b535e043a986276ce12a49 \
--hash=sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2 \
--hash=sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5 \
--hash=sha256:fc7de24befaeae77ba923797c7c87834c73648a05a4bde34b3b7e5588973a453 \
--hash=sha256:fe562eb1a64e67dd297ccc4f5addea2501664954f2692b69a76449ec7913ecbf
# via cryptography
charset-normalizer==3.3.2 \
--hash=sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027 \
--hash=sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087 \
--hash=sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786 \
--hash=sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8 \
--hash=sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09 \
--hash=sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185 \
--hash=sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574 \
--hash=sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e \
--hash=sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519 \
--hash=sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898 \
--hash=sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269 \
--hash=sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3 \
--hash=sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f \
--hash=sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6 \
--hash=sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8 \
--hash=sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a \
--hash=sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73 \
--hash=sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc \
--hash=sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714 \
--hash=sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2 \
--hash=sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc \
--hash=sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce \
--hash=sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d \
--hash=sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e \
--hash=sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6 \
--hash=sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269 \
--hash=sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96 \
--hash=sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d \
--hash=sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a \
--hash=sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4 \
--hash=sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77 \
--hash=sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d \
--hash=sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0 \
--hash=sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed \
--hash=sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068 \
--hash=sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac \
--hash=sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25 \
--hash=sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8 \
--hash=sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab \
--hash=sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26 \
--hash=sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2 \
--hash=sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db \
--hash=sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f \
--hash=sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5 \
--hash=sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99 \
--hash=sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c \
--hash=sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d \
--hash=sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811 \
--hash=sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa \
--hash=sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a \
--hash=sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03 \
--hash=sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b \
--hash=sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04 \
--hash=sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c \
--hash=sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001 \
--hash=sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458 \
--hash=sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389 \
--hash=sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99 \
--hash=sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985 \
--hash=sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537 \
--hash=sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238 \
--hash=sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f \
--hash=sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d \
--hash=sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796 \
--hash=sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a \
--hash=sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143 \
--hash=sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8 \
--hash=sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c \
--hash=sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5 \
--hash=sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5 \
--hash=sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711 \
--hash=sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4 \
--hash=sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6 \
--hash=sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c \
--hash=sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7 \
--hash=sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4 \
--hash=sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b \
--hash=sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae \
--hash=sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12 \
--hash=sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c \
--hash=sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae \
--hash=sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8 \
--hash=sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887 \
--hash=sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b \
--hash=sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4 \
--hash=sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f \
--hash=sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5 \
--hash=sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33 \
--hash=sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519 \
--hash=sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561
# via requests
cryptography==46.0.5 \
--hash=sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72 \
--hash=sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235 \
--hash=sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9 \
--hash=sha256:2ae6971afd6246710480e3f15824ed3029a60fc16991db250034efd0b9fb4356 \
--hash=sha256:2b7a67c9cd56372f3249b39699f2ad479f6991e62ea15800973b956f4b73e257 \
--hash=sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad \
--hash=sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4 \
--hash=sha256:3b4995dc971c9fb83c25aa44cf45f02ba86f71ee600d81091c2f0cbae116b06c \
--hash=sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614 \
--hash=sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed \
--hash=sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31 \
--hash=sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229 \
--hash=sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0 \
--hash=sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731 \
--hash=sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b \
--hash=sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4 \
--hash=sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4 \
--hash=sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263 \
--hash=sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595 \
--hash=sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1 \
--hash=sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678 \
--hash=sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48 \
--hash=sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76 \
--hash=sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0 \
--hash=sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18 \
--hash=sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d \
--hash=sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d \
--hash=sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1 \
--hash=sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981 \
--hash=sha256:8456928655f856c6e1533ff59d5be76578a7157224dbd9ce6872f25055ab9ab7 \
--hash=sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82 \
--hash=sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2 \
--hash=sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4 \
--hash=sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663 \
--hash=sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c \
--hash=sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d \
--hash=sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a \
--hash=sha256:bc84e875994c3b445871ea7181d424588171efec3e185dced958dad9e001950a \
--hash=sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d \
--hash=sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b \
--hash=sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a \
--hash=sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826 \
--hash=sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee \
--hash=sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9 \
--hash=sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648 \
--hash=sha256:d861ee9e76ace6cf36a6a89b959ec08e7bc2493ee39d07ffe5acb23ef46d27da \
--hash=sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2 \
--hash=sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2 \
--hash=sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87
# via secretstorage
docutils==0.21.2 \
--hash=sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f \
--hash=sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2
# via readme-renderer
id==1.5.0 \
--hash=sha256:292cb8a49eacbbdbce97244f47a97b4c62540169c976552e497fd57df0734c1d \
--hash=sha256:f1434e1cef91f2cbb8a4ec64663d5a23b9ed43ef44c4c957d02583d61714c658
# via twine
idna==3.7 \
--hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \
--hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0
# via requests
importlib-metadata==8.2.0 \
--hash=sha256:11901fa0c2f97919b288679932bb64febaeacf289d18ac84dd68cb2e74213369 \
--hash=sha256:72e8d4399996132204f9a16dcc751af254a48f8d1b20b9ff0f98d4a8f901e73d
# via keyring
iniconfig==2.0.0 \
--hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \
--hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
# via pytest
jaraco-classes==3.4.0 \
--hash=sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd \
--hash=sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790
# via keyring
jaraco-context==5.3.0 \
--hash=sha256:3e16388f7da43d384a1a7cd3452e72e14732ac9fe459678773a3608a812bf266 \
--hash=sha256:c2f67165ce1f9be20f32f650f25d8edfc1646a8aeee48ae06fb35f90763576d2
# via keyring
jaraco-functools==4.0.2 \
--hash=sha256:3460c74cd0d32bf82b9576bbb3527c4364d5b27a21f5158a62aed6c4b42e23f5 \
--hash=sha256:c9d16a3ed4ccb5a889ad8e0b7a343401ee5b2a71cee6ed192d3f68bc351e94e3
# via keyring
jeepney==0.8.0 \
--hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \
--hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755
# via
# keyring
# secretstorage
keyring==25.3.0 \
--hash=sha256:8d85a1ea5d6db8515b59e1c5d1d1678b03cf7fc8b8dcfb1651e8c4a524eb42ef \
--hash=sha256:8d963da00ccdf06e356acd9bf3b743208878751032d8599c6cc89eb51310ffae
# via twine
markdown-it-py==3.0.0 \
--hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \
--hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb
# via rich
mdurl==0.1.2 \
--hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \
--hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba
# via markdown-it-py
more-itertools==10.4.0 \
--hash=sha256:0f7d9f83a0a8dcfa8a2694a770590d98a67ea943e3d9f5298309a484758c4e27 \
--hash=sha256:fe0e63c4ab068eac62410ab05cccca2dc71ec44ba8ef29916a0090df061cf923
# via
# jaraco-classes
# jaraco-functools
nh3==0.2.18 \
--hash=sha256:0411beb0589eacb6734f28d5497ca2ed379eafab8ad8c84b31bb5c34072b7164 \
--hash=sha256:14c5a72e9fe82aea5fe3072116ad4661af5cf8e8ff8fc5ad3450f123e4925e86 \
--hash=sha256:19aaba96e0f795bd0a6c56291495ff59364f4300d4a39b29a0abc9cb3774a84b \
--hash=sha256:34c03fa78e328c691f982b7c03d4423bdfd7da69cd707fe572f544cf74ac23ad \
--hash=sha256:36c95d4b70530b320b365659bb5034341316e6a9b30f0b25fa9c9eff4c27a204 \
--hash=sha256:3a157ab149e591bb638a55c8c6bcb8cdb559c8b12c13a8affaba6cedfe51713a \
--hash=sha256:42c64511469005058cd17cc1537578eac40ae9f7200bedcfd1fc1a05f4f8c200 \
--hash=sha256:5f36b271dae35c465ef5e9090e1fdaba4a60a56f0bb0ba03e0932a66f28b9189 \
--hash=sha256:6955369e4d9f48f41e3f238a9e60f9410645db7e07435e62c6a9ea6135a4907f \
--hash=sha256:7b7c2a3c9eb1a827d42539aa64091640bd275b81e097cd1d8d82ef91ffa2e811 \
--hash=sha256:8ce0f819d2f1933953fca255db2471ad58184a60508f03e6285e5114b6254844 \
--hash=sha256:94a166927e53972a9698af9542ace4e38b9de50c34352b962f4d9a7d4c927af4 \
--hash=sha256:a7f1b5b2c15866f2db413a3649a8fe4fd7b428ae58be2c0f6bca5eefd53ca2be \
--hash=sha256:c8b3a1cebcba9b3669ed1a84cc65bf005728d2f0bc1ed2a6594a992e817f3a50 \
--hash=sha256:de3ceed6e661954871d6cd78b410213bdcb136f79aafe22aa7182e028b8c7307 \
--hash=sha256:f0eca9ca8628dbb4e916ae2491d72957fdd35f7a5d326b7032a345f111ac07fe
# via readme-renderer
packaging==26.0 \
--hash=sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4 \
--hash=sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529
# via
# -r base.in
# build
# pytest
# twine
# wheel
pluggy==1.5.0 \
--hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \
--hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669
# via pytest
pycparser==2.22 \
--hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \
--hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc
# via cffi
pygments==2.18.0 \
--hash=sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199 \
--hash=sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a
# via
# pytest
# readme-renderer
# rich
pyproject-hooks==1.2.0 \
--hash=sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8 \
--hash=sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913
# via build
pytest==9.0.2 \
--hash=sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b \
--hash=sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11
# via -r base.in
readme-renderer==44.0 \
--hash=sha256:2fbca89b81a08526aadf1357a8c2ae889ec05fb03f5da67f9769c9a592166151 \
--hash=sha256:8712034eabbfa6805cacf1402b4eeb2a73028f72d1166d6f5cb7f9c047c5d1e1
# via twine
requests==2.32.4 \
--hash=sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c \
--hash=sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422
# via
# id
# requests-toolbelt
# twine
requests-toolbelt==1.0.0 \
--hash=sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6 \
--hash=sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06
# via twine
rfc3986==2.0.0 \
--hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \
--hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c
# via twine
rich==13.7.1 \
--hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \
--hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432
# via twine
secretstorage==3.3.3 \
--hash=sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77 \
--hash=sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99
# via keyring
twine==6.2.0 \
--hash=sha256:418ebf08ccda9a8caaebe414433b0ba5e25eb5e4a927667122fbe8f829f985d8 \
--hash=sha256:e5ed0d2fd70c9959770dce51c8f39c8945c574e18173a7b81802dab51b4b75cf
# via -r base.in
urllib3==2.6.3 \
--hash=sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed \
--hash=sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4
# via
# requests
# twine
wheel==0.46.3 \
--hash=sha256:4b399d56c9d9338230118d705d9737a2a468ccca63d5e813e2a4fc7815d8bc4d \
--hash=sha256:e3e79874b07d776c40bd6033f8ddf76a7dad46a7b8aa1b2787a83083519a1803
# via -r base.in
zipp==3.20.0 \
--hash=sha256:0145e43d89664cfe1a2e533adc75adafed82fe2da404b4bbb6b026c0157bdb31 \
--hash=sha256:58da6168be89f0be59beb194da1250516fdaa062ccebd30127ac65d30045e10d
# via importlib-metadata
# The following packages are considered to be unsafe in a requirements file:
pip==26.0.1 \
--hash=sha256:bdb1b08f4274833d62c1aa29e20907365a2ceb950410df15fc9521bad440122b \
--hash=sha256:c4037d8a277c89b320abe636d59f91e6d0922d08a05b60e85e53b296613346d8
# via -r base.in
setuptools==80.10.2 \
--hash=sha256:8b0e9d10c784bf7d262c4e5ec5d4ec94127ce206e8738f29a437945fbc219b70 \
--hash=sha256:95b30ddfb717250edb492926c92b5221f7ef3fbcc2b07579bcd4a27da21d0173
# via -r base.in
================================================
FILE: .github/workflows/requirements/cibuildwheel.in
================================================
-c base.txt
cibuildwheel
================================================
FILE: .github/workflows/requirements/cibuildwheel.txt
================================================
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile --allow-unsafe --generate-hashes cibuildwheel.in
#
bashlex==0.18 \
--hash=sha256:5bb03a01c6d5676338c36fd1028009c8ad07e7d61d8a1ce3f513b7fff52796ee \
--hash=sha256:91d73a23a3e51711919c1c899083890cdecffc91d8c088942725ac13e9dcfffa
# via cibuildwheel
bracex==2.4 \
--hash=sha256:a27eaf1df42cf561fed58b7a8f3fdf129d1ea16a81e1fadd1d17989bc6384beb \
--hash=sha256:efdc71eff95eaff5e0f8cfebe7d01adf2c8637c8c92edaf63ef348c241a82418
# via cibuildwheel
build==1.4.0 \
--hash=sha256:6a07c1b8eb6f2b311b96fcbdbce5dab5fe637ffda0fd83c9cac622e927501596 \
--hash=sha256:f1b91b925aa322be454f8330c6fb48b465da993d1e7e7e6fa35027ec49f3c936
# via
# -c base.txt
# cibuildwheel
certifi==2024.7.4 \
--hash=sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b \
--hash=sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90
# via
# -c base.txt
# cibuildwheel
cibuildwheel==3.3.1 \
--hash=sha256:6d3c387e77c5850819294863eeee4e57fb7e8ecdf87b7412e763222c16e26424 \
--hash=sha256:ae6eafe6f7ed3bab38919e08bf3eb92085f6387c5f7a746b40cc4775a8462f9a
# via -r cibuildwheel.in
dependency-groups==1.3.0 \
--hash=sha256:1abf34d712deda5581e80d507512664d52b35d1c2d7caf16c85e58ca508547e0 \
--hash=sha256:5b9751d5d98fbd6dfd038a560a69c8382e41afcbf7ffdbcc28a2a3f85498830f
# via cibuildwheel
filelock==3.20.3 \
--hash=sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1 \
--hash=sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1
# via cibuildwheel
humanize==4.12.3 \
--hash=sha256:2cbf6370af06568fa6d2da77c86edb7886f3160ecd19ee1ffef07979efc597f6 \
--hash=sha256:8430be3a615106fdfceb0b2c1b41c4c98c6b0fc5cc59663a5539b111dd325fb0
# via cibuildwheel
packaging==26.0 \
--hash=sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4 \
--hash=sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529
# via
# -c base.txt
# build
# cibuildwheel
# dependency-groups
# wheel
patchelf==0.17.2.4 \
--hash=sha256:09fd848d625a165fc7b7e07745508c24077129b019c4415a882938781d43adf8 \
--hash=sha256:2931a1b5b85f3549661898af7bf746afbda7903c7c9a967cfc998a3563f84fad \
--hash=sha256:343bb1b94e959f9070ca9607453b04390e36bbaa33c88640b989cefad0aa049e \
--hash=sha256:680a266a70f60a7a4f4c448482c5bdba80cc8e6bb155a49dcc24238ba49927b0 \
--hash=sha256:7076d9e127230982e20a81a6e2358d3343004667ba510d9f822d4fdee29b0d71 \
--hash=sha256:970ee5cd8af33e5ea2099510b2f9013fa1b8d5cd763bf3fd3961281c18101a09 \
--hash=sha256:ae44cb3c857d50f54b99e5697aa978726ada33a8a6129d4b8b7ffd28b996652d \
--hash=sha256:d842b51f0401460f3b1f3a3a67d2c266a8f515a5adfbfa6e7b656cb3ac2ed8bc \
--hash=sha256:d9b35ebfada70c02679ad036407d9724ffe1255122ba4ac5e4be5868618a5689
# via cibuildwheel
platformdirs==4.1.0 \
--hash=sha256:11c8f37bcca40db96d8144522d925583bdb7a31f7b0e37e3ed4318400a8e2380 \
--hash=sha256:906d548203468492d432bcb294d4bc2fff751bf84971fbb2c10918cc206ee420
# via cibuildwheel
pyelftools==0.32 \
--hash=sha256:013df952a006db5e138b1edf6d8a68ecc50630adbd0d83a2d41e7f846163d738 \
--hash=sha256:6de90ee7b8263e740c8715a925382d4099b354f29ac48ea40d840cf7aa14ace5
# via cibuildwheel
pyproject-hooks==1.2.0 \
--hash=sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8 \
--hash=sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913
# via
# -c base.txt
# build
wheel==0.46.3 \
--hash=sha256:4b399d56c9d9338230118d705d9737a2a468ccca63d5e813e2a4fc7815d8bc4d \
--hash=sha256:e3e79874b07d776c40bd6033f8ddf76a7dad46a7b8aa1b2787a83083519a1803
# via
# -c base.txt
# cibuildwheel
================================================
FILE: .github/workflows/wheel.yml
================================================
# Builds the sentencepiece Python wheels and sdist on every runner in the job matrix.
name: Build Wheels
# Triggers: pushes to master or to v* tags, pull requests against master,
# manual dispatch, and creation of a release.
on:
push:
branches: [ master ]
tags:
- 'v*'
pull_request:
branches: [ master ]
workflow_dispatch:
release:
types: [ created ]
# Workflow-level token permission is read-only; jobs that need more declare it themselves.
permissions:
contents: read
jobs:
# Matrix job: one run per entry of matrix.os, building the native library and the Python wheels.
build_wheels:
# Digest strings written by the hash-macos / hash-linux / hash-windows steps at the end of this job.
# NOTE(review): the matrix contains two Linux and two Windows runners that write the
# same output name; confirm how outputs from multiple matrix legs are combined.
outputs:
digests-linux: ${{ steps.hash-linux.outputs.digests }}
digests-macos: ${{ steps.hash-macos.outputs.digests }}
digests-windows: ${{ steps.hash-windows.outputs.digests }}
strategy:
matrix:
os: [ubuntu-latest, ubuntu-24.04-arm, windows-latest, windows-11-arm, macos-latest]
runs-on: ${{ matrix.os }}
name: Build wheels on ${{ matrix.os }}
# Write access is requested for the release-upload step further down.
permissions:
contents: write # svenstaro/upload-release-action
steps:
# Third-party actions are referenced by commit SHA with the version in a trailing comment.
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.x"
# Native library pre-builds. Every variant is configured with SPM_ENABLE_SHARED=OFF and
# SPM_DISABLE_EMBEDDED_DATA=ON, built in Release, and installed into its own prefix under build/.
# Non-ARM Windows runners build both the Win32 and the x64 variant.
- name: Build for Windows (Win32/x64)
if: runner.os == 'Windows' && runner.arch != 'ARM64'
run: |
cmake -A Win32 -B build_win32 -DSPM_ENABLE_SHARED=OFF -DSPM_DISABLE_EMBEDDED_DATA=ON -DCMAKE_INSTALL_PREFIX=build/root_win32
cmake --build build_win32 --config Release --target install --parallel 8
cmake -A x64 -B build_amd64 -DSPM_ENABLE_SHARED=OFF -DSPM_DISABLE_EMBEDDED_DATA=ON -DCMAKE_INSTALL_PREFIX=build/root_amd64
cmake --build build_amd64 --config Release --target install --parallel 8
# ARM64 Windows runners build only the arm64 variant.
- name: Build for Windows (ARM64)
if: runner.os == 'Windows' && runner.arch == 'ARM64'
run: |
cmake -A arm64 -B build_arm64 -DSPM_ENABLE_SHARED=OFF -DSPM_DISABLE_EMBEDDED_DATA=ON -DCMAKE_INSTALL_PREFIX=build/root_arm64
cmake --build build_arm64 --config Release --target install --parallel 8
# macOS build; the step environment requests deployment target 10.13 and both arm64 and x86_64 architectures.
- name: Build for Mac
if: runner.os == 'macOS'
run: |
cmake -B build -DSPM_ENABLE_SHARED=OFF -DSPM_DISABLE_EMBEDDED_DATA=ON -DCMAKE_INSTALL_PREFIX=build/root
cmake --build build --config Release --target install --parallel 8
env:
MACOSX_DEPLOYMENT_TARGET: 10.13
CMAKE_OSX_ARCHITECTURES: arm64;x86_64
# Installs the build tooling strictly from the hash-pinned requirement files
# (--require-hashes, and --no-dependencies so nothing outside those files is pulled in).
- name: Install cibuildwheel
working-directory: python
run: |
python -m pip install --require-hashes --no-dependencies -r ../.github/workflows/requirements/base.txt
python -m pip install --require-hashes --no-dependencies -r ../.github/workflows/requirements/cibuildwheel.txt
# Copies data/*.bin into the Python package's package_data directory, then runs cibuildwheel
# with output in python/wheelhouse. The CIBW_* variables select architectures per OS,
# skip musllinux builds, and enable the cpython-freethreading group.
- name: Build wheels
working-directory: python
run: |
mkdir -p src/sentencepiece/package_data
cp ../data/*.bin src/sentencepiece/package_data
python -m cibuildwheel --output-dir wheelhouse
env:
CIBW_ARCHS_LINUX: auto
CIBW_ARCHS_MACOS: x86_64 universal2 arm64
CIBW_ARCHS_WINDOWS: auto
CIBW_SKIP: "*-musllinux_*"
CIBW_ENVIRONMENT: "CMAKE_BUILD_PARALLEL_LEVEL=8"
CIBW_BUILD_VERBOSITY: 1
CIBW_ENABLE: cpython-freethreading
# Builds the source distribution with the repository's build_sdist.sh script.
- name: Build sdist archive
working-directory: python
run: |
sh build_sdist.sh
# Resolves the generated python/dist/*.tar.gz path(s) for the next step.
- name: Fetch sdist archive
uses: tj-actions/glob@2deae40528141fc53131606d56b4e4ce2a486b29 # v22.0.2
id: sdist
with:
files: python/dist/*.tar.gz
# Builds a wheel from the sdist found above.
- name: Build wheel from sdist
run: python -m pip wheel "${{ steps.sdist.outputs.paths }}" --verbose
# Only the macOS leg copies the sdist into wheelhouse, so it is uploaded from that leg alone.
- name: Copy sdist
working-directory: python
if: runner.os == 'macOS'
run: |
mkdir -p wheelhouse
cp -f dist/*.tar.gz wheelhouse/
# NOTE(review): twine is installed here without a version or hash pin, unlike the
# --require-hashes installs earlier in this job; confirm whether that is intended.
- name: Validate artifact
run: |
python -m pip install twine
twine check --strict python/wheelhouse/*
# NOTE(review): this action is referenced by tag (v6) while the other actions in this
# file are pinned to a commit SHA; consider pinning for consistency.
- name: Upload artifact
uses: actions/upload-artifact@v6
with:
name: artifacts-${{ matrix.os }}
path: python/wheelhouse/*
overwrite: true
# On tag refs only: attaches everything in wheelhouse to the release for that tag, marked as prerelease.
- name: Upload wheel release
if: startsWith(github.ref, 'refs/tags/')
uses: svenstaro/upload-release-action@6b7fa9f267e90b50a19fef07b3596790bb941741 # v2.11.3
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: python/wheelhouse/*
tag: ${{ github.ref }}
overwrite: true
prerelease: true
file_glob: true
# Emits the base64-encoded SHA-256 listing of python/wheelhouse/* as the step output "digests".
# macOS uses shasum -a 256 and plain base64.
- name: Generate SLSA subjects - Macos
id: hash-macos
if: runner.os == 'macOS'
run: echo "digests=$(shasum -a 256 python/wheelhouse/* | base64)" >> $GITHUB_OUTPUT
# Linux uses sha256sum and base64 -w0 (no line wrapping).
- name: Generate SLSA subjects - Linux
id: hash-linux
if: runner.os == 'Linux'
run: echo "digests=$(sha256sum python/wheelhouse/* | base64 -w0)" >> $GITHUB_OUTPUT
# Emits the base64-encoded SHA-256 listing of python/wheelhouse/* as the step output "digests".
# The shell is set to bash explicitly because the command is written in POSIX-shell syntax.
# Under PowerShell, the default shell for run steps on Windows runners, $GITHUB_OUTPUT is an
# undefined PowerShell variable (the environment variable is $env:GITHUB_OUTPUT), so the
# redirect would not write the output file and the Windows digests would be lost.
- name: Generate SLSA subjects - Windows
id: hash-windows
if: runner.os == 'Windows'
shell: bash
run: echo "digests=$(sha256sum python/wheelhouse/* | base64 -w0)" >> $GITHUB_OUTPUT
# Runs the Python test suite against the freshly built wheel on a free-threaded interpreter ("3.13t").
free-threading:
needs: [build_wheels]
runs-on: ubuntu-latest
steps:
# Sparse checkout: only the Python tests and the botchan.txt data file are fetched.
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
sparse-checkout: |
python/test
data/botchan.txt
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.13t"
# Downloads the artifacts of all matrix legs and merges them into python/wheelhouse.
- name: Download all artifacts
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
path: python/wheelhouse
merge-multiple: true
# --find-links adds the local wheelhouse as a candidate source for the sentencepiece package.
- name: Install sentencepiece wheel
run: pip install --find-links=python/wheelhouse sentencepiece
- name: Install test dependencies
run: pip install pytest pytest-run-parallel
# pytest-run-parallel option: run the tests with 4 parallel threads.
- name: Run free-threading tests
working-directory: python
run: pytest -v --parallel-threads 4
# Combines the per-OS digest outputs of build_wheels into a single base64 string
# that is exposed as this job's "digests" output.
gather-digests:
needs: [build_wheels]
outputs:
digests: ${{ steps.hash.outputs.digests }}
runs-on: ubuntu-latest
steps:
# The job outputs are passed in through environment variables rather than being
# interpolated into the script text. The script decodes each one, concatenates the
# results into checksums.txt, and re-encodes the file without line wrapping.
- name: Merge results
id: hash
env:
LINUX_DIGESTS: "${{ needs.build_wheels.outputs.digests-linux }}"
MACOS_DIGESTS: "${{ needs.build_wheels.outputs.digests-macos }}"
WINDOWS_DIGESTS: "${{ needs.build_wheels.outputs.digests-windows }}"
run: |
set -euo pipefail
echo "$LINUX_DIGESTS" | base64 -d > checksums.txt
echo "$MACOS_DIGESTS" | base64 -d >> checksums.txt
echo "$WINDOWS_DIGESTS" | base64 -d >> checksums.txt
echo "digests=$(cat checksums.txt | base64 -w0)" >> $GITHUB_OUTPUT
# On tag refs only: calls the SLSA generic generator reusable workflow with the merged
# digests as subjects and uploads the resulting provenance to the release.
provenance:
if: startsWith(github.ref, 'refs/tags/')
needs: [build_wheels, gather-digests]
permissions:
actions: read # To read the workflow path.
id-token: write # To sign the provenance.
contents: write # To add assets to a release.
# Unlike the actions above, this reusable workflow is referenced by version tag rather than commit SHA.
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
with:
base64-subjects: "${{ needs.gather-digests.outputs.digests }}"
upload-assets: true # Optional: Upload to a new release
================================================
FILE: .gitignore
================================================
Makefile
Makefile.in
/ar-lib
/mdate-sh
/py-compile
/test-driver
/ylwrap
/build
/autom4te.cache
/autoscan.log
/autoscan-*.log
/aclocal.m4
/compile
/config.guess
/config.sub
/configure
/configure.scan
/depcomp
/install-sh
/missing
/stamp-h1
/libtool
/config.h
/config.status
/autogen.sh
/ltmain.sh
CMakeFiles
CMakeCache.txt
config.h
sentencepiece.pc
CPackConfig.cmake
CTestTestfile.cmake
CPackSourceConfig.cmake
DartConfiguration.tcl
*.o
*.lo
*.a
*.la
*.pyc
.libs
.deps
*.m4
*.log
*.trs
compile_charsmap
spm_decode
spm_encode
spm_export_vocab
spm_train
spm_normalize
spm_test
.DS_Store
*.egg-info/
dist/
*.swp
*.swo
*.pyc
m.model
m.vocab
cmake_install.cmake
libsentencepiece.so*
libsentencepiece_train.so*
python/bundled
_sentencepiece.*.so
third_party/abseil-cpp
python/sentencepiece
================================================
FILE: CMakeLists.txt
================================================
# Copyright 2018 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# CMake 3.10 or newer is required.
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
# The project version is read from VERSION.txt.
file(STRINGS "VERSION.txt" SPM_VERSION)
message(STATUS "VERSION: ${SPM_VERSION}")
# Opt in to policy CMP0091 (MSVC runtime library selection) when the running CMake knows it.
if(POLICY CMP0091)
cmake_policy(SET CMP0091 NEW)
endif()
project(sentencepiece VERSION ${SPM_VERSION} LANGUAGES C CXX)
# User-configurable build switches (set with -D<NAME>=<value> at configure time).
option(SPM_ENABLE_NFKC_COMPILE "Enables NFKC compile" OFF)
option(SPM_ENABLE_SHARED "Builds shared libraries in addition to static libraries." ON)
option(SPM_BUILD_TEST "Builds test binaries." OFF)
option(SPM_ENABLE_TCMALLOC "Enable TCMalloc if available." ON)
option(SPM_TCMALLOC_STATIC "Link static library of TCMALLOC." OFF)
# CMake arguments are whitespace-separated, so a comma after the name would become
# part of the variable name; the names below must be written without one.
option(SPM_ENABLE_MSVC_MT_BUILD "Use /MT flag in MSVC build" OFF)
option(SPM_DISABLE_EMBEDDED_DATA "Disable to embed pre-compiled data." OFF)
# String-valued setting: option() only models ON/OFF, so this is a STRING cache
# entry. A value passed with -DSPM_CROSS_SYSTEM_PROCESSOR=... is kept as is.
set(SPM_CROSS_SYSTEM_PROCESSOR "" CACHE STRING "Override system processor")

# Where protobuf and absl come from; the allowed values are listed for cmake-gui/ccmake.
set(SPM_PROTOBUF_PROVIDER "internal" CACHE STRING "Provider of protobuf library")
set_property(CACHE SPM_PROTOBUF_PROVIDER PROPERTY STRINGS "internal" "package")
set(SPM_ABSL_PROVIDER "internal" CACHE STRING "Provider of absl library")
set_property(CACHE SPM_ABSL_PROVIDER PROPERTY STRINGS "internal" "module" "package")
# Build position-independent code whenever shared libraries are enabled.
if (SPM_ENABLE_SHARED)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
endif()
# Includes processor name to avoid conflicts.
# NOTE(review): the package name is computed before the SPM_CROSS_SYSTEM_PROCESSOR
# override just below, so it carries the un-overridden CMAKE_SYSTEM_PROCESSOR;
# confirm that this ordering is intended.
set(CPACK_PACKAGE_FILE_NAME
"sentencepiece-${SPM_VERSION}-${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}")
# Optional user override of the processor name used by the rest of the build.
if (SPM_CROSS_SYSTEM_PROCESSOR)
set(CMAKE_SYSTEM_PROCESSOR ${SPM_CROSS_SYSTEM_PROCESSOR})
endif()
# Disable shared build on windows
if(WIN32)
set(SPM_ENABLE_SHARED OFF)
endif()
# C++17 is mandatory.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# For Clang newer than 10.0 or GCC newer than 8.0, add -fmacro-prefix-map so that the
# source-directory prefix is mapped to an empty string.
if((CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 10.0) OR
(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0))
string(APPEND CMAKE_CXX_FLAGS " -fmacro-prefix-map=${CMAKE_SOURCE_DIR}/=''")
endif()
# Fallback install sub-directories, used only when the caller has not defined them.
if (NOT DEFINED CMAKE_INSTALL_BINDIR)
set(CMAKE_INSTALL_BINDIR bin)
endif()
if (NOT DEFINED CMAKE_INSTALL_LIBDIR)
set(CMAKE_INSTALL_LIBDIR lib)
endif()
if (NOT DEFINED CMAKE_INSTALL_INCLUDEDIR)
set(CMAKE_INSTALL_INCLUDEDIR include)
endif()
# prefix / exec_prefix / libdir / includedir variables. The escaped \${...} keeps a
# literal ${prefix}-style reference in the value instead of expanding it here.
# Presumably these feed the sentencepiece.pc.in template configured further down; verify there.
if (UNIX)
include(GNUInstallDirs)
set(prefix ${CMAKE_INSTALL_PREFIX})
set(exec_prefix "\${prefix}")
set(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
set(includedir "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}")
else()
# Non-UNIX platforms use fixed lib/include sub-directories.
set(prefix ${CMAKE_INSTALL_PREFIX})
set(exec_prefix "\${prefix}")
set(libdir "\${exec_prefix}/lib")
set(includedir "\${prefix}/include")
endif()
# NOTE(review): GNUCXX_STD_SUPPORT_VERSION is not read anywhere in the visible part of
# this file; confirm whether a sub-directory still uses it.
set(GNUCXX_STD_SUPPORT_VERSION "4.3")
# Define _FREEBSD when targeting FreeBSD.
if(${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
add_definitions(-D_FREEBSD)
endif()
# Name of the protobuf-lite library to use; empty when SPM_USE_BUILTIN_PROTOBUF is set.
# NOTE(review): SPM_USE_BUILTIN_PROTOBUF is not declared among this file's options
# (only SPM_PROTOBUF_PROVIDER is); confirm it is still set somewhere.
if (SPM_USE_BUILTIN_PROTOBUF)
set(libprotobuf_lite "")
else()
set(libprotobuf_lite "protobuf-lite")
endif()
# MSVC-specific compiler settings.
if (MSVC)
  # Disable warnings C4267, C4244 and C4305, enable /Zc:strictStrings, and treat
  # source files as UTF-8.
  add_definitions("/wd4267 /wd4244 /wd4305 /Zc:strictStrings /utf-8")
  if (SPM_ENABLE_MSVC_MT_BUILD)
    # Policy CMP0091 is set to NEW at the top of this file. Under that policy
    # (CMake >= 3.15) the runtime library flag is no longer part of the default
    # CMAKE_<LANG>_FLAGS_<CONFIG> values, so rewriting /MD to /MT in those flags
    # has no effect; the static runtime must be requested through
    # CMAKE_MSVC_RUNTIME_LIBRARY. A value supplied by the user takes precedence.
    if (NOT DEFINED CMAKE_MSVC_RUNTIME_LIBRARY)
      set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
    endif()
    # Older CMake, or flags that contain /MD explicitly: rewrite them as before.
    # The input is quoted so that an empty flags variable is still a valid argument.
    foreach(spm_flags_var
            CMAKE_CXX_FLAGS_DEBUG
            CMAKE_CXX_FLAGS_MINSIZEREL
            CMAKE_CXX_FLAGS_RELEASE
            CMAKE_CXX_FLAGS_RELWITHDEBINFO)
      string(REPLACE "/MD" "/MT" ${spm_flags_var} "${${spm_flags_var}}")
    endforeach()
  endif()
endif()
# RPATH handling on Apple platforms: build-tree binaries keep their build RPATH, and
# installed binaries get <prefix>/lib plus the directories of linked libraries.
if (APPLE)
set(CMAKE_MACOSX_RPATH ON)
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
# NOTE(review): CMAKE_INSTALL_RPATH was already set to this same value above, so the
# system-directory check below does not change the outcome; confirm whether the
# unconditional assignment was meant to be removed.
list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir)
if ("${isSystemDir}" STREQUAL "-1")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
endif()
endif()
# Add -latomic on riscv64
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "riscv64")
string(APPEND CMAKE_C_STANDARD_LIBRARIES " -latomic")
string(APPEND CMAKE_CXX_STANDARD_LIBRARIES " -latomic")
endif()
# SPDX-License-Identifier: (MIT OR CC0-1.0)
# Copyright 2020 Jan Tojnar
# https://github.com/jtojnar/cmake-snips
#
# Modelled after Python’s os.path.join
# https://docs.python.org/3.7/library/os.path.html#os.path.join
# Windows not supported
# join_paths(<out-var> <first> [<segment>...])
#
# Joins the given path segments with "/" and stores the result in <out-var> in the
# caller's scope. Empty segments are ignored; an absolute segment replaces
# everything that was joined before it.
function(join_paths joined_path first_path_segment)
  set(accumulated "${first_path_segment}")
  foreach(segment IN LISTS ARGN)
    if("${segment}" STREQUAL "")
      # Nothing to append for an empty segment.
      continue()
    endif()
    if(NOT IS_ABSOLUTE "${segment}")
      set(accumulated "${accumulated}/${segment}")
    else()
      # Restart from the absolute segment.
      set(accumulated "${segment}")
    endif()
  endforeach()
  set(${joined_path} "${accumulated}" PARENT_SCOPE)
endfunction()
# Paths for the pkg-config file keep a literal ${prefix}/${exec_prefix} reference,
# while the config-file data directory is expanded against the actual prefix value.
join_paths(libdir_for_pc_file "\${exec_prefix}" "${CMAKE_INSTALL_LIBDIR}")
join_paths(includedir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}")
join_paths(datadir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_DATADIR}" "sentencepiece")
join_paths(datadir_for_config_file "${prefix}" "${CMAKE_INSTALL_DATADIR}" "sentencepiece")
set(INSTALL_DATADIR ${datadir_for_config_file})
# Generate config.h and sentencepiece.pc in the build directory; the .pc template is
# configured with @ONLY, so only @VAR@ references are substituted in it.
configure_file("${PROJECT_SOURCE_DIR}/config.h.in" "config.h")
configure_file("${PROJECT_SOURCE_DIR}/sentencepiece.pc.in" "sentencepiece.pc" @ONLY)
# The pkg-config file is installed for every compiler except MSVC. The warning
# suppression mentioned below is commented out and has no effect.
if (NOT MSVC)
# suppress warning for C++11 features.
# add_definitions("-Wno-deprecated-declarations -Wno-deprecated-enum-enum-conversion")
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/sentencepiece.pc" DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
endif()
# The source directory and the build directory (which holds the generated config.h) are on the include path.
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_BINARY_DIR})
if (SPM_BUILD_TEST)
enable_testing()
endif()
# When embedded data is disabled, the data/*.bin files are installed into INSTALL_DATADIR instead.
if (SPM_DISABLE_EMBEDDED_DATA)
file(GLOB EMBEDDED_DATA_FILES "${CMAKE_CURRENT_SOURCE_DIR}/data/*.bin")
install(FILES ${EMBEDDED_DATA_FILES} DESTINATION ${INSTALL_DATADIR})
endif()
# Select where the absl headers/libraries come from, based on SPM_ABSL_PROVIDER.
# "internal": use the copy under third_party/absl.
if (SPM_ABSL_PROVIDER STREQUAL "internal")
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/absl)
# "module": fetch abseil-cpp at the pinned tag into third_party/abseil-cpp and build it
# as a sub-directory.
elseif (SPM_ABSL_PROVIDER STREQUAL "module")
include(FetchContent)
FetchContent_Populate(abseil-cpp
GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/abseil-cpp
GIT_PROGRESS TRUE
GIT_TAG 20250814.1)
add_subdirectory(third_party/abseil-cpp)
# One-time swap that modifies the source tree: third_party/absl is moved aside to
# absl.org and replaced by a symlink to the fetched absl directory. The existence
# of absl.org marks the swap as already done.
if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/third_party/absl.org)
file(RENAME ${CMAKE_CURRENT_SOURCE_DIR}/third_party/absl ${CMAKE_CURRENT_SOURCE_DIR}/third_party/absl.org)
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_SOURCE_DIR}/third_party/abseil-cpp/absl
${CMAKE_CURRENT_SOURCE_DIR}/third_party/absl)
endif()
# "package": use an installed absl found through find_package; the include directory
# is taken from the absl::base target.
elseif (SPM_ABSL_PROVIDER STREQUAL "package")
find_package(absl REQUIRED)
get_target_property(ABSL_INCLUDE_DIRS absl::base INTERFACE_INCLUDE_DIRECTORIES)
# Same one-time swap as above, pointing the symlink at the installed absl headers.
# NOTE(review): this assumes ABSL_INCLUDE_DIRS holds a single directory; confirm the
# behaviour when the target property is a list.
if (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/third_party/absl.org)
file(RENAME ${CMAKE_CURRENT_SOURCE_DIR}/third_party/absl ${CMAKE_CURRENT_SOURCE_DIR}/third_party/absl.org)
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink
${ABSL_INCLUDE_DIRS}/absl ${CMAKE_CURRENT_SOURCE_DIR}/third_party/absl)
endif()
include_directories(${ABSL_INCLUDE_DIRS})
endif()
# Project sources and bundled third-party code.
add_subdirectory(src)
add_subdirectory(third_party)
# CPack packaging: TXZ source archives, 7Z binary archives, stripped binaries.
set(CPACK_SOURCE_GENERATOR "TXZ")
set(CPACK_GENERATOR "7Z")
set(CPACK_PACKAGE_VERSION "${SPM_VERSION}")
set(CPACK_STRIP_FILES TRUE)
set(CPACK_RESOURCE_FILE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE")
set(CPACK_RESOURCE_FILE_README "${PROJECT_SOURCE_DIR}/README.md")
set(CPACK_PACKAGE_CONTACT "taku@google.com")
set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Taku Kudo")
# Keep build output, VCS data, dist/sdist directories and backup files (~) out of source packages.
set(CPACK_SOURCE_IGNORE_FILES "/build/;/.git/;/dist/;/sdist/;~$;${CPACK_SOURCE_IGNORE_FILES}")
# CPack is included last so that it picks up the CPACK_* settings above.
include(CPack)
================================================
FILE: CONTRIBUTING.md
================================================
Want to contribute? Great! First, read this page (including the small print at the end).
### Before you contribute
Before we can use your code, you must sign the
[Google Individual Contributor License Agreement](https://cla.developers.google.com/about/google-individual)
(CLA), which you can do online. The CLA is necessary mainly because you own the
copyright to your changes even after your contribution becomes part of our
codebase, so we need your permission to use and distribute your code. We also
need to be sure of various other things—for instance, that you'll tell us if you
know that your code infringes on other people's patents. You don't have to sign
the CLA until after you've submitted your code for review and a member has
approved it, but you must do it before we can put your code into our codebase.
Before you start working on a larger contribution, you should get in touch with
us first through the issue tracker with your idea so that we can help out and
possibly guide you. Coordinating up-front makes it much easier to avoid
frustration later on.
### Code reviews
All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose.
### The small print
Contributions made by corporations are covered by a different agreement than
the one above, the [Software Grant and Corporate Contributor License Agreement](https://cla.developers.google.com/about/google-corporate).
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# SentencePiece
[](https://github.com/google/sentencepiece/actions/workflows/cmake.yml)
[](https://github.com/google/sentencepiece/actions/workflows/wheel.yml)
[](https://github.com/google/sentencepiece/issues)

[](https://badge.fury.io/py/sentencepiece)
[](https://pypi.org/project/sentencepiece/)
[](CONTRIBUTING.md)
[](https://opensource.org/licenses/Apache-2.0)
[](https://slsa.dev)
SentencePiece is an unsupervised text tokenizer and detokenizer mainly for
Neural Network-based text generation systems where the vocabulary size
is predetermined prior to the neural model training. SentencePiece implements
**subword units** (e.g., **byte-pair-encoding (BPE)** [[Sennrich et al.](https://www.aclweb.org/anthology/P16-1162)] and
**unigram language model** [[Kudo.](https://arxiv.org/abs/1804.10959)])
with the extension of direct training from raw sentences. SentencePiece allows us to make a purely end-to-end system that does not depend on language-specific pre/postprocessing.
**This is not an official Google product.**
## Technical highlights
- **Purely data driven**: SentencePiece trains tokenization and detokenization
models from sentences. Pre-tokenization ([Moses tokenizer](https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl)/[MeCab](http://taku910.github.io/mecab/)/[KyTea](http://www.phontron.com/kytea/)) is not always required.
- **Language independent**: SentencePiece treats the sentences just as sequences of Unicode characters. There is no language-dependent logic.
- **Multiple subword algorithms**: **BPE** [[Sennrich et al.](https://www.aclweb.org/anthology/P16-1162)] and **unigram language model** [[Kudo.](https://arxiv.org/abs/1804.10959)] are supported.
- **Subword regularization**: SentencePiece implements subword sampling for [subword regularization](https://arxiv.org/abs/1804.10959) and [BPE-dropout](https://arxiv.org/abs/1910.13267) which help to improve the robustness and accuracy of NMT models.
- **Fast and lightweight**: Segmentation speed is around 50k sentences/sec, and memory footprint is around 6MB.
- **Self-contained**: The same tokenization/detokenization is obtained as long as the same model file is used.
- **Direct vocabulary id generation**: SentencePiece manages vocabulary to id mapping and can directly generate vocabulary id sequences from raw sentences.
- **NFKC-based normalization**: SentencePiece performs NFKC-based text normalization.
For those unfamiliar with SentencePiece as a software/algorithm, one can read [a gentle introduction here](https://medium.com/@jacky2wong/understanding-sentencepiece-under-standing-sentence-piece-ac8da59f6b08).
## Comparisons with other implementations
| Feature | SentencePiece | [subword-nmt](https://github.com/rsennrich/subword-nmt) | [WordPiece](https://arxiv.org/pdf/1609.08144.pdf) |
| :-------------------------------------- | :--------------------------------------------: | :-----------------------------------------------------: | :-----------------------------------------------: |
| Supported algorithm | BPE, unigram, char, word | BPE | BPE\* |
| OSS? | Yes | Yes | Google internal |
| Subword regularization | [Yes](#subword-regularization-and-bpe-dropout) | No | No |
| Python Library (pip) | [Yes](python/README.md) | No | N/A |
| C++ Library | [Yes](doc/api.md) | No | N/A |
| Pre-segmentation required? | [No](#whitespace-is-treated-as-a-basic-symbol) | Yes | Yes |
| Customizable normalization (e.g., NFKC) | [Yes](doc/normalization.md) | No | N/A |
| Direct id generation | [Yes](#end-to-end-example) | No | N/A |
\* Note that the BPE algorithm used in WordPiece is slightly different from the original BPE.
## Overview
### What is SentencePiece?
SentencePiece is a re-implementation of **sub-word units**, an effective way to alleviate the open vocabulary
problems in neural machine translation. SentencePiece supports two segmentation algorithms, **byte-pair-encoding (BPE)** [[Sennrich et al.](http://www.aclweb.org/anthology/P16-1162)] and **unigram language model** [[Kudo.](https://arxiv.org/abs/1804.10959)]. Here are the high-level differences from other implementations.
#### The number of unique tokens is predetermined
Neural Machine Translation models typically operate with a fixed
vocabulary. Unlike most unsupervised word segmentation algorithms, which
assume an infinite vocabulary, SentencePiece trains the segmentation model such
that the final vocabulary size is fixed, e.g., 8k, 16k, or 32k.
Note that SentencePiece specifies the final vocabulary size for training, which is different from
[subword-nmt](https://github.com/rsennrich/subword-nmt) that uses the number of merge operations.
The number of merge operations is a BPE-specific parameter and not applicable to other segmentation algorithms, including unigram, word and character.
#### Trains from raw sentences
Previous sub-word implementations assume that the input sentences are pre-tokenized. This constraint was required for efficient training, but makes the preprocessing complicated as we have to run language dependent tokenizers in advance.
The implementation of SentencePiece is fast enough to train the model from raw sentences. This is useful for training the tokenizer and detokenizer for Chinese and Japanese where no explicit spaces exist between words.
#### Whitespace is treated as a basic symbol
The first step of natural language processing is text tokenization. For
example, a standard English tokenizer would segment the text "Hello World." into the
following three tokens.
> [Hello] [World] [.]
One observation is that the original input and tokenized sequence are **NOT
reversibly convertible**. For instance, the information that there is no space between
"World" and "." is dropped from the tokenized sequence, since e.g., `Tokenize("World.") == Tokenize("World .")`.
SentencePiece treats the input text just as a sequence of Unicode characters. Whitespace is also handled as a normal symbol. To handle the whitespace as a basic token explicitly, SentencePiece first escapes the whitespace with a meta symbol "▁" (U+2581) as follows.
> Hello▁World.
Then, this text is segmented into small pieces, for example:
> [Hello] [▁Wor] [ld] [.]
Since the whitespace is preserved in the segmented text, we can detokenize the text without any ambiguities.
```
detokenized = ''.join(pieces).replace('▁', ' ')
```
This feature makes it possible to perform detokenization without relying on language-specific resources.
Note that we cannot apply the same lossless conversions when splitting the
sentence with standard word segmenters, since they treat the whitespace as a
special symbol. Tokenized sequences do not preserve the necessary information to restore the original sentence.
- (en) Hello World. → [Hello] [World] [.] \(A space between Hello and World\)
- (ja) こんにちは世界。 → [こんにちは] [世界] [。] \(No space between こんにちは and 世界\)
#### Subword regularization and BPE-dropout
Subword regularization [[Kudo.](https://arxiv.org/abs/1804.10959)] and BPE-dropout [[Provilkov et al.](https://arxiv.org/abs/1910.13267)] are simple regularization methods
that virtually augment training data with on-the-fly subword sampling, which helps to improve the accuracy as well as robustness of NMT models.
To enable subword regularization, you need to integrate the SentencePiece library
([C++](doc/api.md#sampling-subword-regularization)/[Python](python/README.md)) into the NMT system to sample one segmentation for each parameter update, which is different from the standard off-line data preparations. Here is an example using the [Python library](python/README.md). You can see that 'New York' is segmented differently on each `SampleEncode (C++)` or `encode with enable_sampling=True (Python)` call. The details of the sampling parameters are found in [sentencepiece_processor.h](src/sentencepiece_processor.h).
```
>>> import sentencepiece as spm
>>> s = spm.SentencePieceProcessor(model_file='spm.model')
>>> for n in range(5):
... s.encode('New York', out_type=str, enable_sampling=True, alpha=0.1, nbest_size=-1)
...
['▁', 'N', 'e', 'w', '▁York']
['▁', 'New', '▁York']
['▁', 'New', '▁Y', 'o', 'r', 'k']
['▁', 'New', '▁York']
['▁', 'New', '▁York']
```
## Installation
### Python module
SentencePiece provides a Python wrapper that supports both SentencePiece training and segmentation.
You can install the Python binary package of SentencePiece with:
```
pip install sentencepiece
```
For more detail, see [Python module](python/README.md)
### Build and install SentencePiece command line tools from C++ source
The following tools and libraries are required to build SentencePiece:
- [cmake](https://cmake.org/)
- C++11 compiler
- [gperftools](https://github.com/gperftools/gperftools) library (optional, 10-40% performance improvement can be obtained.)
On Ubuntu, the build tools can be installed with apt-get:
```
% sudo apt-get install cmake build-essential pkg-config libgoogle-perftools-dev
```
Then, you can build and install command line tools as follows.
```
% git clone https://github.com/google/sentencepiece.git
% cd sentencepiece
% mkdir build
% cd build
% cmake ..
% make -j $(nproc)
% sudo make install
% sudo ldconfig -v
```
On OSX/macOS, replace the last command with `sudo update_dyld_shared_cache`
### Build and install using vcpkg
You can download and install sentencepiece using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager:
```
sudo git clone https://github.com/Microsoft/vcpkg.git
cd vcpkg
./bootstrap-vcpkg.sh
./vcpkg integrate install
./vcpkg install sentencepiece
```
The sentencepiece port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
### Download and install SentencePiece from signed released wheels
You can download the wheel from the [GitHub releases page](https://github.com/google/sentencepiece/releases/latest).
We generate [SLSA3 signatures](https://slsa.dev) using the OpenSSF's [slsa-framework/slsa-github-generator](https://github.com/slsa-framework/slsa-github-generator) during the release process. To verify a release binary:
1. Install the verification tool from [slsa-framework/slsa-verifier#installation](https://github.com/slsa-framework/slsa-verifier#installation).
2. Download the provenance file `attestation.intoto.jsonl` from the [GitHub releases page](https://github.com/google/sentencepiece/releases/latest).
3. Run the verifier:
```shell
slsa-verifier -artifact-path <the-wheel> -provenance attestation.intoto.jsonl -source github.com/google/sentencepiece -tag <the-tag>
```
4. Install the verified wheel:
```shell
pip install wheel_file.whl
```
## Usage instructions
### Train SentencePiece Model
```
% spm_train --input=<input> --model_prefix=<model_name> --vocab_size=8000 --character_coverage=1.0 --model_type=<type>
```
- `--input`: one-sentence-per-line **raw** corpus file. No need to run
tokenizer, normalizer or preprocessor. By default, SentencePiece normalizes
the input with Unicode NFKC. You can pass a comma-separated list of files.
- `--model_prefix`: output model name prefix. `<model_name>.model` and `<model_name>.vocab` are generated.
- `--vocab_size`: vocabulary size, e.g., 8000, 16000, or 32000
- `--character_coverage`: amount of characters covered by the model. Good defaults are `0.9995` for languages with a rich character set, like Japanese or Chinese, and `1.0` for other languages with a small character set.
- `--model_type`: model type. Choose from `unigram` (default), `bpe`, `char`, or `word`. The input sentence must be pretokenized when using `word` type.
Use `--help` flag to display all parameters for training, or see [here](doc/options.md) for an overview.
### Encode raw text into sentence pieces/ids
```
% spm_encode --model=<model_file> --output_format=piece < input > output
% spm_encode --model=<model_file> --output_format=id < input > output
```
Use `--extra_options` flag to insert the BOS/EOS markers or reverse the input sequence.
```
% spm_encode --extra_options=eos (add </s> only)
% spm_encode --extra_options=bos:eos (add <s> and </s>)
% spm_encode --extra_options=reverse:bos:eos (reverse input and add <s> and </s>)
```
SentencePiece supports nbest segmentation and segmentation sampling with `--output_format=(nbest|sample)_(piece|id)` flags.
```
% spm_encode --model=<model_file> --output_format=sample_piece --nbest_size=-1 --alpha=0.5 < input > output
% spm_encode --model=<model_file> --output_format=nbest_id --nbest_size=10 < input > output
```
### Decode sentence pieces/ids into raw text
```
% spm_decode --model=<model_file> --input_format=piece < input > output
% spm_decode --model=<model_file> --input_format=id < input > output
```
Use `--extra_options` flag to decode the text in reverse order.
```
% spm_decode --extra_options=reverse < input > output
```
### End-to-End Example
```
% spm_train --input=data/botchan.txt --model_prefix=m --vocab_size=1000
unigram_model_trainer.cc(494) LOG(INFO) Starts training with :
input: "../data/botchan.txt"
... <snip>
unigram_model_trainer.cc(529) LOG(INFO) EM sub_iter=1 size=1100 obj=10.4973 num_tokens=37630 num_tokens/piece=34.2091
trainer_interface.cc(272) LOG(INFO) Saving model: m.model
trainer_interface.cc(281) LOG(INFO) Saving vocabs: m.vocab
% echo "I saw a girl with a telescope." | spm_encode --model=m.model
▁I ▁saw ▁a ▁girl ▁with ▁a ▁ te le s c o pe .
% echo "I saw a girl with a telescope." | spm_encode --model=m.model --output_format=id
9 459 11 939 44 11 4 142 82 8 28 21 132 6
% echo "9 459 11 939 44 11 4 142 82 8 28 21 132 6" | spm_decode --model=m.model --input_format=id
I saw a girl with a telescope.
```
You can find that the original input sentence is restored from the vocabulary id sequence.
### Export vocabulary list
```
% spm_export_vocab --model=<model_file> --output=<output file>
```
`<output file>` stores a list of vocabulary and emission log probabilities. The vocabulary id corresponds to the line number in this file.
### Redefine special meta tokens
By default, SentencePiece uses Unknown (`<unk>`), BOS (`<s>`) and EOS (`</s>`) tokens which have the ids of 0, 1, and 2 respectively. We can redefine this mapping in the training phase as follows.
```
% spm_train --bos_id=0 --eos_id=1 --unk_id=5 --input=... --model_prefix=... --character_coverage=...
```
When an id is set to -1, e.g., `bos_id=-1`, this special token is disabled. Note that the unknown id cannot be disabled. We can define an id for padding (`<pad>`) as `--pad_id=3`.
If you want to assign other special tokens, please see [Use custom symbols](doc/special_symbols.md).
### Vocabulary restriction
`spm_encode` accepts a `--vocabulary` and a `--vocabulary_threshold` option so that `spm_encode` will only produce symbols which also appear in the vocabulary (with at least some frequency). The background of this feature is described in [subword-nmt page](https://github.com/rsennrich/subword-nmt#best-practice-advice-for-byte-pair-encoding-in-nmt).
The usage is basically the same as that of `subword-nmt`. Assuming that L1 and L2 are the two languages (source/target languages), train the shared spm model, and get resulting vocabulary for each:
```
% cat {train_file}.L1 {train_file}.L2 | shuffle > train
% spm_train --input=train --model_prefix=spm --vocab_size=8000 --character_coverage=0.9995
% spm_encode --model=spm.model --generate_vocabulary < {train_file}.L1 > {vocab_file}.L1
% spm_encode --model=spm.model --generate_vocabulary < {train_file}.L2 > {vocab_file}.L2
```
`shuffle` command is used just in case because `spm_train` loads the first 10M lines of corpus by default.
Then segment train/test corpus with `--vocabulary` option
```
% spm_encode --model=spm.model --vocabulary={vocab_file}.L1 --vocabulary_threshold=50 < {test_file}.L1 > {test_file}.seg.L1
% spm_encode --model=spm.model --vocabulary={vocab_file}.L2 --vocabulary_threshold=50 < {test_file}.L2 > {test_file}.seg.L2
```
## Advanced topics
- [SentencePiece Experiments](doc/experiments.md)
- [SentencePieceProcessor C++ API](doc/api.md)
- [Use custom text normalization rules](doc/normalization.md)
- [Use custom symbols](doc/special_symbols.md)
- [Python Module](python/README.md)
- Segmentation and training algorithms in detail (no document linked yet)
## Related projects
The following projects are related to SentencePiece. They are managed independently. Please send a Pull Request (PR) if additions are needed.
- [Java utilities/bindings for SentencePiece](https://mvnrepository.com/artifact/io.github.eix128/sentencepiece4j)
================================================
FILE: VERSION.txt
================================================
0.2.2
================================================
FILE: cmake/ios.toolchain.cmake
================================================
# This file is part of the ios-cmake project. It was retrieved from
# https://github.com/leetal/ios-cmake.git, which is a fork of
# https://github.com/gerstrong/ios-cmake.git, which is a fork of
# https://github.com/cristeab/ios-cmake.git, which is a fork of
# https://code.google.com/p/ios-cmake/. Which in turn is based off of
# the Platform/Darwin.cmake and Platform/UnixPaths.cmake files which
# are included with CMake 2.8.4
#
# The ios-cmake project is licensed under the new BSD license.
#
# Copyright (c) 2014, Bogdan Cristea and LTE Engineering Software,
# Kitware, Inc., Insight Software Consortium. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# This file is based off of the Platform/Darwin.cmake and
# Platform/UnixPaths.cmake files which are included with CMake 2.8.4
# It has been altered for iOS development.
#
# Updated by Alex Stewart (alexs.mac@gmail.com)
#
# *****************************************************************************
# Now maintained by Alexander Widerberg (widerbergaren [at] gmail.com)
# under the BSD-3-Clause license
# https://github.com/leetal/ios-cmake
# *****************************************************************************
#
# INFORMATION / HELP
#
###############################################################################
# OPTIONS #
###############################################################################
#
# PLATFORM: (default "OS64")
# OS = Build for iPhoneOS.
# OS64 = Build for arm64 iphoneOS.
# OS64COMBINED = Build for arm64 x86_64 iphoneOS + iphoneOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step)
# SIMULATOR = Build for x86 i386 iphoneOS Simulator.
# SIMULATOR64 = Build for x86_64 iphoneOS Simulator.
# SIMULATORARM64 = Build for arm64 iphoneOS Simulator.
# TVOS = Build for arm64 tvOS.
# TVOSCOMBINED = Build for arm64 x86_64 tvOS + tvOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step)
# SIMULATOR_TVOS = Build for x86_64 tvOS Simulator.
# WATCHOS = Build for armv7k arm64_32 for watchOS.
# WATCHOSCOMBINED = Build for armv7k arm64_32 x86_64 watchOS + watchOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step)
# SIMULATOR_WATCHOS = Build for x86_64 for watchOS Simulator.
# MAC = Build for x86_64 macOS.
# MAC_ARM64 = Build for Apple Silicon macOS.
# MAC_CATALYST = Build for x86_64 macOS with Catalyst support (iOS toolchain on macOS).
# Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS
# MAC_CATALYST_ARM64 = Build for Apple Silicon macOS with Catalyst support (iOS toolchain on macOS).
# Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS
#
# CMAKE_OSX_SYSROOT: Path to the SDK to use. By default this is
# automatically determined from PLATFORM and xcodebuild, but
# can also be manually specified (although this should not be required).
#
# CMAKE_DEVELOPER_ROOT: Path to the Developer directory for the platform
# being compiled for. By default this is automatically determined from
# CMAKE_OSX_SYSROOT, but can also be manually specified (although this should
# not be required).
#
# DEPLOYMENT_TARGET: Minimum SDK version to target. Defaults: 4.0 on watchOS, 10.13 on MAC, 11.0 on MAC_ARM64, 13.1 on Mac Catalyst and 11.0 on iOS+tvOS
#
# NAMED_LANGUAGE_SUPPORT:
# ON (default) = Will require "enable_language(OBJC) and/or enable_language(OBJCXX)" for full OBJC|OBJCXX support
# OFF = Will embed the OBJC and OBJCXX flags into the CMAKE_C_FLAGS and CMAKE_CXX_FLAGS (legacy behaviour, CMake version < 3.16)
#
# ENABLE_BITCODE: (ON|OFF) Enables or disables bitcode support. Default ON, except OFF when ARCHS contains i386 or x86_64 (simulator/Intel builds)
#
# ENABLE_ARC: (ON|OFF) Enables or disables ARC support. Default ON (ARC enabled by default)
#
# ENABLE_VISIBILITY: (ON|OFF) Enables or disables symbol visibility support. Default OFF (visibility hidden by default)
#
# ENABLE_STRICT_TRY_COMPILE: (ON|OFF) Enables or disables strict try_compile() on all Check* directives (will run linker
# to actually check if linking is possible). Default OFF (will set CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY)
#
# ARCHS: (armv7 armv7s armv7k arm64 arm64_32 i386 x86_64) If specified, will override the default architectures for the given PLATFORM
# OS = armv7 armv7s arm64 (if applicable)
# OS64 = arm64 (if applicable)
# SIMULATOR = i386
# SIMULATOR64 = x86_64
# SIMULATORARM64 = arm64
# TVOS = arm64
# SIMULATOR_TVOS = x86_64 (i386 has since long been deprecated)
# WATCHOS = armv7k arm64_32 (if applicable)
# SIMULATOR_WATCHOS = i386 (the default set by this toolchain; pass -DARCHS=x86_64 to override)
# MAC = x86_64
# MAC_ARM64 = arm64
# MAC_CATALYST = x86_64
# MAC_CATALYST_ARM64 = arm64
#
# NOTE: When manually specifying ARCHS, put a semi-colon between the entries. E.g., -DARCHS="armv7;arm64"
#
###############################################################################
# END OPTIONS #
###############################################################################
#
# This toolchain defines the following properties (available via get_property()) for use externally:
#
# PLATFORM: The currently targeted platform.
# XCODE_VERSION: Version number (not including Build version) of Xcode detected.
# SDK_VERSION: Version of SDK being used.
# OSX_ARCHITECTURES: Architectures being compiled for (generated from PLATFORM).
# APPLE_TARGET_TRIPLE: Used by autoconf build systems. NOTE: If "ARCHS" are overridden, this will *NOT* be set!
#
# This toolchain defines the following macros for use externally:
#
# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE XCODE_VARIANT)
# A convenience macro for setting xcode specific properties on targets.
# Available variants are: All, Release, RelWithDebInfo, Debug, MinSizeRel
# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1" "all").
#
# find_host_package (PROGRAM ARGS)
# A macro used to find executable programs on the host system, not within the
# environment. Thanks to the android-cmake project for providing the
# command.
#
# Oldest CMake release this toolchain file declares support for.
cmake_minimum_required(VERSION 3.8.0)

# CMake invokes the toolchain file twice during the first build, but only once
# during subsequent rebuilds. An environment variable marks the first pass so
# that any later pass returns immediately.
if(NOT DEFINED ENV{_IOS_TOOLCHAIN_HAS_RUN})
  set(ENV{_IOS_TOOLCHAIN_HAS_RUN} true)
else()
  return()
endif()
# Every PLATFORM value accepted by this toolchain, grouped by OS family.
list(APPEND _supported_platforms
  "OS" "OS64" "OS64COMBINED"
  "SIMULATOR" "SIMULATOR64" "SIMULATORARM64"
  "TVOS" "TVOSCOMBINED" "SIMULATOR_TVOS"
  "WATCHOS" "WATCHOSCOMBINED" "SIMULATOR_WATCHOS"
  "MAC" "MAC_ARM64"
  "MAC_CATALYST" "MAC_CATALYST_ARM64")

# Remember which generator was selected.
set(USED_CMAKE_GENERATOR "${CMAKE_GENERATOR}")

# MODERN_CMAKE marks CMake 3.14 or newer, the first release able to produce
# combined FAT builds (simulator and device slices in one static library).
if(NOT ${CMAKE_VERSION} VERSION_LESS "3.14")
  set(MODERN_CMAKE YES)
endif()
# Determine the Xcode version in use.
#
# Problem: CMake runs toolchain files multiple times, but cannot read cache
# variables on some of those runs.
# Workaround: on the first run (where cache variables are always accessible)
# the value is mirrored into an environment variable, which is always preserved.
#
# NOTE: This pattern is used in many places in this toolchain to speed up
# checks of all sorts.
if(DEFINED XCODE_VERSION_INT)
  # Known already: publish it through the environment for later runs.
  set(ENV{_XCODE_VERSION_INT} "${XCODE_VERSION_INT}")
elseif(DEFINED ENV{_XCODE_VERSION_INT})
  # Not visible as a variable on this run: restore it from the environment.
  set(XCODE_VERSION_INT "$ENV{_XCODE_VERSION_INT}")
else()
  # Never computed: ask xcodebuild.
  find_program(XCODEBUILD_EXECUTABLE xcodebuild)
  if(NOT XCODEBUILD_EXECUTABLE)
    message(FATAL_ERROR "xcodebuild not found. Please install either the standalone commandline tools or Xcode.")
  endif()
  execute_process(
    COMMAND ${XCODEBUILD_EXECUTABLE} -version
    OUTPUT_VARIABLE XCODE_VERSION_INT
    ERROR_QUIET
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Keep only the "Xcode <version>" token of the output, then strip the prefix.
  string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION_INT "${XCODE_VERSION_INT}")
  string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION_INT "${XCODE_VERSION_INT}")
  set(XCODE_VERSION_INT "${XCODE_VERSION_INT}" CACHE INTERNAL "")
endif()
# Resolve PLATFORM first; the simulator arch exclusion below depends on it.
# A visible value is mirrored into the environment (environment variables are
# always preserved); otherwise it is restored from there. With neither source
# available the configure step cannot continue.
if(DEFINED PLATFORM)
  set(ENV{_PLATFORM} "${PLATFORM}")
elseif(DEFINED ENV{_PLATFORM})
  set(PLATFORM "$ENV{_PLATFORM}")
else()
  message(FATAL_ERROR "PLATFORM argument not set. Bailing configure since I don't know what target you want to build for!")
endif()

# Assuming that Xcode 12.0 is installed you most probably have iOS SDK 14.0 or
# later installed (tested on Big Sur). If no deployment target is set, only
# 64-bit builds are produced.
#
# Temporary workaround for the arm64 issues in CMake install-combined: exclude
# arm64 from iphonesimulator builds (needed on Apple Silicon hosts).
# The exclusion is skipped for SIMULATORARM64, whose only default architecture
# is arm64: excluding it there would leave Xcode with no architecture to build.
if(NOT DEFINED DEPLOYMENT_TARGET
    AND XCODE_VERSION_INT VERSION_GREATER 12.0
    AND NOT PLATFORM STREQUAL "SIMULATORARM64")
  set(CMAKE_XCODE_ATTRIBUTE_EXCLUDED_ARCHS[sdk=iphonesimulator*] "arm64")
endif()
# Combined (FAT) builds are only supported with the Xcode generator.
if(PLATFORM MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode")
  message(FATAL_ERROR "The combined builds support requires Xcode to be used as generator via '-G Xcode' command-line argument in CMake")
endif()

# Safeguard that the platform value is set and is one of the supported values.
# PLATFORM is quoted so that an empty value is reported through the message
# below instead of failing inside list(FIND) with an argument-count error.
list(FIND _supported_platforms "${PLATFORM}" contains_PLATFORM)
if("${contains_PLATFORM}" EQUAL "-1")
  string(REPLACE ";" "\n * " _supported_platforms_formatted "${_supported_platforms}")
  message(FATAL_ERROR " Invalid PLATFORM specified! Current value: ${PLATFORM}.\n"
    " Supported PLATFORM values: \n * ${_supported_platforms_formatted}")
endif()
# Apple Silicon macOS targets are rejected on CMake older than 3.19.5.
if(PLATFORM MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$")
  if(${CMAKE_VERSION} VERSION_LESS "3.19.5")
    message(FATAL_ERROR "Apple Silicon builds requires a minimum of CMake 3.19.5")
  endif()
endif()

# Reference the toolchain variable to suppress the "unused variable" warning,
# which appears when CMake is invoked a second time with the same command line.
if(CMAKE_TOOLCHAIN_FILE)
endif()

# Fix for the pthread library not being in the path: preset the thread
# variables so pthreads are reported as available.
set(CMAKE_USE_PTHREADS_INIT 1)
set(CMAKE_USE_WIN32_THREADS_INIT 0)
set(CMAKE_HAVE_THREADS_LIBRARY 1)
set(CMAKE_THREAD_LIBS_INIT "-lpthread")
# Specify named language support defaults.
#   - Not provided: ON for CMake 3.16+, OFF (legacy behaviour) for older CMake.
#   - Provided and enabled on CMake < 3.16: fatal error, because named OBJC and
#     OBJCXX language support does not exist there.
#   - Provided and disabled: accepted on every CMake version. This is the
#     legacy mode, so requesting it explicitly on old CMake must not fail.
if(NOT DEFINED NAMED_LANGUAGE_SUPPORT)
  if(${CMAKE_VERSION} VERSION_LESS "3.16")
    set(NAMED_LANGUAGE_SUPPORT OFF)
    message(STATUS "[DEFAULTS] Disabling explicit named language support. Falling back to legacy behaviour.")
  else()
    set(NAMED_LANGUAGE_SUPPORT ON)
    message(STATUS "[DEFAULTS] Using explicit named language support! E.g., enable_language(CXX) is needed in the project files.")
  endif()
elseif(NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_LESS "3.16")
  message(FATAL_ERROR "CMake named language support for OBJC and OBJCXX was added in CMake 3.16.")
endif()
set(NAMED_LANGUAGE_SUPPORT_INT ${NAMED_LANGUAGE_SUPPORT} CACHE BOOL
  "Whether or not to enable explicit named language support" FORCE)
# Minimum deployment target: validate a user-provided value, otherwise pick a
# per-platform default.
if(DEFINED DEPLOYMENT_TARGET)
  # Mac Catalyst cannot target anything older than 13.1.
  if(PLATFORM MATCHES "^MAC_CATALYST" AND ${DEPLOYMENT_TARGET} VERSION_LESS "13.1")
    message(FATAL_ERROR "Mac Catalyst builds requires a minimum deployment target of 13.1!")
  endif()
else()
  if(PLATFORM MATCHES "WATCHOS")
    # watchOS (device, combined and simulator): 4.0.
    set(DEPLOYMENT_TARGET "4.0")
  elseif(PLATFORM STREQUAL "MAC")
    # macOS on Intel: 10.13 (High Sierra).
    set(DEPLOYMENT_TARGET "10.13")
  elseif(PLATFORM STREQUAL "MAC_ARM64")
    # macOS on Apple Silicon: 11.0 (Big Sur).
    set(DEPLOYMENT_TARGET "11.0")
  elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64")
    # Mac Catalyst: 13.1, the minimum accepted by the check above.
    set(DEPLOYMENT_TARGET "13.1")
  else()
    # Everything else (iOS, tvOS): 11.0.
    set(DEPLOYMENT_TARGET "11.0")
  endif()
  message(STATUS "[DEFAULTS] Using the default min-version since DEPLOYMENT_TARGET not provided!")
endif()

# Store the DEPLOYMENT_TARGET in the cache.
set(DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}" CACHE INTERNAL "")
# When the deployment target is 10.3.4 or newer, the 32-bit capable platforms
# are promoted to their 64-bit-only counterparts (32-bit support was officially
# dropped).
if(DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4)
  if(PLATFORM STREQUAL "OS")
    set(PLATFORM "OS64")
    message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. Dropping 32-bit support.")
  elseif(PLATFORM STREQUAL "SIMULATOR")
    set(PLATFORM "SIMULATOR64")
    message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. Dropping 32-bit support.")
  endif()
endif()

# Internal copy of the (possibly promoted) platform used by the code below.
set(PLATFORM_INT "${PLATFORM}")

# User-provided architectures joined with "-" for use in target triples.
if(DEFINED ARCHS)
  string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}")
endif()
# Translate PLATFORM_INT into:
#   SDK_NAME                - SDK identifier used in xcodebuild/xcrun commands
#   ARCHS                   - architectures to build (defaults apply only when
#                             the user did not provide ARCHS)
#   APPLE_TARGET_TRIPLE_INT - target triple; built from ARCHS_SPLIT when ARCHS
#                             was user-provided
# For the *COMBINED platforms the per-SDK Xcode ARCHS/VALID_ARCHS attributes
# are set as well, and CMake 3.14+ (MODERN_CMAKE) is required.
if(PLATFORM_INT STREQUAL "OS")
  set(SDK_NAME iphoneos)
  if(ARCHS)
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET})
  else()
    set(ARCHS armv7 armv7s arm64)
    set(APPLE_TARGET_TRIPLE_INT arm-apple-ios${DEPLOYMENT_TARGET})
  endif()

elseif(PLATFORM_INT STREQUAL "OS64")
  set(SDK_NAME iphoneos)
  if(ARCHS)
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET})
  else()
    # The same architecture is selected for every Xcode version.
    # FIXME: Add arm64e when Apple have fixed the integration issues with it,
    # libarclite_iphoneos.a is currently missing bitcode markers for example.
    set(ARCHS arm64)
    set(APPLE_TARGET_TRIPLE_INT aarch64-apple-ios${DEPLOYMENT_TARGET})
  endif()

elseif(PLATFORM_INT STREQUAL "OS64COMBINED")
  set(SDK_NAME iphoneos)
  if(NOT MODERN_CMAKE)
    message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the OS64COMBINED setting work")
  endif()
  if(ARCHS)
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET})
  else()
    # The same settings are applied for every Xcode version.
    # FIXME: Add arm64e when Apple have fixed the integration issues with it,
    # libarclite_iphoneos.a is currently missing bitcode markers for example.
    set(ARCHS arm64 x86_64)
    set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64")
    set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64")
    set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64")
    set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64")
    set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-ios${DEPLOYMENT_TARGET})
  endif()

elseif(PLATFORM_INT STREQUAL "SIMULATOR")
  set(SDK_NAME iphonesimulator)
  if(ARCHS)
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator)
  else()
    set(ARCHS i386)
    set(APPLE_TARGET_TRIPLE_INT i386-apple-ios${DEPLOYMENT_TARGET}-simulator)
  endif()
  message(DEPRECATION "SIMULATOR IS DEPRECATED. Consider using SIMULATOR64 instead.")

elseif(PLATFORM_INT STREQUAL "SIMULATOR64")
  set(SDK_NAME iphonesimulator)
  if(ARCHS)
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator)
  else()
    set(ARCHS x86_64)
    set(APPLE_TARGET_TRIPLE_INT x86_64-apple-ios${DEPLOYMENT_TARGET}-simulator)
  endif()

elseif(PLATFORM_INT STREQUAL "SIMULATORARM64")
  set(SDK_NAME iphonesimulator)
  if(ARCHS)
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator)
  else()
    set(ARCHS arm64)
    set(APPLE_TARGET_TRIPLE_INT aarch64-apple-ios${DEPLOYMENT_TARGET}-simulator)
  endif()

elseif(PLATFORM_INT STREQUAL "TVOS")
  set(SDK_NAME appletvos)
  if(ARCHS)
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET})
  else()
    set(ARCHS arm64)
    set(APPLE_TARGET_TRIPLE_INT aarch64-apple-tvos${DEPLOYMENT_TARGET})
  endif()

elseif(PLATFORM_INT STREQUAL "TVOSCOMBINED")
  set(SDK_NAME appletvos)
  if(NOT MODERN_CMAKE)
    message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the TVOSCOMBINED setting work")
  endif()
  if(ARCHS)
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET})
  else()
    set(ARCHS arm64 x86_64)
    set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-tvos${DEPLOYMENT_TARGET})
    set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvos*] "arm64")
    set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvsimulator*] "x86_64")
    set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvos*] "arm64")
    set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvsimulator*] "x86_64")
  endif()

elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS")
  set(SDK_NAME appletvsimulator)
  if(ARCHS)
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}-simulator)
  else()
    set(ARCHS x86_64)
    set(APPLE_TARGET_TRIPLE_INT x86_64-apple-tvos${DEPLOYMENT_TARGET}-simulator)
  endif()

elseif(PLATFORM_INT STREQUAL "WATCHOS")
  set(SDK_NAME watchos)
  if(ARCHS)
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET})
  elseif(XCODE_VERSION_INT VERSION_GREATER 10.0)
    # arm64_32 is only added for Xcode newer than 10.0.
    set(ARCHS armv7k arm64_32)
    set(APPLE_TARGET_TRIPLE_INT aarch64_32-apple-watchos${DEPLOYMENT_TARGET})
  else()
    set(ARCHS armv7k)
    set(APPLE_TARGET_TRIPLE_INT arm-apple-watchos${DEPLOYMENT_TARGET})
  endif()

elseif(PLATFORM_INT STREQUAL "WATCHOSCOMBINED")
  set(SDK_NAME watchos)
  if(NOT MODERN_CMAKE)
    message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the WATCHOSCOMBINED setting work")
  endif()
  if(ARCHS)
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET})
  elseif(XCODE_VERSION_INT VERSION_GREATER 10.0)
    # arm64_32 is only added for Xcode newer than 10.0.
    set(ARCHS armv7k arm64_32 i386)
    set(APPLE_TARGET_TRIPLE_INT aarch64_32-i386-apple-watchos${DEPLOYMENT_TARGET})
    set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k arm64_32")
    set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386")
    set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k arm64_32")
    set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386")
  else()
    set(ARCHS armv7k i386)
    set(APPLE_TARGET_TRIPLE_INT arm-i386-apple-watchos${DEPLOYMENT_TARGET})
    set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k")
    set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386")
    set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k")
    set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386")
  endif()

elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS")
  set(SDK_NAME watchsimulator)
  if(ARCHS)
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}-simulator)
  else()
    set(ARCHS i386)
    set(APPLE_TARGET_TRIPLE_INT i386-apple-watchos${DEPLOYMENT_TARGET}-simulator)
  endif()

elseif(PLATFORM_INT STREQUAL "MAC" OR PLATFORM_INT STREQUAL "MAC_CATALYST")
  set(SDK_NAME macosx)
  if(NOT ARCHS)
    set(ARCHS x86_64)
  endif()
  # Recomputed here because ARCHS may just have received its default.
  string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}")
  if(PLATFORM_INT STREQUAL "MAC")
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET})
  else()
    # MAC_CATALYST
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi)
  endif()

elseif(PLATFORM_INT MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$")
  set(SDK_NAME macosx)
  if(NOT ARCHS)
    set(ARCHS arm64)
  endif()
  # Recomputed here because ARCHS may just have received its default.
  string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}")
  if(PLATFORM_INT STREQUAL "MAC_ARM64")
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET})
  else()
    # MAC_CATALYST_ARM64
    set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi)
  endif()

else()
  message(FATAL_ERROR "Invalid PLATFORM: ${PLATFORM_INT}")
endif()
# Space-separated form of ARCHS, as expected by Xcode build settings.
string(REPLACE ";" " " ARCHS_SPACED "${ARCHS}")

# The COMBINED platforms can only be generated with Xcode.
if(MODERN_CMAKE AND PLATFORM_INT MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode")
  message(FATAL_ERROR "The COMBINED options only work with Xcode generator, -G Xcode")
endif()

# Xcode generator: configure the build through Xcode attributes.
if(CMAKE_GENERATOR MATCHES "Xcode")
  set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++")
  if(PLATFORM_INT MATCHES "^MAC_CATALYST")
    set(CMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS "macosx")
    set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-maccatalyst")
    # macOS minimum version: MACOSX_DEPLOYMENT_TARGET if provided, else 10.15.
    if(DEFINED MACOSX_DEPLOYMENT_TARGET)
      set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "${MACOSX_DEPLOYMENT_TARGET}")
    else()
      set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "10.15")
    endif()
  else()
    set(CMAKE_XCODE_ATTRIBUTE_IPHONEOS_DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}")
    # COMBINED platforms set their per-SDK architectures elsewhere.
    if(NOT PLATFORM_INT MATCHES ".*COMBINED")
      set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=${SDK_NAME}*] "${ARCHS_SPACED}")
      set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=${SDK_NAME}*] "${ARCHS_SPACED}")
    endif()
  endif()
endif()
# Resolve the SDK root (CMAKE_OSX_SYSROOT_INT). A visible value is mirrored
# into the environment (environment variables are always preserved); otherwise
# it is restored from there; otherwise xcodebuild is queried for the SDK path.
if(DEFINED CMAKE_OSX_SYSROOT_INT)
  set(ENV{_CMAKE_OSX_SYSROOT_INT} "${CMAKE_OSX_SYSROOT_INT}")
elseif(DEFINED ENV{_CMAKE_OSX_SYSROOT_INT})
  set(CMAKE_OSX_SYSROOT_INT "$ENV{_CMAKE_OSX_SYSROOT_INT}")
else()
  # XCODEBUILD_EXECUTABLE is only searched for when the Xcode version has to be
  # detected, so look it up here as well. This is a no-op when already found.
  find_program(XCODEBUILD_EXECUTABLE xcodebuild)
  if(XCODEBUILD_EXECUTABLE)
    execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version -sdk ${SDK_NAME} Path
      OUTPUT_VARIABLE CMAKE_OSX_SYSROOT_INT
      ERROR_QUIET
      OUTPUT_STRIP_TRAILING_WHITESPACE)
  endif()
endif()

# The query may have produced nothing. execute_process() still defines its
# output variable in that case, so emptiness is tested rather than DEFINED.
# A sysroot supplied by the caller is used as the fallback.
if(NOT CMAKE_OSX_SYSROOT_INT AND CMAKE_OSX_SYSROOT)
  set(CMAKE_OSX_SYSROOT_INT "${CMAKE_OSX_SYSROOT}")
endif()

if(NOT CMAKE_OSX_SYSROOT_INT)
  # message() joins its arguments without separators, hence the trailing spaces.
  message(SEND_ERROR "Please make sure that Xcode is installed and that the toolchain "
    "is pointing to the correct path. Please run: "
    "sudo xcode-select -s /Applications/Xcode.app/Contents/Developer "
    "and see if that fixes the problem for you.")
  message(FATAL_ERROR "Invalid CMAKE_OSX_SYSROOT: ${CMAKE_OSX_SYSROOT} "
    "does not exist.")
else()
  set(CMAKE_OSX_SYSROOT_INT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "")
  # Specify the location or name of the platform SDK to be used in CMAKE_OSX_SYSROOT.
  set(CMAKE_OSX_SYSROOT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "")
endif()
# Bitcode default: when ENABLE_BITCODE is not provided it is switched OFF if
# ARCHS contains i386 or x86_64 (simulator builds) and ON otherwise.
if(NOT DEFINED ENABLE_BITCODE)
  if(ARCHS MATCHES "((^|;|, )(i386|x86_64))+")
    message(STATUS "[DEFAULTS] Disabling bitcode support by default on simulators. ENABLE_BITCODE not provided for override!")
    set(ENABLE_BITCODE OFF)
  else()
    message(STATUS "[DEFAULTS] Enabling bitcode support by default. ENABLE_BITCODE not provided!")
    set(ENABLE_BITCODE ON)
  endif()
endif()
set(ENABLE_BITCODE_INT ${ENABLE_BITCODE} CACHE BOOL
  "Whether or not to enable bitcode" FORCE)
# ARC: enabled unless ENABLE_ARC says otherwise.
if(NOT DEFINED ENABLE_ARC)
  message(STATUS "[DEFAULTS] Enabling ARC support by default. ENABLE_ARC not provided!")
  set(ENABLE_ARC ON)
endif()
set(ENABLE_ARC_INT ${ENABLE_ARC} CACHE BOOL "Whether or not to enable ARC" FORCE)

# Symbol visibility: ENABLE_VISIBILITY defaults to OFF, i.e. symbols are hidden.
if(NOT DEFINED ENABLE_VISIBILITY)
  message(STATUS "[DEFAULTS] Hiding symbols visibility by default. ENABLE_VISIBILITY not provided!")
  set(ENABLE_VISIBILITY OFF)
endif()
set(ENABLE_VISIBILITY_INT ${ENABLE_VISIBILITY} CACHE BOOL "Whether or not to hide symbols from the dynamic linker (-fvisibility=hidden)" FORCE)

# Strict try_compile(): disabled unless ENABLE_STRICT_TRY_COMPILE says otherwise.
if(NOT DEFINED ENABLE_STRICT_TRY_COMPILE)
  message(STATUS "[DEFAULTS] Using NON-strict compiler checks by default. ENABLE_STRICT_TRY_COMPILE not provided!")
  set(ENABLE_STRICT_TRY_COMPILE OFF)
endif()
set(ENABLE_STRICT_TRY_COMPILE_INT ${ENABLE_STRICT_TRY_COMPILE} CACHE BOOL
  "Whether or not to use strict compiler checks" FORCE)
# Resolve SDK_VERSION: mirror a visible value into the environment (environment
# variables are always preserved), restore it from there, or ask xcodebuild for
# the version of the SDK found at CMAKE_OSX_SYSROOT_INT.
if(DEFINED SDK_VERSION)
  set(ENV{_SDK_VERSION} "${SDK_VERSION}")
elseif(DEFINED ENV{_SDK_VERSION})
  set(SDK_VERSION "$ENV{_SDK_VERSION}")
else()
  execute_process(
    COMMAND ${XCODEBUILD_EXECUTABLE} -sdk ${CMAKE_OSX_SYSROOT_INT} -version SDKVersion
    OUTPUT_VARIABLE SDK_VERSION
    ERROR_QUIET
    OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
# Derive the Developer root of the platform being compiled for. It should be
# ../../ from the SDK given by CMAKE_OSX_SYSROOT; there does not appear to be a
# direct way to obtain it from xcrun or xcodebuild. Skipped when the caller
# supplied CMAKE_DEVELOPER_ROOT or when the Xcode generator is used.
if(NOT (DEFINED CMAKE_DEVELOPER_ROOT OR CMAKE_GENERATOR MATCHES "Xcode"))
  # Two directory levels up from the SDK path.
  get_filename_component(PLATFORM_SDK_DIR ${CMAKE_OSX_SYSROOT_INT} DIRECTORY)
  get_filename_component(CMAKE_DEVELOPER_ROOT ${PLATFORM_SDK_DIR} DIRECTORY)
  if(NOT EXISTS "${CMAKE_DEVELOPER_ROOT}")
    message(FATAL_ERROR "Invalid CMAKE_DEVELOPER_ROOT: ${CMAKE_DEVELOPER_ROOT} does not exist.")
  endif()
endif()
# Locate the C compiler for the selected SDK. A visible value is mirrored into
# the environment (environment variables are always preserved); otherwise it is
# restored from there or looked up with xcrun. In the latter two cases the
# assembler is set to the same executable.
if(DEFINED CMAKE_C_COMPILER)
  set(ENV{_CMAKE_C_COMPILER} "${CMAKE_C_COMPILER}")
elseif(DEFINED ENV{_CMAKE_C_COMPILER})
  set(CMAKE_C_COMPILER "$ENV{_CMAKE_C_COMPILER}")
  set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER})
else()
  execute_process(
    COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang
    OUTPUT_VARIABLE CMAKE_C_COMPILER
    ERROR_QUIET
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER})
endif()

# Locate the C++ compiler the same way.
if(DEFINED CMAKE_CXX_COMPILER)
  set(ENV{_CMAKE_CXX_COMPILER} "${CMAKE_CXX_COMPILER}")
elseif(DEFINED ENV{_CMAKE_CXX_COMPILER})
  set(CMAKE_CXX_COMPILER "$ENV{_CMAKE_CXX_COMPILER}")
else()
  execute_process(
    COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang++
    OUTPUT_VARIABLE CMAKE_CXX_COMPILER
    ERROR_QUIET
    OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
# Locate (Apple's) libtool: mirror a visible value into the environment
# (environment variables are always preserved), restore it from there, or look
# it up with xcrun for the selected SDK.
if(DEFINED BUILD_LIBTOOL)
  set(ENV{_BUILD_LIBTOOL} "${BUILD_LIBTOOL}")
elseif(DEFINED ENV{_BUILD_LIBTOOL})
  set(BUILD_LIBTOOL "$ENV{_BUILD_LIBTOOL}")
else()
  execute_process(
    COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find libtool
    OUTPUT_VARIABLE BUILD_LIBTOOL
    ERROR_QUIET
    OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
# Locate install_name_tool: mirror a visible value into the environment
# (environment variables are always preserved), restore it from there, or fall
# back to the one provided by the toolchain (found with xcrun) and cache it.
if(DEFINED CMAKE_INSTALL_NAME_TOOL)
  set(ENV{_CMAKE_INSTALL_NAME_TOOL} "${CMAKE_INSTALL_NAME_TOOL}")
elseif(DEFINED ENV{_CMAKE_INSTALL_NAME_TOOL})
  set(CMAKE_INSTALL_NAME_TOOL "$ENV{_CMAKE_INSTALL_NAME_TOOL}")
else()
  execute_process(
    COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find install_name_tool
    OUTPUT_VARIABLE CMAKE_INSTALL_NAME_TOOL_INT
    ERROR_QUIET
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  set(CMAKE_INSTALL_NAME_TOOL ${CMAKE_INSTALL_NAME_TOOL_INT} CACHE INTERNAL "")
endif()
# Build static libraries with libtool instead of ar + ranlib, for every
# language enabled so far. This is required on Xcode 7+, but should also work
# on previous versions of Xcode.
get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES)
foreach(lang IN LISTS languages)
  set(CMAKE_${lang}_CREATE_STATIC_LIBRARY
    "${BUILD_LIBTOOL} -static -o <TARGET> <LINK_FLAGS> <OBJECTS> "
    CACHE INTERNAL "")
endforeach()
# Select CMAKE_SYSTEM_NAME.
if(MODERN_CMAKE)
  # CMake 3.14+ supports building for iOS, watchOS and tvOS out of the box, so
  # the system name is derived from the SDK in use.
  if(SDK_NAME MATCHES "iphone")
    set(CMAKE_SYSTEM_NAME iOS)
  elseif(SDK_NAME MATCHES "macosx")
    set(CMAKE_SYSTEM_NAME Darwin)
  elseif(SDK_NAME MATCHES "appletv")
    set(CMAKE_SYSTEM_NAME tvOS)
  elseif(SDK_NAME MATCHES "watch")
    set(CMAKE_SYSTEM_NAME watchOS)
  endif()
  # Provide flags for a combined FAT library build on newer CMake versions.
  if(PLATFORM_INT MATCHES ".*COMBINED")
    set(CMAKE_XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO")
    set(CMAKE_IOS_INSTALL_COMBINED YES)
  endif()
elseif(NOT DEFINED CMAKE_SYSTEM_NAME)
  # Legacy code path prior to CMake 3.14, used only when no CMAKE_SYSTEM_NAME
  # was specified: iOS from CMake 3.10 on, Darwin before that.
  if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.10")
    set(CMAKE_SYSTEM_NAME iOS)
  else()
    set(CMAKE_SYSTEM_NAME Darwin)
  endif()
endif()
# Standard settings: system version and the platform family cache flags.
set(CMAKE_SYSTEM_VERSION ${SDK_VERSION} CACHE INTERNAL "")
set(UNIX ON CACHE BOOL "")
set(APPLE ON CACHE BOOL "")

# IOS / MACOS flags:
#   MAC, MAC_ARM64       -> IOS OFF, MACOS ON
#   MAC_CATALYST(_ARM64) -> IOS ON,  MACOS ON
#   anything else        -> IOS ON
if(PLATFORM STREQUAL "MAC" OR PLATFORM STREQUAL "MAC_ARM64")
  set(MACOS ON CACHE BOOL "")
  set(IOS OFF CACHE BOOL "")
elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64")
  set(MACOS ON CACHE BOOL "")
  set(IOS ON CACHE BOOL "")
else()
  set(IOS ON CACHE BOOL "")
endif()

# Archiver, ranlib and strip tools, forced into the cache.
set(CMAKE_AR ar CACHE FILEPATH "" FORCE)
set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE)
set(CMAKE_STRIP strip CACHE FILEPATH "" FORCE)

# Set the architectures for which to build.
set(CMAKE_OSX_ARCHITECTURES ${ARCHS} CACHE INTERNAL "")
# With non-strict compiler checks, try_compile() builds a static library so the
# checks also work when cross-compiling (no link step).
if(NOT ENABLE_STRICT_TRY_COMPILE_INT)
  set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
endif()

# All iOS/Darwin specific settings - some may be redundant.
set(CMAKE_MACOSX_BUNDLE YES)
set(CMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED "NO")

# Shared library and module naming.
set(CMAKE_SHARED_LIBRARY_PREFIX "lib")
set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib")
set(CMAKE_SHARED_MODULE_PREFIX "lib")
set(CMAKE_SHARED_MODULE_SUFFIX ".so")

set(CMAKE_MODULE_EXISTS 1)
set(CMAKE_DL_LIBS "")

# Per-language settings; C and C++ receive identical values.
foreach(_lang C CXX)
  set(CMAKE_${_lang}_COMPILER_ABI ELF)
  set(CMAKE_${_lang}_HAS_ISYSROOT 1)
  set(CMAKE_${_lang}_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ")
  set(CMAKE_${_lang}_OSX_CURRENT_VERSION_FLAG "-current_version ")
endforeach()
# Pointer size and system processor derived from ARCHS.
#
# arm64_32 (watchOS) is an ILP32 architecture with 4-byte pointers, but the
# 64-bit pattern below would match its "arm64" prefix. The pointer-size test
# therefore runs on a copy of ARCHS with the arm64_32 entries blanked out.
string(REPLACE "arm64_32" "" _archs_without_arm64_32 "${ARCHS}")
if(_archs_without_arm64_32 MATCHES "((^|;|, )(arm64|arm64e|x86_64))+")
  set(_has_64bit_pointer_arch TRUE)
  set(CMAKE_C_SIZEOF_DATA_PTR 8)
  set(CMAKE_CXX_SIZEOF_DATA_PTR 8)
else()
  set(_has_64bit_pointer_arch FALSE)
  set(CMAKE_C_SIZEOF_DATA_PTR 4)
  set(CMAKE_CXX_SIZEOF_DATA_PTR 4)
endif()

# The processor selection is unchanged: any arm64* entry (arm64_32 included)
# yields "aarch64", a remaining 64-bit set yields "x86_64", anything else "arm".
if(ARCHS MATCHES "((^|;|, )(arm64|arm64e))+")
  set(CMAKE_SYSTEM_PROCESSOR "aarch64")
elseif(_has_64bit_pointer_arch)
  set(CMAKE_SYSTEM_PROCESSOR "x86_64")
else()
  set(CMAKE_SYSTEM_PROCESSOR "arm")
endif()
unset(_archs_without_arm64_32)
unset(_has_64bit_pointer_arch)
# Minimum-version handling.
# CMake older than 3.11: build the -m...-version-min flag by hand. Note that
# only Xcode 7+ supports the newer, more specific -m${SDK_NAME}-version-min
# flags; older versions of Xcode use -m(ios/ios-simulator)-version-min instead.
# CMake 3.11+: CMake sets the version-min flags correctly itself, so only
# CMAKE_OSX_DEPLOYMENT_TARGET is set (skipped for Mac Catalyst targets).
if(${CMAKE_VERSION} VERSION_LESS "3.11")
  if(PLATFORM_INT STREQUAL "OS" OR PLATFORM_INT STREQUAL "OS64")
    if(XCODE_VERSION_INT VERSION_LESS 7.0)
      set(SDK_NAME_VERSION_FLAGS "-mios-version-min=${DEPLOYMENT_TARGET}")
    else()
      # Xcode 7.0+ uses flags we can build directly from SDK_NAME.
      set(SDK_NAME_VERSION_FLAGS "-m${SDK_NAME}-version-min=${DEPLOYMENT_TARGET}")
    endif()
  elseif(PLATFORM_INT STREQUAL "TVOS")
    set(SDK_NAME_VERSION_FLAGS "-mtvos-version-min=${DEPLOYMENT_TARGET}")
  elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS")
    set(SDK_NAME_VERSION_FLAGS "-mtvos-simulator-version-min=${DEPLOYMENT_TARGET}")
  elseif(PLATFORM_INT STREQUAL "WATCHOS")
    set(SDK_NAME_VERSION_FLAGS "-mwatchos-version-min=${DEPLOYMENT_TARGET}")
  elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS")
    set(SDK_NAME_VERSION_FLAGS "-mwatchos-simulator-version-min=${DEPLOYMENT_TARGET}")
  elseif(PLATFORM_INT STREQUAL "MAC")
    set(SDK_NAME_VERSION_FLAGS "-mmacosx-version-min=${DEPLOYMENT_TARGET}")
  else()
    # SIMULATOR or SIMULATOR64 both use -mios-simulator-version-min.
    set(SDK_NAME_VERSION_FLAGS "-mios-simulator-version-min=${DEPLOYMENT_TARGET}")
  endif()
else()
  if(NOT PLATFORM_INT MATCHES "^MAC_CATALYST")
    set(CMAKE_OSX_DEPLOYMENT_TARGET ${DEPLOYMENT_TARGET})
  endif()
endif()
# Publish the target triple (when one was computed) and use it as the compiler
# target for C, C++ and ASM.
if(DEFINED APPLE_TARGET_TRIPLE_INT)
  set(APPLE_TARGET_TRIPLE ${APPLE_TARGET_TRIPLE_INT} CACHE INTERNAL "")
  foreach(_lang C CXX ASM)
    set(CMAKE_${_lang}_COMPILER_TARGET ${APPLE_TARGET_TRIPLE})
  endforeach()
endif()

# Mac Catalyst: add the iOSSupport header and framework directories of the SDK.
if(PLATFORM_INT MATCHES "^MAC_CATALYST")
  set(C_TARGET_FLAGS "-isystem ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/usr/include -iframework ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks")
endif()
# Bitcode: compiler flag plus the matching Xcode attributes.
if(NOT ENABLE_BITCODE_INT)
  set(BITCODE "")
  set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "NO")
else()
  set(BITCODE "-fembed-bitcode")
  set(CMAKE_XCODE_ATTRIBUTE_BITCODE_GENERATION_MODE "bitcode")
  set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "YES")
endif()

# ARC: compiler flag plus the matching Xcode attribute.
if(NOT ENABLE_ARC_INT)
  set(FOBJC_ARC "-fno-objc-arc")
  set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "NO")
else()
  set(FOBJC_ARC "-fobjc-arc")
  set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES")
endif()

# Objective-C flags go into OBJC_VARS when named language support is enabled,
# and into OBJC_LEGACY_VARS otherwise; the other variable is left empty.
if(NOT NAMED_LANGUAGE_SUPPORT_INT)
  set(OBJC_VARS "")
  set(OBJC_LEGACY_VARS "-fobjc-abi-version=2 -DOBJC_OLD_DISPATCH_PROTOTYPES=0")
else()
  set(OBJC_VARS "-fobjc-abi-version=2 -DOBJC_OLD_DISPATCH_PROTOTYPES=0")
  set(OBJC_LEGACY_VARS "")
endif()
# Symbol visibility: choose the preset, Xcode attribute and compiler flags,
# then apply the preset to every language listed in `languages`.
if(ENABLE_VISIBILITY_INT)
  set(_visibility_preset "default")
  set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "NO")
  set(VISIBILITY "-fvisibility=default")
else()
  set(_visibility_preset "hidden")
  set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "YES")
  set(VISIBILITY "-fvisibility=hidden -fvisibility-inlines-hidden")
endif()
foreach(lang IN LISTS languages)
  set(CMAKE_${lang}_VISIBILITY_PRESET "${_visibility_preset}" CACHE INTERNAL "")
endforeach()
unset(_visibility_preset)

# Compiler flag carrying the target triple, when one is known.
if(DEFINED APPLE_TARGET_TRIPLE)
  set(APPLE_TARGET_TRIPLE_FLAG "-target ${APPLE_TARGET_TRIPLE}")
endif()
# The Xcode generator handles these flags itself through its build settings, so
# command-line flags are only assembled by hand for the other generators.
if(NOT CMAKE_GENERATOR MATCHES "Xcode")
  # C and C++: target/SDK flags, legacy Objective-C ABI flags, bitcode and
  # visibility are prepended to whatever the user already supplied.
  foreach(lang C CXX)
    set(CMAKE_${lang}_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${OBJC_LEGACY_VARS} ${BITCODE} ${VISIBILITY} ${CMAKE_${lang}_FLAGS}")
    set(CMAKE_${lang}_FLAGS_DEBUG "-O0 -g ${CMAKE_${lang}_FLAGS_DEBUG}")
    set(CMAKE_${lang}_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_${lang}_FLAGS_MINSIZEREL}")
    set(CMAKE_${lang}_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_${lang}_FLAGS_RELWITHDEBINFO}")
    set(CMAKE_${lang}_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_${lang}_FLAGS_RELEASE}")
    set(CMAKE_${lang}_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_${lang}_LINK_FLAGS}")
  endforeach()

  # Objective-C and Objective-C++ as first-class languages: same layout, but
  # with the ARC switch and OBJC_VARS instead of the legacy ABI flags.
  if(NAMED_LANGUAGE_SUPPORT_INT)
    foreach(lang OBJC OBJCXX)
      set(CMAKE_${lang}_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} ${FOBJC_ARC} ${OBJC_VARS} ${CMAKE_${lang}_FLAGS}")
      set(CMAKE_${lang}_FLAGS_DEBUG "-O0 -g ${CMAKE_${lang}_FLAGS_DEBUG}")
      set(CMAKE_${lang}_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_${lang}_FLAGS_MINSIZEREL}")
      set(CMAKE_${lang}_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_${lang}_FLAGS_RELWITHDEBINFO}")
      set(CMAKE_${lang}_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_${lang}_FLAGS_RELEASE}")
      set(CMAKE_${lang}_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_${lang}_LINK_FLAGS}")
    endforeach()
  endif()

  # Assembler flags build on the final C flags, so this must stay after the
  # C flags have been composed.
  set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} -x assembler-with-cpp -arch ${CMAKE_OSX_ARCHITECTURES} ${APPLE_TARGET_TRIPLE_FLAG}")
else()
  message(STATUS "Not setting any manual command-line buildflags, since Xcode is selected as generator. Modifying the Xcode build-settings directly instead.")
endif()
## Report the resolved toolchain configuration on the configure log.
message(STATUS "Configuring ${SDK_NAME} build for platform: ${PLATFORM_INT}, architecture(s): ${ARCHS}")
message(STATUS "Using SDK: ${CMAKE_OSX_SYSROOT_INT}")
message(STATUS "Using C compiler: ${CMAKE_C_COMPILER}")
message(STATUS "Using CXX compiler: ${CMAKE_CXX_COMPILER}")
message(STATUS "Using libtool: ${BUILD_LIBTOOL}")
message(STATUS "Using install name tool: ${CMAKE_INSTALL_NAME_TOOL}")
if(DEFINED APPLE_TARGET_TRIPLE)
  message(STATUS "Autoconf target triple: ${APPLE_TARGET_TRIPLE}")
endif()
message(STATUS "Using minimum deployment version: ${DEPLOYMENT_TARGET} (SDK version: ${SDK_VERSION})")

# Notes that only apply when CMake's built-in Apple platform support is in use.
if(MODERN_CMAKE)
  message(STATUS "Merging integrated CMake 3.14+ iOS,tvOS,watchOS,macOS toolchain(s) with this toolchain!")
endif()
if(MODERN_CMAKE AND PLATFORM_INT MATCHES ".*COMBINED")
  message(STATUS "Will combine built (static) artifacts into FAT lib...")
endif()

if(CMAKE_GENERATOR MATCHES "Xcode")
  message(STATUS "Using Xcode version: ${XCODE_VERSION_INT}")
endif()
message(STATUS "CMake version: ${CMAKE_VERSION}")
if(DEFINED SDK_NAME_VERSION_FLAGS)
  message(STATUS "Using version flags: ${SDK_NAME_VERSION_FLAGS}")
endif()
message(STATUS "Using a data_ptr size of: ${CMAKE_CXX_SIZEOF_DATA_PTR}")

# Feature switches.
if(NOT ENABLE_BITCODE_INT)
  message(STATUS "Bitcode: Disabled")
else()
  message(STATUS "Bitcode: Enabled")
endif()
if(NOT ENABLE_ARC_INT)
  message(STATUS "ARC: Disabled")
else()
  message(STATUS "ARC: Enabled")
endif()
# Visibility enabled means symbols are exported, i.e. hiding is off.
if(NOT ENABLE_VISIBILITY_INT)
  message(STATUS "Hiding symbols: Enabled")
else()
  message(STATUS "Hiding symbols: Disabled")
endif()
# Set global properties
# Publishes the resolved toolchain values as GLOBAL properties, so project code
# can read them back with get_property(... GLOBAL PROPERTY <name>).
# Note: PLATFORM is stored from ${PLATFORM}, not from the PLATFORM_INT variable
# used elsewhere in this section.
set_property(GLOBAL PROPERTY PLATFORM "${PLATFORM}")
set_property(GLOBAL PROPERTY APPLE_TARGET_TRIPLE "${APPLE_TARGET_TRIPLE_INT}")
set_property(GLOBAL PROPERTY SDK_VERSION "${SDK_VERSION}")
set_property(GLOBAL PROPERTY XCODE_VERSION "${XCODE_VERSION_INT}")
set_property(GLOBAL PROPERTY OSX_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}")
# Export configurable variables for the try_compile() command.
# Every name listed here is forwarded into the scratch projects that
# try_compile() generates, so those projects see the same platform settings.
# The per-configuration entries use the CMAKE_<LANG>_FLAGS_<CONFIG> spelling,
# which is what this toolchain actually sets; names such as CMAKE_C_DEBUG are
# never defined and would forward nothing.
set(CMAKE_TRY_COMPILE_PLATFORM_VARIABLES
  PLATFORM
  XCODE_VERSION_INT
  SDK_VERSION
  NAMED_LANGUAGE_SUPPORT
  DEPLOYMENT_TARGET
  CMAKE_DEVELOPER_ROOT
  CMAKE_OSX_SYSROOT_INT
  ENABLE_BITCODE
  ENABLE_ARC
  CMAKE_ASM_COMPILER
  CMAKE_C_COMPILER
  CMAKE_C_COMPILER_TARGET
  CMAKE_CXX_COMPILER
  CMAKE_CXX_COMPILER_TARGET
  BUILD_LIBTOOL
  CMAKE_INSTALL_NAME_TOOL
  CMAKE_C_FLAGS
  CMAKE_C_FLAGS_DEBUG
  CMAKE_C_FLAGS_MINSIZEREL
  CMAKE_C_FLAGS_RELWITHDEBINFO
  CMAKE_C_FLAGS_RELEASE
  CMAKE_CXX_FLAGS
  CMAKE_CXX_FLAGS_DEBUG
  CMAKE_CXX_FLAGS_MINSIZEREL
  CMAKE_CXX_FLAGS_RELWITHDEBINFO
  CMAKE_CXX_FLAGS_RELEASE
  CMAKE_C_LINK_FLAGS
  CMAKE_CXX_LINK_FLAGS
  CMAKE_ASM_FLAGS
)
# Objective-C / Objective-C++ variables only exist when those languages are
# enabled as named languages.
if(NAMED_LANGUAGE_SUPPORT_INT)
  list(APPEND CMAKE_TRY_COMPILE_PLATFORM_VARIABLES
    CMAKE_OBJC_FLAGS
    CMAKE_OBJC_FLAGS_DEBUG
    CMAKE_OBJC_FLAGS_MINSIZEREL
    CMAKE_OBJC_FLAGS_RELWITHDEBINFO
    CMAKE_OBJC_FLAGS_RELEASE
    CMAKE_OBJCXX_FLAGS
    CMAKE_OBJCXX_FLAGS_DEBUG
    CMAKE_OBJCXX_FLAGS_MINSIZEREL
    CMAKE_OBJCXX_FLAGS_RELWITHDEBINFO
    CMAKE_OBJCXX_FLAGS_RELEASE
    CMAKE_OBJC_LINK_FLAGS
    CMAKE_OBJCXX_LINK_FLAGS
  )
endif()
# Linker and library conventions for Apple platforms.
# Mark the platform as supporting install names.
set(CMAKE_PLATFORM_HAS_INSTALLNAME 1)
# Runtime search paths pointing at a "Frameworks" directory next to the
# executable or next to the loading binary.
# NOTE(review): this assignment replaces any existing CMAKE_SHARED_LINKER_FLAGS
# value instead of extending it - confirm that is intended.
set(CMAKE_SHARED_LINKER_FLAGS "-rpath @executable_path/Frameworks -rpath @loader_path/Frameworks")
# Flags used to create shared libraries (-dynamiclib) and loadable modules
# (-bundle); both pad the header so install names can be rewritten later.
set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -Wl,-headerpad_max_install_names")
set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -Wl,-headerpad_max_install_names")
# Prefix of the linker option that names the bundle loader (the trailing comma
# is part of the flag; the loader path is appended directly after it).
set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,")
set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,")
# Library file suffixes, listed in the order given here.
set(CMAKE_FIND_LIBRARY_SUFFIXES ".tbd" ".dylib" ".so" ".a")
# Linker option used to record a shared library's install name.
set(CMAKE_SHARED_LIBRARY_SONAME_C_FLAG "-install_name")
# Set the find root to the SDK developer roots.
# Note: CMAKE_FIND_ROOT_PATH is only useful when cross-compiling. Thus, do not set on macOS builds.
if(NOT PLATFORM_INT MATCHES "^MAC.*$")
  # list(APPEND) has no CACHE form: any extra tokens (CACHE, INTERNAL, "")
  # would be appended to the search roots as bogus path entries, so only the
  # SDK sysroot is appended here.
  list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_OSX_SYSROOT_INT}")
  # Keep find_*() away from host framework and library directories.
  set(CMAKE_IGNORE_PATH "/System/Library/Frameworks;/usr/local/lib" CACHE INTERNAL "")
endif()
# Frameworks take precedence over plain libraries and headers in find_*().
set(CMAKE_FIND_FRAMEWORK FIRST)

# Default framework search directories: the developer root's private
# frameworks and the SDK frameworks, plus the iOSSupport frameworks for Mac
# Catalyst, followed by whatever CMAKE_FRAMEWORK_PATH already contained.
set(_toolchain_framework_dirs
  ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks
  ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks)
if(PLATFORM_INT MATCHES "^MAC_CATALYST")
  list(APPEND _toolchain_framework_dirs
    ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks)
endif()
set(CMAKE_FRAMEWORK_PATH
  ${_toolchain_framework_dirs}
  ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "")
unset(_toolchain_framework_dirs)
# Unless the user already picked a mode, let each find_*() category search both
# the SDK roots and the rest of the host filesystem.
foreach(_root_mode PROGRAM LIBRARY INCLUDE PACKAGE)
  if(NOT CMAKE_FIND_ROOT_PATH_MODE_${_root_mode})
    set(CMAKE_FIND_ROOT_PATH_MODE_${_root_mode} BOTH CACHE INTERNAL "")
  endif()
endforeach()
#
# Some helper-macros below to simplify and beautify the CMakeFile
#
# set_xcode_property(<target> <property> <value> <variant>)
# Sets the XCODE_ATTRIBUTE_<property> target property on <target>.
# Pass "All" as <variant> to apply the value to every build variant; any other
# value restricts it to that variant through the [variant=...] suffix.
macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE XCODE_RELVERSION)
  # Macro arguments are not real variables, so copy the variant into one
  # before comparing it.
  set(XCODE_RELVERSION_I "${XCODE_RELVERSION}")
  if(NOT XCODE_RELVERSION_I STREQUAL "All")
    set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY}[variant=${XCODE_RELVERSION_I}] "${XCODE_VALUE}")
  else()
    set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} "${XCODE_VALUE}")
  endif()
endmacro(set_xcode_property)
# find_host_package(<find_package arguments...>)
# Runs find_package() against the host system only: the find-root search is
# switched off and IOS is forced OFF for the duration of the call.
# Afterwards IOS gets its previous value back and all four search modes are
# set to BOTH (regardless of what they were before the call).
macro(find_host_package)
  foreach(_host_mode PROGRAM LIBRARY INCLUDE PACKAGE)
    set(CMAKE_FIND_ROOT_PATH_MODE_${_host_mode} NEVER)
  endforeach()
  set(_TOOLCHAIN_IOS ${IOS})
  set(IOS OFF)
  find_package(${ARGN})
  set(IOS ${_TOOLCHAIN_IOS})
  foreach(_host_mode PROGRAM LIBRARY INCLUDE PACKAGE)
    set(CMAKE_FIND_ROOT_PATH_MODE_${_host_mode} BOTH)
  endforeach()
endmacro(find_host_package)
# When neither a development team nor a signing identity has been configured,
# disable code signing for Xcode builds.
if(NOT DEFINED CMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM AND NOT DEFINED CMAKE_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY)
  set(CMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED NO)
endif()

# Always build with Clang modules enabled under Xcode.
set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_MODULES "YES")

# Keep the Xcode ARC build setting in step with ENABLE_ARC. Forcing "YES" here
# would silently override ENABLE_ARC=OFF for the Xcode generator while other
# generators compile with -fno-objc-arc.
if(ENABLE_ARC_INT)
  set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES")
else()
  set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "NO")
endif()
================================================
FILE: config.h.in
================================================
// Template for the generated configuration header. The placeholder values in
// the string literals below are presumably filled in by the build system when
// it generates config.h from this file - confirm against the build scripts.
#ifndef CONFIG_H_
#define CONFIG_H_
// Project version string.
#define VERSION "@PROJECT_VERSION@"
// Project name.
#define PACKAGE "@PROJECT_NAME@"
// NOTE(review): expands to the project name only (no version appended).
#define PACKAGE_STRING "@PROJECT_NAME@"
// Installation data directory.
#define INSTALL_DATADIR "@INSTALL_DATADIR@"
#endif // CONFIG_H_
================================================
FILE: contrib/docker/Dockerfile
================================================
# Build stage: compile and install sentencepiece into /usr/local.
FROM alpine:3.23 AS build
# Toolchain from the default repositories; gperftools/tcmalloc come from the
# edge/testing repository, fetched over HTTPS rather than plain HTTP.
RUN apk add --no-cache \
    cmake \
    make \
    g++ \
    pkgconf \
 && apk add --no-cache --repository=https://dl-cdn.alpinelinux.org/alpine/edge/testing/ \
    gperftools-dev \
    tcmalloc
WORKDIR /workspace
COPY . /workspace
RUN cd /workspace && \
    mkdir build && \
    cd build && \
    cmake .. && \
    make && \
    make install
# ---
# Runtime stage: only the installed artifacts plus their runtime libraries.
FROM alpine:3.23
ENV PATH="/usr/local/bin:${PATH}"
COPY --from=build /usr/local /usr/local
# Add runtime dependencies.
RUN apk add --no-cache --repository=https://dl-cdn.alpinelinux.org/alpine/edge/testing/ \
    gperftools-dev tcmalloc
================================================
FILE: contrib/docker/README.md
================================================
# Dockerfile
This directory contains a convenient Dockerfile to build and run sentencepiece C++ command line tools in a container.
**IMPORTANT**: this Dockerfile is community-maintained on a *best effort* basis only.
The sentencepiece core team does not take responsibility for ongoing maintenance, security fixes, or guarantees of uptime/compatibility.
Use at your own risk.
## Quickstart
Build locally (under the repository root directory):
```bash
docker build -t sentencepiece -f contrib/docker/Dockerfile .
```
[Run](https://docs.docker.com/reference/cli/docker/container/run/) the command line tools:
```bash
docker run --rm sentencepiece spm_encode --help
```
================================================
FILE: data/Scripts.txt
================================================
# Scripts-9.0.0.txt
# Date: 2016-06-01, 10:34:37 GMT
# © 2016 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see http://www.unicode.org/reports/tr44/
# For more information, see:
# UAX #24, Unicode Script Property: http://www.unicode.org/reports/tr24/
# Especially the sections:
# http://www.unicode.org/reports/tr24/#Assignment_Script_Values
# http://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values
#
# ================================================
# Property: Script
# All code points not explicitly listed for Script
# have the value Unknown (Zzzz).
# @missing: 0000..10FFFF; Unknown
# ================================================
0000..001F ; Common # Cc [32] <control-0000>..<control-001F>
0020 ; Common # Zs SPACE
0021..0023 ; Common # Po [3] EXCLAMATION MARK..NUMBER SIGN
0024 ; Common # Sc DOLLAR SIGN
0025..0027 ; Common # Po [3] PERCENT SIGN..APOSTROPHE
0028 ; Common # Ps LEFT PARENTHESIS
0029 ; Common # Pe RIGHT PARENTHESIS
002A ; Common # Po ASTERISK
002B ; Common # Sm PLUS SIGN
002C ; Common # Po COMMA
002D ; Common # Pd HYPHEN-MINUS
002E..002F ; Common # Po [2] FULL STOP..SOLIDUS
0030..0039 ; Common # Nd [10] DIGIT ZERO..DIGIT NINE
003A..003B ; Common # Po [2] COLON..SEMICOLON
003C..003E ; Common # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN
003F..0040 ; Common # Po [2] QUESTION MARK..COMMERCIAL AT
005B ; Common # Ps LEFT SQUARE BRACKET
005C ; Common # Po REVERSE SOLIDUS
005D ; Common # Pe RIGHT SQUARE BRACKET
005E ; Common # Sk CIRCUMFLEX ACCENT
005F ; Common # Pc LOW LINE
0060 ; Common # Sk GRAVE ACCENT
007B ; Common # Ps LEFT CURLY BRACKET
007C ; Common # Sm VERTICAL LINE
007D ; Common # Pe RIGHT CURLY BRACKET
007E ; Common # Sm TILDE
007F..009F ; Common # Cc [33] <control-007F>..<control-009F>
00A0 ; Common # Zs NO-BREAK SPACE
00A1 ; Common # Po INVERTED EXCLAMATION MARK
00A2..00A5 ; Common # Sc [4] CENT SIGN..YEN SIGN
00A6 ; Common # So BROKEN BAR
00A7 ; Common # Po SECTION SIGN
00A8 ; Common # Sk DIAERESIS
00A9 ; Common # So COPYRIGHT SIGN
00AB ; Common # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
00AC ; Common # Sm NOT SIGN
00AD ; Common # Cf SOFT HYPHEN
00AE ; Common # So REGISTERED SIGN
00AF ; Common # Sk MACRON
00B0 ; Common # So DEGREE SIGN
00B1 ; Common # Sm PLUS-MINUS SIGN
00B2..00B3 ; Common # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE
00B4 ; Common # Sk ACUTE ACCENT
00B5 ; Common # L& MICRO SIGN
00B6..00B7 ; Common # Po [2] PILCROW SIGN..MIDDLE DOT
00B8 ; Common # Sk CEDILLA
00B9 ; Common # No SUPERSCRIPT ONE
00BB ; Common # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
00BC..00BE ; Common # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
00BF ; Common # Po INVERTED QUESTION MARK
00D7 ; Common # Sm MULTIPLICATION SIGN
00F7 ; Common # Sm DIVISION SIGN
02B9..02C1 ; Common # Lm [9] MODIFIER LETTER PRIME..MODIFIER LETTER REVERSED GLOTTAL STOP
02C2..02C5 ; Common # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
02C6..02D1 ; Common # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
02D2..02DF ; Common # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
02E5..02E9 ; Common # Sk [5] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER EXTRA-LOW TONE BAR
02EC ; Common # Lm MODIFIER LETTER VOICING
02ED ; Common # Sk MODIFIER LETTER UNASPIRATED
02EE ; Common # Lm MODIFIER LETTER DOUBLE APOSTROPHE
02EF..02FF ; Common # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
0374 ; Common # Lm GREEK NUMERAL SIGN
037E ; Common # Po GREEK QUESTION MARK
0385 ; Common # Sk GREEK DIALYTIKA TONOS
0387 ; Common # Po GREEK ANO TELEIA
0589 ; Common # Po ARMENIAN FULL STOP
0605 ; Common # Cf ARABIC NUMBER MARK ABOVE
060C ; Common # Po ARABIC COMMA
061B ; Common # Po ARABIC SEMICOLON
061C ; Common # Cf ARABIC LETTER MARK
061F ; Common # Po ARABIC QUESTION MARK
0640 ; Common # Lm ARABIC TATWEEL
06DD ; Common # Cf ARABIC END OF AYAH
08E2 ; Common # Cf ARABIC DISPUTED END OF AYAH
0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT
0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR
16EB..16ED ; Common # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
1735..1736 ; Common # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
1802..1803 ; Common # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP
1805 ; Common # Po MONGOLIAN FOUR DOTS
1CD3 ; Common # Po VEDIC SIGN NIHSHVASA
1CE1 ; Common # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
1CE9..1CEC ; Common # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
1CEE..1CF1 ; Common # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
1CF2..1CF3 ; Common # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
1CF5..1CF6 ; Common # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
2000..200A ; Common # Zs [11] EN QUAD..HAIR SPACE
200B ; Common # Cf ZERO WIDTH SPACE
200E..200F ; Common # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
2010..2015 ; Common # Pd [6] HYPHEN..HORIZONTAL BAR
2016..2017 ; Common # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE
2018 ; Common # Pi LEFT SINGLE QUOTATION MARK
2019 ; Common # Pf RIGHT SINGLE QUOTATION MARK
201A ; Common # Ps SINGLE LOW-9 QUOTATION MARK
201B..201C ; Common # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK
201D ; Common # Pf RIGHT DOUBLE QUOTATION MARK
201E ; Common # Ps DOUBLE LOW-9 QUOTATION MARK
201F ; Common # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK
2020..2027 ; Common # Po [8] DAGGER..HYPHENATION POINT
2028 ; Common # Zl LINE SEPARATOR
2029 ; Common # Zp PARAGRAPH SEPARATOR
202A..202E ; Common # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
202F ; Common # Zs NARROW NO-BREAK SPACE
2030..2038 ; Common # Po [9] PER MILLE SIGN..CARET
2039 ; Common # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK
203A ; Common # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
203B..203E ; Common # Po [4] REFERENCE MARK..OVERLINE
203F..2040 ; Common # Pc [2] UNDERTIE..CHARACTER TIE
2041..2043 ; Common # Po [3] CARET INSERTION POINT..HYPHEN BULLET
2044 ; Common # Sm FRACTION SLASH
2045 ; Common # Ps LEFT SQUARE BRACKET WITH QUILL
2046 ; Common # Pe RIGHT SQUARE BRACKET WITH QUILL
2047..2051 ; Common # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY
2052 ; Common # Sm COMMERCIAL MINUS SIGN
2053 ; Common # Po SWUNG DASH
2054 ; Common # Pc INVERTED UNDERTIE
2055..205E ; Common # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS
205F ; Common # Zs MEDIUM MATHEMATICAL SPACE
2060..2064 ; Common # Cf [5] WORD JOINER..INVISIBLE PLUS
2066..206F ; Common # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
2070 ; Common # No SUPERSCRIPT ZERO
2074..2079 ; Common # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE
207A..207C ; Common # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
207D ; Common # Ps SUPERSCRIPT LEFT PARENTHESIS
207E ; Common # Pe SUPERSCRIPT RIGHT PARENTHESIS
2080..2089 ; Common # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE
208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS
208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS
20A0..20BE ; Common # Sc [31] EURO-CURRENCY SIGN..LARI SIGN
2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
2102 ; Common # L& DOUBLE-STRUCK CAPITAL C
2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA
2107 ; Common # L& EULER CONSTANT
2108..2109 ; Common # So [2] SCRUPLE..DEGREE FAHRENHEIT
210A..2113 ; Common # L& [10] SCRIPT SMALL G..SCRIPT SMALL L
2114 ; Common # So L B BAR SYMBOL
2115 ; Common # L& DOUBLE-STRUCK CAPITAL N
2116..2117 ; Common # So [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT
2118 ; Common # Sm SCRIPT CAPITAL P
2119..211D ; Common # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
211E..2123 ; Common # So [6] PRESCRIPTION TAKE..VERSICLE
2124 ; Common # L& DOUBLE-STRUCK CAPITAL Z
2125 ; Common # So OUNCE SIGN
2127 ; Common # So INVERTED OHM SIGN
2128 ; Common # L& BLACK-LETTER CAPITAL Z
2129 ; Common # So TURNED GREEK SMALL LETTER IOTA
212C..212D ; Common # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C
212E ; Common # So ESTIMATED SYMBOL
212F..2131 ; Common # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F
2133..2134 ; Common # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O
2135..2138 ; Common # Lo [4] ALEF SYMBOL..DALET SYMBOL
2139 ; Common # L& INFORMATION SOURCE
213A..213B ; Common # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN
213C..213F ; Common # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
2140..2144 ; Common # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y
2145..2149 ; Common # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
214A ; Common # So PROPERTY LINE
214B ; Common # Sm TURNED AMPERSAND
214C..214D ; Common # So [2] PER SIGN..AKTIESELSKAB
214F ; Common # So SYMBOL FOR SAMARITAN SOURCE
2150..215F ; Common # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE
2189 ; Common # No VULGAR FRACTION ZERO THIRDS
218A..218B ; Common # So [2] TURNED DIGIT TWO..TURNED DIGIT THREE
2190..2194 ; Common # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW
2195..2199 ; Common # So [5] UP DOWN ARROW..SOUTH WEST ARROW
219A..219B ; Common # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE
219C..219F ; Common # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW
21A0 ; Common # Sm RIGHTWARDS TWO HEADED ARROW
21A1..21A2 ; Common # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL
21A3 ; Common # Sm RIGHTWARDS ARROW WITH TAIL
21A4..21A5 ; Common # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR
21A6 ; Common # Sm RIGHTWARDS ARROW FROM BAR
21A7..21AD ; Common # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW
21AE ; Common # Sm LEFT RIGHT ARROW WITH STROKE
21AF..21CD ; Common # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE
21CE..21CF ; Common # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE
21D0..21D1 ; Common # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW
21D2 ; Common # Sm RIGHTWARDS DOUBLE ARROW
21D3 ; Common # So DOWNWARDS DOUBLE ARROW
21D4 ; Common # Sm LEFT RIGHT DOUBLE ARROW
21D5..21F3 ; Common # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW
21F4..22FF ; Common # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP
2300..2307 ; Common # So [8] DIAMETER SIGN..WAVY LINE
2308 ; Common # Ps LEFT CEILING
2309 ; Common # Pe RIGHT CEILING
230A ; Common # Ps LEFT FLOOR
230B ; Common # Pe RIGHT FLOOR
230C..231F ; Common # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER
2320..2321 ; Common # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL
2322..2328 ; Common # So [7] FROWN..KEYBOARD
2329 ; Common # Ps LEFT-POINTING ANGLE BRACKET
232A ; Common # Pe RIGHT-POINTING ANGLE BRACKET
232B..237B ; Common # So [81] ERASE TO THE LEFT..NOT CHECK MARK
237C ; Common # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW
237D..239A ; Common # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL
239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE
23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
23E2..23FE ; Common # So [29] WHITE TRAPEZIUM..POWER SLEEP SYMBOL
2400..2426 ; Common # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
249C..24E9 ; Common # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
24EA..24FF ; Common # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO
2500..25B6 ; Common # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE
25B7 ; Common # Sm WHITE RIGHT-POINTING TRIANGLE
25B8..25C0 ; Common # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE
25C1 ; Common # Sm WHITE LEFT-POINTING TRIANGLE
25C2..25F7 ; Common # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT
25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
266F ; Common # Sm MUSIC SHARP SIGN
2670..2767 ; Common # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET
2768 ; Common # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
276B ; Common # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
276C ; Common # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
276D ; Common # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
276E ; Common # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
276F ; Common # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
2770 ; Common # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
2771 ; Common # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
2772 ; Common # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
2773 ; Common # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
2774 ; Common # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT
2775 ; Common # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT
2776..2793 ; Common # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
2794..27BF ; Common # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP
27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER
27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER
27C7..27E5 ; Common # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK
27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET
27E9 ; Common # Pe MATHEMATICAL RIGHT ANGLE BRACKET
27EA ; Common # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
27EB ; Common # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
27EC ; Common # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
27ED ; Common # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
27EE ; Common # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS
27EF ; Common # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS
27F0..27FF ; Common # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW
2900..2982 ; Common # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON
2983 ; Common # Ps LEFT WHITE CURLY BRACKET
2984 ; Common # Pe RIGHT WHITE CURLY BRACKET
2985 ; Common # Ps LEFT WHITE PARENTHESIS
2986 ; Common # Pe RIGHT WHITE PARENTHESIS
2987 ; Common # Ps Z NOTATION LEFT IMAGE BRACKET
2988 ; Common # Pe Z NOTATION RIGHT IMAGE BRACKET
2989 ; Common # Ps Z NOTATION LEFT BINDING BRACKET
298A ; Common # Pe Z NOTATION RIGHT BINDING BRACKET
298B ; Common # Ps LEFT SQUARE BRACKET WITH UNDERBAR
298C ; Common # Pe RIGHT SQUARE BRACKET WITH UNDERBAR
298D ; Common # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
298E ; Common # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
298F ; Common # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
2990 ; Common # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
2991 ; Common # Ps LEFT ANGLE BRACKET WITH DOT
2992 ; Common # Pe RIGHT ANGLE BRACKET WITH DOT
2993 ; Common # Ps LEFT ARC LESS-THAN BRACKET
2994 ; Common # Pe RIGHT ARC GREATER-THAN BRACKET
2995 ; Common # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET
2996 ; Common # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET
2997 ; Common # Ps LEFT BLACK TORTOISE SHELL BRACKET
2998 ; Common # Pe RIGHT BLACK TORTOISE SHELL BRACKET
2999..29D7 ; Common # Sm [63] DOTTED FENCE..BLACK HOURGLASS
29D8 ; Common # Ps LEFT WIGGLY FENCE
29D9 ; Common # Pe RIGHT WIGGLY FENCE
29DA ; Common # Ps LEFT DOUBLE WIGGLY FENCE
29DB ; Common # Pe RIGHT DOUBLE WIGGLY FENCE
29DC..29FB ; Common # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS
29FC ; Common # Ps LEFT-POINTING CURVED ANGLE BRACKET
29FD ; Common # Pe RIGHT-POINTING CURVED ANGLE BRACKET
29FE..2AFF ; Common # Sm [258] TINY..N-ARY WHITE VERTICAL BAR
2B00..2B2F ; Common # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE
2B30..2B44 ; Common # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET
2B45..2B46 ; Common # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
2B47..2B4C ; Common # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
2B4D..2B73 ; Common # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
2B98..2BB9 ; Common # So [34] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..UP ARROWHEAD IN A RECTANGLE BOX
2BBD..2BC8 ; Common # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
2BCA..2BD1 ; Common # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN
2BEC..2BEF ; Common # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS
2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET
2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET
2E04 ; Common # Pi LEFT DOTTED SUBSTITUTION BRACKET
2E05 ; Common # Pf RIGHT DOTTED SUBSTITUTION BRACKET
2E06..2E08 ; Common # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER
2E09 ; Common # Pi LEFT TRANSPOSITION BRACKET
2E0A ; Common # Pf RIGHT TRANSPOSITION BRACKET
2E0B ; Common # Po RAISED SQUARE
2E0C ; Common # Pi LEFT RAISED OMISSION BRACKET
2E0D ; Common # Pf RIGHT RAISED OMISSION BRACKET
2E0E..2E16 ; Common # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE
2E17 ; Common # Pd DOUBLE OBLIQUE HYPHEN
2E18..2E19 ; Common # Po [2] INVERTED INTERROBANG..PALM BRANCH
2E1A ; Common # Pd HYPHEN WITH DIAERESIS
2E1B ; Common # Po TILDE WITH RING ABOVE
2E1C ; Common # Pi LEFT LOW PARAPHRASE BRACKET
2E1D ; Common # Pf RIGHT LOW PARAPHRASE BRACKET
2E1E..2E1F ; Common # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW
2E20 ; Common # Pi LEFT VERTICAL BAR WITH QUILL
2E21 ; Common # Pf RIGHT VERTICAL BAR WITH QUILL
2E22 ; Common # Ps TOP LEFT HALF BRACKET
2E23 ; Common # Pe TOP RIGHT HALF BRACKET
2E24 ; Common # Ps BOTTOM LEFT HALF BRACKET
2E25 ; Common # Pe BOTTOM RIGHT HALF BRACKET
2E26 ; Common # Ps LEFT SIDEWAYS U BRACKET
2E27 ; Common # Pe RIGHT SIDEWAYS U BRACKET
2E28 ; Common # Ps LEFT DOUBLE PARENTHESIS
2E29 ; Common # Pe RIGHT DOUBLE PARENTHESIS
2E2A..2E2E ; Common # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK
2E2F ; Common # Lm VERTICAL TILDE
2E30..2E39 ; Common # Po [10] RING POINT..TOP HALF SECTION SIGN
2E3A..2E3B ; Common # Pd [2] TWO-EM DASH..THREE-EM DASH
2E3C..2E3F ; Common # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM
2E40 ; Common # Pd DOUBLE HYPHEN
2E41 ; Common # Po REVERSED COMMA
2E42 ; Common # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
2E43..2E44 ; Common # Po [2] DASH WITH LEFT UPTURN..DOUBLE SUSPENSION MARK
2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
3000 ; Common # Zs IDEOGRAPHIC SPACE
3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
3004 ; Common # So JAPANESE INDUSTRIAL STANDARD SYMBOL
3006 ; Common # Lo IDEOGRAPHIC CLOSING MARK
3008 ; Common # Ps LEFT ANGLE BRACKET
3009 ; Common # Pe RIGHT ANGLE BRACKET
300A ; Common # Ps LEFT DOUBLE ANGLE BRACKET
300B ; Common # Pe RIGHT DOUBLE ANGLE BRACKET
300C ; Common # Ps LEFT CORNER BRACKET
300D ; Common # Pe RIGHT CORNER BRACKET
300E ; Common # Ps LEFT WHITE CORNER BRACKET
300F ; Common # Pe RIGHT WHITE CORNER BRACKET
3010 ; Common # Ps LEFT BLACK LENTICULAR BRACKET
3011 ; Common # Pe RIGHT BLACK LENTICULAR BRACKET
3012..3013 ; Common # So [2] POSTAL MARK..GETA MARK
3014 ; Common # Ps LEFT TORTOISE SHELL BRACKET
3015 ; Common # Pe RIGHT TORTOISE SHELL BRACKET
3016 ; Common # Ps LEFT WHITE LENTICULAR BRACKET
3017 ; Common # Pe RIGHT WHITE LENTICULAR BRACKET
3018 ; Common # Ps LEFT WHITE TORTOISE SHELL BRACKET
3019 ; Common # Pe RIGHT WHITE TORTOISE SHELL BRACKET
301A ; Common # Ps LEFT WHITE SQUARE BRACKET
301B ; Common # Pe RIGHT WHITE SQUARE BRACKET
301C ; Common # Pd WAVE DASH
301D ; Common # Ps REVERSED DOUBLE PRIME QUOTATION MARK
301E..301F ; Common # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
3020 ; Common # So POSTAL MARK FACE
3030 ; Common # Pd WAVY DASH
3031..3035 ; Common # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
3036..3037 ; Common # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
303C ; Common # Lo MASU MARK
303D ; Common # Po PART ALTERNATION MARK
303E..303F ; Common # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE
309B..309C ; Common # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
30A0 ; Common # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
30FB ; Common # Po KATAKANA MIDDLE DOT
30FC ; Common # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK
3190..3191 ; Common # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
3192..3195 ; Common # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
31C0..31E3 ; Common # So [36] CJK STROKE T..CJK STROKE Q
3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
322A..3247 ; Common # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
3248..324F ; Common # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE
3250 ; Common # So PARTNERSHIP SIGN
3251..325F ; Common # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
327F ; Common # So KOREAN STANDARD SYMBOL
3280..3289 ; Common # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
328A..32B0 ; Common # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
32B1..32BF ; Common # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
32C0..32CF ; Common # So [16] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..LIMITED LIABILITY SIGN
3358..33FF ; Common # So [168] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..SQUARE GAL
4DC0..4DFF ; Common # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
A700..A716 ; Common # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
A717..A71F ; Common # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
A720..A721 ; Common # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
A788 ; Common # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
A789..A78A ; Common # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
A830..A835 ; Common # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS
A836..A837 ; Common # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
A838 ; Common # Sc NORTH INDIC RUPEE MARK
A839 ; Common # So NORTH INDIC QUANTITY MARK
A92E ; Common # Po KAYAH LI SIGN CWI
A9CF ; Common # Lm JAVANESE PANGRANGKEP
AB5B ; Common # Sk MODIFIER BREVE WITH INVERTED BREVE
FD3E ; Common # Pe ORNATE LEFT PARENTHESIS
FD3F ; Common # Ps ORNATE RIGHT PARENTHESIS
FE10..FE16 ; Common # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
FE17 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET
FE18 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET
FE19 ; Common # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
FE30 ; Common # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
FE31..FE32 ; Common # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH
FE33..FE34 ; Common # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
FE35 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
FE36 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
FE37 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
FE38 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
FE39 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET
FE3A ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET
FE3B ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET
FE3C ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET
FE3D ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET
FE3E ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET
FE3F ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
FE40 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET
FE41 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
FE42 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
FE43 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
FE44 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
FE45..FE46 ; Common # Po [2] SESAME DOT..WHITE SESAME DOT
FE47 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET
FE48 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET
FE49..FE4C ; Common # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE
FE4D..FE4F ; Common # Pc [3] DASHED LOW LINE..WAVY LOW LINE
FE50..FE52 ; Common # Po [3] SMALL COMMA..SMALL FULL STOP
FE54..FE57 ; Common # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK
FE58 ; Common # Pd SMALL EM DASH
FE59 ; Common # Ps SMALL LEFT PARENTHESIS
FE5A ; Common # Pe SMALL RIGHT PARENTHESIS
FE5B ; Common # Ps SMALL LEFT CURLY BRACKET
FE5C ; Common # Pe SMALL RIGHT CURLY BRACKET
FE5D ; Common # Ps SMALL LEFT TORTOISE SHELL BRACKET
FE5E ; Common # Pe SMALL RIGHT TORTOISE SHELL BRACKET
FE5F..FE61 ; Common # Po [3] SMALL NUMBER SIGN..SMALL ASTERISK
FE62 ; Common # Sm SMALL PLUS SIGN
FE63 ; Common # Pd SMALL HYPHEN-MINUS
FE64..FE66 ; Common # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN
FE68 ; Common # Po SMALL REVERSE SOLIDUS
FE69 ; Common # Sc SMALL DOLLAR SIGN
FE6A..FE6B ; Common # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT
FEFF ; Common # Cf ZERO WIDTH NO-BREAK SPACE
FF01..FF03 ; Common # Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN
FF04 ; Common # Sc FULLWIDTH DOLLAR SIGN
FF05..FF07 ; Common # Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE
FF08 ; Common # Ps FULLWIDTH LEFT PARENTHESIS
FF09 ; Common # Pe FULLWIDTH RIGHT PARENTHESIS
FF0A ; Common # Po FULLWIDTH ASTERISK
FF0B ; Common # Sm FULLWIDTH PLUS SIGN
FF0C ; Common # Po FULLWIDTH COMMA
FF0D ; Common # Pd FULLWIDTH HYPHEN-MINUS
FF0E..FF0F ; Common # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS
FF10..FF19 ; Common # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
FF1A..FF1B ; Common # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON
FF1C..FF1E ; Common # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN
FF1F..FF20 ; Common # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT
FF3B ; Common # Ps FULLWIDTH LEFT SQUARE BRACKET
FF3C ; Common # Po FULLWIDTH REVERSE SOLIDUS
FF3D ; Common # Pe FULLWIDTH RIGHT SQUARE BRACKET
FF3E ; Common # Sk FULLWIDTH CIRCUMFLEX ACCENT
FF3F ; Common # Pc FULLWIDTH LOW LINE
FF40 ; Common # Sk FULLWIDTH GRAVE ACCENT
FF5B ; Common # Ps FULLWIDTH LEFT CURLY BRACKET
FF5C ; Common # Sm FULLWIDTH VERTICAL LINE
FF5D ; Common # Pe FULLWIDTH RIGHT CURLY BRACKET
FF5E ; Common # Sm FULLWIDTH TILDE
FF5F ; Common # Ps FULLWIDTH LEFT WHITE PARENTHESIS
FF60 ; Common # Pe FULLWIDTH RIGHT WHITE PARENTHESIS
FF61 ; Common # Po HALFWIDTH IDEOGRAPHIC FULL STOP
FF62 ; Common # Ps HALFWIDTH LEFT CORNER BRACKET
FF63 ; Common # Pe HALFWIDTH RIGHT CORNER BRACKET
FF64..FF65 ; Common # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
FF70 ; Common # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
FF9E..FF9F ; Common # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
FFE0..FFE1 ; Common # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
FFE2 ; Common # Sm FULLWIDTH NOT SIGN
FFE3 ; Common # Sk FULLWIDTH MACRON
FFE4 ; Common # So FULLWIDTH BROKEN BAR
FFE5..FFE6 ; Common # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
FFE8 ; Common # So HALFWIDTH FORMS LIGHT VERTICAL
FFE9..FFEC ; Common # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
FFED..FFEE ; Common # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
FFF9..FFFB ; Common # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
10100..10102 ; Common # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK
10107..10133 ; Common # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
10137..1013F ; Common # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
10190..1019B ; Common # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
102E1..102FB ; Common # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
1BCA0..1BCA3 ; Common # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
1D129..1D164 ; Common # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
1D165..1D166 ; Common # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D16A..1D16C ; Common # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3
1D16D..1D172 ; Common # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
1D173..1D17A ; Common # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
1D183..1D184 ; Common # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN
1D18C..1D1A9 ; Common # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH
1D1AE..1D1E8 ; Common # So [59] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KIEVAN FLAT SIGN
1D300..1D356 ; Common # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
1D360..1D371 ; Common # No [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE
1D400..1D454 ; Common # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
1D456..1D49C ; Common # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
1D49E..1D49F ; Common # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
1D4A2 ; Common # L& MATHEMATICAL SCRIPT CAPITAL G
1D4A5..1D4A6 ; Common # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
1D4A9..1D4AC ; Common # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
1D4AE..1D4B9 ; Common # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
1D4BB ; Common # L& MATHEMATICAL SCRIPT SMALL F
1D4BD..1D4C3 ; Common # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
1D4C5..1D505 ; Common # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
1D507..1D50A ; Common # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
1D50D..1D514 ; Common # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
1D516..1D51C ; Common # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
1D51E..1D539 ; Common # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
1D53B..1D53E ; Common # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
1D540..1D544 ; Common # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
1D546 ; Common # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O
1D54A..1D550 ; Common # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
1D552..1D6A5 ; Common # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
1D6A8..1D6C0 ; Common # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
1D6C1 ; Common # Sm MATHEMATICAL BOLD NABLA
1D6C2..1D6DA ; Common # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
1D6DB ; Common # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
1D6DC..1D6FA ; Common # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
1D6FB ; Common # Sm MATHEMATICAL ITALIC NABLA
1D6FC..1D714 ; Common # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
1D715 ; Common # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
1D716..1D734 ; Common # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
1D735 ; Common # Sm MATHEMATICAL BOLD ITALIC NABLA
1D736..1D74E ; Common # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
1D74F ; Common # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
1D750..1D76E ; Common # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
1D76F ; Common # Sm MATHEMATICAL SANS-SERIF BOLD NABLA
1D770..1D788 ; Common # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
1D789 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
1D78A..1D7A8 ; Common # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
1D7A9 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
1D7AA..1D7C2 ; Common # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
1D7C3 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
1D7C4..1D7CB ; Common # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
1D7CE..1D7FF ; Common # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK
1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
1F0A0..1F0AE ; Common # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES
1F0B1..1F0BF ; Common # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER
1F0C1..1F0CF ; Common # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER
1F0D1..1F0F5 ; Common # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21
1F100..1F10C ; Common # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
1F130..1F16B ; Common # So [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN
1F170..1F1AC ; Common # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD
1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z
1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA
1F210..1F23B ; Common # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
1F250..1F251 ; Common # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
1F300..1F3FA ; Common # So [251] CYCLONE..AMPHORA
1F3FB..1F3FF ; Common # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6
1F400..1F6D2 ; Common # So [723] RAT..SHOPPING TROLLEY
1F6E0..1F6EC ; Common # So [13] HAMMER AND WRENCH..AIRPLANE ARRIVING
1F6F0..1F6F6 ; Common # So [7] SATELLITE..CANOE
1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
1F780..1F7D4 ; Common # So [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR
1F800..1F80B ; Common # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
1F810..1F847 ; Common # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
1F910..1F91E ; Common # So [15] ZIPPER-MOUTH FACE..HAND WITH INDEX AND MIDDLE FINGERS CROSSED
1F920..1F927 ; Common # So [8] FACE WITH COWBOY HAT..SNEEZING FACE
1F930 ; Common # So PREGNANT WOMAN
1F933..1F93E ; Common # So [12] SELFIE..HANDBALL
1F940..1F94B ; Common # So [12] WILTED FLOWER..MARTIAL ARTS UNIFORM
1F950..1F95E ; Common # So [15] CROISSANT..PANCAKES
1F980..1F991 ; Common # So [18] CRAB..SQUID
1F9C0 ; Common # So CHEESE WEDGE
E0001 ; Common # Cf LANGUAGE TAG
E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 7279
# ================================================
0041..005A ; Latin # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
0061..007A ; Latin # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z
00AA ; Latin # Lo FEMININE ORDINAL INDICATOR
00BA ; Latin # Lo MASCULINE ORDINAL INDICATOR
00C0..00D6 ; Latin # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS
00D8..00F6 ; Latin # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS
00F8..01BA ; Latin # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
01BB ; Latin # Lo LATIN LETTER TWO WITH STROKE
01BC..01BF ; Latin # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
01C0..01C3 ; Latin # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
01C4..0293 ; Latin # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL
0294 ; Latin # Lo LATIN LETTER GLOTTAL STOP
0295..02AF ; Latin # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
02B0..02B8 ; Latin # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
02E0..02E4 ; Latin # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
1D00..1D25 ; Latin # L& [38] LATIN LETTER SMALL CAPITAL A..LATIN LETTER AIN
1D2C..1D5C ; Latin # Lm [49] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL AIN
1D62..1D65 ; Latin # Lm [4] LATIN SUBSCRIPT SMALL LETTER I..LATIN SUBSCRIPT SMALL LETTER V
1D6B..1D77 ; Latin # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
1D79..1D9A ; Latin # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
1D9B..1DBE ; Latin # Lm [36] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL EZH
1E00..1EFF ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
2071 ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER I
207F ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER N
2090..209C ; Latin # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
212A..212B ; Latin # L& [2] KELVIN SIGN..ANGSTROM SIGN
2132 ; Latin # L& TURNED CAPITAL F
214E ; Latin # L& TURNED SMALL F
2160..2182 ; Latin # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND
2183..2184 ; Latin # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C
2185..2188 ; Latin # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND
2C60..2C7B ; Latin # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E
2C7C..2C7D ; Latin # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V
2C7E..2C7F ; Latin # L& [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL
A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
A770 ; Latin # Lm MODIFIER LETTER US
A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT
A790..A7AE ; Latin # L& [31] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN CAPITAL LETTER SMALL CAPITAL I
A7B0..A7B7 ; Latin # L& [8] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER OMEGA
A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M
A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M
AB30..AB5A ; Latin # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
AB5C..AB5F ; Latin # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
AB60..AB64 ; Latin # L& [5] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER INVERTED ALPHA
FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
# Total code points: 1350
# ================================================
0370..0373 ; Greek # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
0375 ; Greek # Sk GREEK LOWER NUMERAL SIGN
0376..0377 ; Greek # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
037A ; Greek # Lm GREEK YPOGEGRAMMENI
037B..037D ; Greek # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
037F ; Greek # L& GREEK CAPITAL LETTER YOT
0384 ; Greek # Sk GREEK TONOS
0386 ; Greek # L& GREEK CAPITAL LETTER ALPHA WITH TONOS
0388..038A ; Greek # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
038C ; Greek # L& GREEK CAPITAL LETTER OMICRON WITH TONOS
038E..03A1 ; Greek # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
03A3..03E1 ; Greek # L& [63] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER SAMPI
03F0..03F5 ; Greek # L& [6] GREEK KAPPA SYMBOL..GREEK LUNATE EPSILON SYMBOL
03F6 ; Greek # Sm GREEK REVERSED LUNATE EPSILON SYMBOL
03F7..03FF ; Greek # L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
1D26..1D2A ; Greek # L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI
1D5D..1D61 ; Greek # Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI
1D66..1D6A ; Greek # Lm [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI
1DBF ; Greek # Lm MODIFIER LETTER SMALL THETA
1F00..1F15 ; Greek # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
1F18..1F1D ; Greek # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
1F20..1F45 ; Greek # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
1F48..1F4D ; Greek # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
1F50..1F57 ; Greek # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
1F59 ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA
1F5B ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
1F5D ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
1F5F..1F7D ; Greek # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA
1F80..1FB4 ; Greek # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
1FB6..1FBC ; Greek # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
1FBD ; Greek # Sk GREEK KORONIS
1FBE ; Greek # L& GREEK PROSGEGRAMMENI
1FBF..1FC1 ; Greek # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
1FC2..1FC4 ; Greek # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
1FC6..1FCC ; Greek # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
1FCD..1FCF ; Greek # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
1FD0..1FD3 ; Greek # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
1FD6..1FDB ; Greek # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA
1FDD..1FDF ; Greek # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
1FE0..1FEC ; Greek # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
1FED..1FEF ; Greek # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA
1FF2..1FF4 ; Greek # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6..1FFC ; Greek # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
1FFD..1FFE ; Greek # Sk [2] GREEK OXIA..GREEK DASIA
2126 ; Greek # L& OHM SIGN
AB65 ; Greek # L& GREEK LETTER SMALL CAPITAL OMEGA
10140..10174 ; Greek # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
10175..10178 ; Greek # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
10179..10189 ; Greek # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
1018A..1018B ; Greek # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN
1018C..1018E ; Greek # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN
101A0 ; Greek # So GREEK SYMBOL TAU RHO
1D200..1D241 ; Greek # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
1D242..1D244 ; Greek # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
1D245 ; Greek # So GREEK MUSICAL LEIMMA
# Total code points: 518
# ================================================
0400..0481 ; Cyrillic # L& [130] CYRILLIC CAPITAL LETTER IE WITH GRAVE..CYRILLIC SMALL LETTER KOPPA
0482 ; Cyrillic # So CYRILLIC THOUSANDS SIGN
0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION
0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE
0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
048A..052F ; Cyrillic # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER
1C80..1C88 ; Cyrillic # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL
1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN
2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
A640..A66D ; Cyrillic # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O
A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET
A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
A673 ; Cyrillic # Po SLAVONIC ASTERISK
A674..A67D ; Cyrillic # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK
A67E ; Cyrillic # Po CYRILLIC KAVYKA
A67F ; Cyrillic # Lm CYRILLIC PAYEROK
A680..A69B ; Cyrillic # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O
A69C..A69D ; Cyrillic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN
A69E..A69F ; Cyrillic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
FE2E..FE2F ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF
# Total code points: 443
# ================================================
0531..0556 ; Armenian # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0559 ; Armenian # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
055A..055F ; Armenian # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
0561..0587 ; Armenian # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
058A ; Armenian # Pd ARMENIAN HYPHEN
058D..058E ; Armenian # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN
058F ; Armenian # Sc ARMENIAN DRAM SIGN
FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
# Total code points: 93
# ================================================
0591..05BD ; Hebrew # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
05BE ; Hebrew # Pd HEBREW PUNCTUATION MAQAF
05BF ; Hebrew # Mn HEBREW POINT RAFE
05C0 ; Hebrew # Po HEBREW PUNCTUATION PASEQ
05C1..05C2 ; Hebrew # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
05C3 ; Hebrew # Po HEBREW PUNCTUATION SOF PASUQ
05C4..05C5 ; Hebrew # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
05C6 ; Hebrew # Po HEBREW PUNCTUATION NUN HAFUKHA
05C7 ; Hebrew # Mn HEBREW POINT QAMATS QATAN
05D0..05EA ; Hebrew # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
05F0..05F2 ; Hebrew # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
05F3..05F4 ; Hebrew # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
FB1D ; Hebrew # Lo HEBREW LETTER YOD WITH HIRIQ
FB1E ; Hebrew # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FB1F..FB28 ; Hebrew # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
FB29 ; Hebrew # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN
FB2A..FB36 ; Hebrew # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
FB38..FB3C ; Hebrew # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
FB3E ; Hebrew # Lo HEBREW LETTER MEM WITH DAGESH
FB40..FB41 ; Hebrew # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
FB43..FB44 ; Hebrew # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED
# Total code points: 133
# ================================================
0600..0604 ; Arabic # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT
0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY
0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
060B ; Arabic # Sc AFGHANI SIGN
060D ; Arabic # Po ARABIC DATE SEPARATOR
060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
061E ; Arabic # Po ARABIC TRIPLE DOT PUNCTUATION MARK
0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
0656..065F ; Arabic # Mn [10] ARABIC SUBSCRIPT ALEF..ARABIC WAVY HAMZA BELOW
0660..0669 ; Arabic # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
066E..066F ; Arabic # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
0671..06D3 ; Arabic # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
06D4 ; Arabic # Po ARABIC FULL STOP
06D5 ; Arabic # Lo ARABIC LETTER AE
06D6..06DC ; Arabic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
06DE ; Arabic # So ARABIC START OF RUB EL HIZB
06DF..06E4 ; Arabic # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
06E5..06E6 ; Arabic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
06E7..06E8 ; Arabic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
06E9 ; Arabic # So ARABIC PLACE OF SAJDAH
06EA..06ED ; Arabic # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
06EE..06EF ; Arabic # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
06F0..06F9 ; Arabic # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
06FA..06FC ; Arabic # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
06FD..06FE ; Arabic # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V
0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH T
gitextract_h_6popb3/
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ └── feature_request.md
│ ├── dependabot.yml
│ ├── pull_request_template.md
│ └── workflows/
│ ├── cifuzz.yml
│ ├── cmake.yml
│ ├── cross_build.yml
│ ├── requirements/
│ │ ├── base.in
│ │ ├── base.txt
│ │ ├── cibuildwheel.in
│ │ └── cibuildwheel.txt
│ └── wheel.yml
├── .gitignore
├── CMakeLists.txt
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── VERSION.txt
├── cmake/
│ └── ios.toolchain.cmake
├── config.h.in
├── contrib/
│ └── docker/
│ ├── Dockerfile
│ └── README.md
├── data/
│ ├── Scripts.txt
│ ├── botchan.txt
│ ├── extract_headers.pl
│ ├── gen_spec_parser.pl
│ ├── gen_unicode_scripts_code.pl
│ ├── ids_denorm.tsv
│ ├── ids_norm.tsv
│ ├── nfc.tsv
│ ├── nfc_cf.tsv
│ ├── nfd.tsv
│ ├── nfd_cf.tsv
│ ├── nfkc.tsv
│ ├── nfkc_cf.tsv
│ ├── nfkd.tsv
│ ├── nfkd_cf.tsv
│ ├── nmt_nfkc.tsv
│ ├── nmt_nfkc_cf.tsv
│ └── wagahaiwa_nekodearu.txt
├── doc/
│ ├── api.md
│ ├── experiments.md
│ ├── normalization.md
│ ├── options.md
│ └── special_symbols.md
├── python/
│ ├── .gitignore
│ ├── MANIFEST.in
│ ├── README.md
│ ├── add_new_vocab.ipynb
│ ├── build_bundled.sh
│ ├── build_sdist.sh
│ ├── pyproject.toml
│ ├── sentencepiece_python_module_example.ipynb
│ ├── setup.cfg
│ ├── setup.py
│ ├── src/
│ │ └── sentencepiece/
│ │ ├── __init__.py
│ │ ├── _version.py
│ │ ├── sentencepiece.i
│ │ ├── sentencepiece_model_pb2.py
│ │ ├── sentencepiece_pb2.py
│ │ └── sentencepiece_wrap.cxx
│ └── test/
│ ├── __init__.py
│ ├── botchan.txt
│ ├── sentencepiece_test.py
│ ├── test_ja_model.model
│ └── test_model.model
├── sentencepiece.pc.in
├── src/
│ ├── CMakeLists.txt
│ ├── bpe_model.cc
│ ├── bpe_model.h
│ ├── bpe_model_test.cc
│ ├── bpe_model_trainer.cc
│ ├── bpe_model_trainer.h
│ ├── bpe_model_trainer_test.cc
│ ├── builder.cc
│ ├── builder.h
│ ├── builder_test.cc
│ ├── builtin_pb/
│ │ ├── sentencepiece.pb.cc
│ │ ├── sentencepiece.pb.h
│ │ ├── sentencepiece_model.pb.cc
│ │ └── sentencepiece_model.pb.h
│ ├── char_model.cc
│ ├── char_model.h
│ ├── char_model_test.cc
│ ├── char_model_trainer.cc
│ ├── char_model_trainer.h
│ ├── char_model_trainer_test.cc
│ ├── common.h
│ ├── compile_charsmap_main.cc
│ ├── error.cc
│ ├── filesystem.cc
│ ├── filesystem.h
│ ├── filesystem_test.cc
│ ├── freelist.h
│ ├── freelist_test.cc
│ ├── init.cc
│ ├── init.h
│ ├── init_test.cc
│ ├── model_factory.cc
│ ├── model_factory.h
│ ├── model_factory_test.cc
│ ├── model_interface.cc
│ ├── model_interface.h
│ ├── model_interface_test.cc
│ ├── normalization_rule.h
│ ├── normalizer.cc
│ ├── normalizer.h
│ ├── normalizer_test.cc
│ ├── pretokenizer_for_training.cc
│ ├── pretokenizer_for_training.h
│ ├── pretokenizer_for_training_test.cc
│ ├── sentencepiece.proto
│ ├── sentencepiece_model.proto
│ ├── sentencepiece_processor.cc
│ ├── sentencepiece_processor.h
│ ├── sentencepiece_processor_test.cc
│ ├── sentencepiece_trainer.cc
│ ├── sentencepiece_trainer.h
│ ├── sentencepiece_trainer_test.cc
│ ├── spec_parser.h
│ ├── spm_decode_main.cc
│ ├── spm_encode_main.cc
│ ├── spm_export_vocab_main.cc
│ ├── spm_normalize_main.cc
│ ├── spm_train_main.cc
│ ├── test_main.cc
│ ├── testharness.cc
│ ├── testharness.h
│ ├── trainer_factory.cc
│ ├── trainer_factory.h
│ ├── trainer_factory_test.cc
│ ├── trainer_interface.cc
│ ├── trainer_interface.h
│ ├── trainer_interface_test.cc
│ ├── unicode_script.cc
│ ├── unicode_script.h
│ ├── unicode_script_map.h
│ ├── unicode_script_test.cc
│ ├── unigram_model.cc
│ ├── unigram_model.h
│ ├── unigram_model_test.cc
│ ├── unigram_model_trainer.cc
│ ├── unigram_model_trainer.h
│ ├── unigram_model_trainer_test.cc
│ ├── util.cc
│ ├── util.h
│ ├── util_test.cc
│ ├── word_model.cc
│ ├── word_model.h
│ ├── word_model_test.cc
│ ├── word_model_trainer.cc
│ ├── word_model_trainer.h
│ └── word_model_trainer_test.cc
└── third_party/
├── CMakeLists.txt
├── absl/
│ ├── LICENSE
│ ├── container/
│ │ ├── btree_set.h
│ │ ├── flat_hash_map.h
│ │ └── flat_hash_set.h
│ ├── flags/
│ │ ├── flag.cc
│ │ ├── flag.h
│ │ ├── parse.h
│ │ ├── usage.h
│ │ └── usage_config.h
│ ├── log/
│ │ ├── check.h
│ │ ├── globals.h
│ │ ├── log.cc
│ │ └── log.h
│ └── strings/
│ ├── ascii.h
│ ├── match.h
│ ├── numbers.h
│ ├── str_cat.h
│ ├── str_format.h
│ ├── str_join.h
│ ├── str_replace.h
│ ├── str_split.h
│ ├── string_view.h
│ └── strip.h
├── darts_clone/
│ ├── LICENSE
│ └── darts.h
├── esaxx/
│ ├── LICENSE
│ ├── esa.hxx
│ └── sais.hxx
└── protobuf-lite/
├── LICENSE
├── arena.cc
├── arenastring.cc
├── bytestream.cc
├── coded_stream.cc
├── common.cc
├── extension_set.cc
├── generated_enum_util.cc
├── generated_message_table_driven_lite.cc
├── generated_message_util.cc
├── google/
│ └── protobuf/
│ ├── any.h
│ ├── arena.h
│ ├── arena_impl.h
│ ├── arenastring.h
│ ├── descriptor.h
│ ├── extension_set.h
│ ├── extension_set_inl.h
│ ├── generated_enum_reflection.h
│ ├── generated_enum_util.h
│ ├── generated_message_table_driven.h
│ ├── generated_message_table_driven_lite.h
│ ├── generated_message_util.h
│ ├── has_bits.h
│ ├── implicit_weak_message.h
│ ├── io/
│ │ ├── coded_stream.h
│ │ ├── io_win32.h
│ │ ├── zero_copy_stream.h
│ │ ├── zero_copy_stream_impl.h
│ │ └── zero_copy_stream_impl_lite.h
│ ├── map.h
│ ├── map_entry_lite.h
│ ├── map_field_lite.h
│ ├── map_type_handler.h
│ ├── message_lite.h
│ ├── metadata_lite.h
│ ├── parse_context.h
│ ├── port.h
│ ├── port_def.inc
│ ├── port_undef.inc
│ ├── repeated_field.h
│ ├── stubs/
│ │ ├── bytestream.h
│ │ ├── callback.h
│ │ ├── casts.h
│ │ ├── common.h
│ │ ├── hash.h
│ │ ├── int128.h
│ │ ├── logging.h
│ │ ├── macros.h
│ │ ├── map_util.h
│ │ ├── mutex.h
│ │ ├── once.h
│ │ ├── platform_macros.h
│ │ ├── port.h
│ │ ├── status.h
│ │ ├── statusor.h
│ │ ├── stl_util.h
│ │ ├── stringpiece.h
│ │ ├── stringprintf.h
│ │ ├── strutil.h
│ │ └── time.h
│ ├── unknown_field_set.h
│ └── wire_format_lite.h
├── implicit_weak_message.cc
├── int128.cc
├── io_win32.cc
├── message_lite.cc
├── parse_context.cc
├── repeated_field.cc
├── status.cc
├── statusor.cc
├── stringpiece.cc
├── stringprintf.cc
├── structurally_valid.cc
├── strutil.cc
├── time.cc
├── wire_format_lite.cc
├── zero_copy_stream.cc
├── zero_copy_stream_impl.cc
└── zero_copy_stream_impl_lite.cc
Showing preview only (265K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (2706 symbols across 183 files)
FILE: python/setup.py
function long_description (line 31) | def long_description():
function run_pkg_config (line 42) | def run_pkg_config(section, pkg_config_path=None):
function is_sentencepiece_installed (line 56) | def is_sentencepiece_installed():
function is_gil_disabled (line 64) | def is_gil_disabled():
function get_cflags_and_libs (line 68) | def get_cflags_and_libs(root):
class build_ext (line 84) | class build_ext(_build_ext):
method build_extension (line 87) | def build_extension(self, ext):
function copy_package_data (line 123) | def copy_package_data():
function get_win_arch (line 155) | def get_win_arch():
FILE: python/src/sentencepiece/__init__.py
function _swig_repr (line 19) | def _swig_repr(self):
function _swig_setattr_nondynamic_instance_variable (line 27) | def _swig_setattr_nondynamic_instance_variable(set):
function _swig_setattr_nondynamic_class_variable (line 40) | def _swig_setattr_nondynamic_class_variable(set):
function _swig_add_metaclass (line 49) | def _swig_add_metaclass(metaclass):
class _SwigNonDynamicMeta (line 56) | class _SwigNonDynamicMeta(type):
class ImmutableSentencePieceText_ImmutableSentencePiece (line 61) | class ImmutableSentencePieceText_ImmutableSentencePiece(object):
method __init__ (line 65) | def __init__(self):
method _piece (line 69) | def _piece(self):
method _surface (line 72) | def _surface(self):
method _id (line 75) | def _id(self):
method _begin (line 78) | def _begin(self):
method _end (line 81) | def _end(self):
method _surface_as_bytes (line 84) | def _surface_as_bytes(self):
method _piece_as_bytes (line 87) | def _piece_as_bytes(self):
method __str__ (line 98) | def __str__(self):
method __eq__ (line 106) | def __eq__(self, other):
method __hash__ (line 109) | def __hash__(self):
class ImmutableSentencePieceText (line 117) | class ImmutableSentencePieceText(object):
method __init__ (line 121) | def __init__(self):
method _pieces_size (line 125) | def _pieces_size(self):
method _pieces (line 128) | def _pieces(self, index):
method _text (line 131) | def _text(self):
method _score (line 134) | def _score(self):
method SerializeAsString (line 137) | def SerializeAsString(self):
method _text_as_bytes (line 140) | def _text_as_bytes(self):
class ImmutableSentencePieceIterator (line 147) | class ImmutableSentencePieceIterator:
method __init__ (line 148) | def __init__(self, proto):
method __len__ (line 152) | def __len__(self):
method __getitem__ (line 155) | def __getitem__(self, index):
method __str__ (line 164) | def __str__(self):
method pieces (line 170) | def pieces(self):
method __eq__ (line 173) | def __eq__(self, other):
method __hash__ (line 176) | def __hash__(self):
method __str__ (line 179) | def __str__(self):
class ImmutableNBestSentencePieceText (line 190) | class ImmutableNBestSentencePieceText(object):
method __init__ (line 194) | def __init__(self):
method _nbests_size (line 198) | def _nbests_size(self):
method _nbests (line 201) | def _nbests(self, index):
method SerializeAsString (line 204) | def SerializeAsString(self):
class ImmutableSentencePieceTextIterator (line 207) | class ImmutableSentencePieceTextIterator:
method __init__ (line 208) | def __init__(self, proto):
method __len__ (line 212) | def __len__(self):
method __getitem__ (line 215) | def __getitem__(self, index):
method __str__ (line 224) | def __str__(self):
method nbests (line 230) | def nbests(self):
method __eq__ (line 233) | def __eq__(self, other):
method __hash__ (line 236) | def __hash__(self):
method __str__ (line 239) | def __str__(self):
class SentencePieceProcessor (line 247) | class SentencePieceProcessor(object):
method __init__ (line 251) | def __init__(self):
method LoadFromSerializedProto (line 255) | def LoadFromSerializedProto(self, serialized):
method SetEncodeExtraOptions (line 258) | def SetEncodeExtraOptions(self, extra_option):
method SetDecodeExtraOptions (line 261) | def SetDecodeExtraOptions(self, extra_option):
method SetVocabulary (line 264) | def SetVocabulary(self, valid_vocab):
method ResetVocabulary (line 267) | def ResetVocabulary(self):
method LoadVocabulary (line 270) | def LoadVocabulary(self, filename, threshold):
method CalculateEntropy (line 273) | def CalculateEntropy(self, *args):
method GetPieceSize (line 276) | def GetPieceSize(self):
method PieceToId (line 279) | def PieceToId(self, piece):
method IdToPiece (line 282) | def IdToPiece(self, id):
method GetScore (line 285) | def GetScore(self, id):
method IsUnknown (line 288) | def IsUnknown(self, id):
method IsControl (line 291) | def IsControl(self, id):
method IsUnused (line 294) | def IsUnused(self, id):
method IsByte (line 297) | def IsByte(self, id):
method unk_id (line 300) | def unk_id(self):
method bos_id (line 303) | def bos_id(self):
method eos_id (line 306) | def eos_id(self):
method pad_id (line 309) | def pad_id(self):
method serialized_model_proto (line 312) | def serialized_model_proto(self):
method LoadFromFile (line 315) | def LoadFromFile(self, arg):
method _EncodeAsIds (line 318) | def _EncodeAsIds(self, text, enable_sampling, nbest_size, alpha, add_b...
method _EncodeAsPieces (line 321) | def _EncodeAsPieces(self, text, enable_sampling, nbest_size, alpha, ad...
method _EncodeAsSerializedProto (line 324) | def _EncodeAsSerializedProto(self, text, enable_sampling, nbest_size, ...
method _EncodeAsImmutableProto (line 327) | def _EncodeAsImmutableProto(self, text, enable_sampling, nbest_size, a...
method _EncodeAsIdsBatch (line 330) | def _EncodeAsIdsBatch(self, ins, num_threads, enable_sampling, nbest_s...
method _EncodeAsPiecesBatch (line 333) | def _EncodeAsPiecesBatch(self, ins, num_threads, enable_sampling, nbes...
method _EncodeAsSerializedProtoBatch (line 336) | def _EncodeAsSerializedProtoBatch(self, ins, num_threads, enable_sampl...
method _EncodeAsImmutableProtoBatch (line 339) | def _EncodeAsImmutableProtoBatch(self, ins, num_threads, enable_sampli...
method _DecodeIds (line 342) | def _DecodeIds(self, ids):
method _DecodeIdsAsBytes (line 345) | def _DecodeIdsAsBytes(self, ids):
method _DecodePieces (line 348) | def _DecodePieces(self, pieces):
method _DecodeIdsAsSerializedProto (line 351) | def _DecodeIdsAsSerializedProto(self, ids):
method _DecodePiecesAsSerializedProto (line 354) | def _DecodePiecesAsSerializedProto(self, pieces):
method _DecodeIdsAsImmutableProto (line 357) | def _DecodeIdsAsImmutableProto(self, ids):
method _DecodePiecesAsImmutableProto (line 360) | def _DecodePiecesAsImmutableProto(self, pieces):
method _DecodeIdsBatch (line 363) | def _DecodeIdsBatch(self, ins, num_threads):
method _DecodeIdsAsBytesBatch (line 366) | def _DecodeIdsAsBytesBatch(self, ins, num_threads):
method _DecodeIdsAsSerializedProtoBatch (line 369) | def _DecodeIdsAsSerializedProtoBatch(self, ins, num_threads):
method _DecodeIdsAsImmutableProtoBatch (line 372) | def _DecodeIdsAsImmutableProtoBatch(self, ins, num_threads):
method _DecodePiecesBatch (line 375) | def _DecodePiecesBatch(self, ins, num_threads):
method _DecodePiecesAsSerializedProtoBatch (line 378) | def _DecodePiecesAsSerializedProtoBatch(self, ins, num_threads):
method _DecodePiecesAsImmutableProtoBatch (line 381) | def _DecodePiecesAsImmutableProtoBatch(self, ins, num_threads):
method _NBestEncodeAsIds (line 384) | def _NBestEncodeAsIds(self, text, nbest_size, add_bos, add_eos, revers...
method _NBestEncodeAsPieces (line 387) | def _NBestEncodeAsPieces(self, text, nbest_size, add_bos, add_eos, rev...
method _NBestEncodeAsSerializedProto (line 390) | def _NBestEncodeAsSerializedProto(self, text, nbest_size, add_bos, add...
method _NBestEncodeAsImmutableProto (line 393) | def _NBestEncodeAsImmutableProto(self, text, nbest_size, add_bos, add_...
method _SampleEncodeAndScoreAsIds (line 396) | def _SampleEncodeAndScoreAsIds(self, text, num_samples, alpha, wor, in...
method _SampleEncodeAndScoreAsPieces (line 399) | def _SampleEncodeAndScoreAsPieces(self, text, num_samples, alpha, wor,...
method _SampleEncodeAndScoreAsSerializedProto (line 402) | def _SampleEncodeAndScoreAsSerializedProto(self, text, num_samples, al...
method _SampleEncodeAndScoreAsImmutableProto (line 405) | def _SampleEncodeAndScoreAsImmutableProto(self, text, num_samples, alp...
method _Normalize (line 408) | def _Normalize(self, text):
method _NormalizeWithOffsets (line 411) | def _NormalizeWithOffsets(self, text):
method _CalculateEntropy (line 414) | def _CalculateEntropy(self, text, alpha):
method _CalculateEntropyBatch (line 417) | def _CalculateEntropyBatch(self, ins, alpha, num_threads):
method _OverrideNormalizerSpec (line 420) | def _OverrideNormalizerSpec(self, args):
method Init (line 423) | def Init(self,
method Encode (line 471) | def Encode(self,
method EncodeAsPieces (line 565) | def EncodeAsPieces(self, input, **kwargs):
method EncodeAsIds (line 569) | def EncodeAsIds(self, input, **kwargs):
method EncodeAsSerializedProto (line 573) | def EncodeAsSerializedProto(self, input, **kwargs):
method EncodeAsImmutableProto (line 577) | def EncodeAsImmutableProto(self, input, **kwargs):
method SampleEncodeAsPieces (line 581) | def SampleEncodeAsPieces(self, input, nbest_size=None, alpha=None, **k...
method SampleEncodeAsIds (line 586) | def SampleEncodeAsIds(self, input, nbest_size=None, alpha=None,**kwargs):
method SampleEncodeAsSerializedProto (line 591) | def SampleEncodeAsSerializedProto(self, input, nbest_size=None, alpha=...
method SampleEncodeAsImmutableProto (line 596) | def SampleEncodeAsImmutableProto(self, input, nbest_size=None, alpha=N...
method NBestEncode (line 601) | def NBestEncode(self,
method NBestEncodeAsPieces (line 659) | def NBestEncodeAsPieces(self, input, nbest_size=None, **kwargs):
method NBestEncodeAsIds (line 664) | def NBestEncodeAsIds(self, input, nbest_size=None, **kwargs):
method NBestEncodeAsSerializedProto (line 669) | def NBestEncodeAsSerializedProto(self, input, nbest_size=None, **kwargs):
method NBestEncodeAsImmutableProto (line 674) | def NBestEncodeAsImmutableProto(self, input, nbest_size=None, **kwargs):
method SampleEncodeAndScore (line 679) | def SampleEncodeAndScore(self,
method SampleEncodeAndScoreAsPieces (line 756) | def SampleEncodeAndScoreAsPieces(self, input, num_samples=None, alpha=...
method SampleEncodeAndScoreAsIds (line 761) | def SampleEncodeAndScoreAsIds(self, input, num_samples=None, alpha=Non...
method SampleEncodeAndScoreAsSerializedProto (line 766) | def SampleEncodeAndScoreAsSerializedProto(self, input, num_samples=Non...
method SampleEncodeAndScoreAsImmutableProto (line 771) | def SampleEncodeAndScoreAsImmutableProto(self, input, num_samples=None...
method Decode (line 776) | def Decode(self, input, out_type=str, num_threads=None):
method DecodePieces (line 871) | def DecodePieces(self, input, out_type=str, **kwargs):
method DecodeIds (line 875) | def DecodeIds(self, input, out_type=str, **kwargs):
method DecodePiecesAsSerializedProto (line 879) | def DecodePiecesAsSerializedProto(self, input, out_type='serialized_pr...
method DecodeIdsAsSerializedProto (line 883) | def DecodeIdsAsSerializedProto(self, input, out_type='serialized_proto...
method DecodePiecesAsImmutableProto (line 887) | def DecodePiecesAsImmutableProto(self, input, out_type='immutable_prot...
method DecodeIdsAsImmutableProto (line 891) | def DecodeIdsAsImmutableProto(self, input, out_type='immutable_proto',...
method CalculateEntropy (line 895) | def CalculateEntropy(self, input, alpha, num_threads=None):
method Normalize (line 907) | def Normalize(self, input, with_offsets=None):
method OverrideNormalizerSpec (line 917) | def OverrideNormalizerSpec(self, **kwargs):
method piece_size (line 924) | def piece_size(self):
method vocab_size (line 928) | def vocab_size(self):
method __getstate__ (line 932) | def __getstate__(self):
method __setstate__ (line 936) | def __setstate__(self, serialized_model_proto):
method __len__ (line 941) | def __len__(self):
method __getitem__ (line 945) | def __getitem__(self, piece):
method Load (line 949) | def Load(self, model_file=None, model_proto=None):
function SetRandomGeneratorSeed (line 967) | def SetRandomGeneratorSeed(seed):
function SetMinLogLevel (line 970) | def SetMinLogLevel(v):
class SentencePieceTrainer (line 972) | class SentencePieceTrainer(object):
method __init__ (line 975) | def __init__(self, *args, **kwargs):
method _TrainFromString (line 980) | def _TrainFromString(arg):
method _TrainFromMap (line 984) | def _TrainFromMap(args):
method _TrainFromMap2 (line 988) | def _TrainFromMap2(args, iter):
method _TrainFromMap3 (line 992) | def _TrainFromMap3(args):
method _TrainFromMap4 (line 996) | def _TrainFromMap4(args, iter):
method _Train (line 1000) | def _Train(arg=None, **kwargs):
method Train (line 1045) | def Train(arg=None, logstream=None, **kwargs):
class SentencePieceNormalizer (line 1052) | class SentencePieceNormalizer(object):
method __init__ (line 1056) | def __init__(self):
method LoadFromSerializedProto (line 1060) | def LoadFromSerializedProto(self, serialized):
method LoadFromRuleTSV (line 1063) | def LoadFromRuleTSV(self, filename):
method LoadFromRuleName (line 1066) | def LoadFromRuleName(self, name):
method serialized_model_proto (line 1069) | def serialized_model_proto(self):
method LoadFromFile (line 1072) | def LoadFromFile(self, arg):
method _Normalize (line 1075) | def _Normalize(self, text):
method _NormalizeWithOffsets (line 1078) | def _NormalizeWithOffsets(self, text):
method _SetProtoField (line 1081) | def _SetProtoField(self, name, value):
method Init (line 1084) | def Init(self,
method Normalize (line 1122) | def Normalize(self, input, with_offsets=None):
method __getstate__ (line 1133) | def __getstate__(self):
method __setstate__ (line 1137) | def __setstate__(self, serialized_model_proto):
function SetDataDir (line 1145) | def SetDataDir(data_dir):
function _add_snake_case (line 1158) | def _add_snake_case(classname):
function _batchnize (line 1171) | def _batchnize(classname, name):
class _LogStream (line 1212) | class _LogStream(object):
method __init__ (line 1213) | def __init__(self, ostream=None):
method __enter__ (line 1218) | def __enter__(self):
method __exit__ (line 1223) | def __exit__(self, type, value, traceback):
FILE: python/src/sentencepiece/sentencepiece_wrap.cxx
function SWIGINTERNINLINE (line 397) | SWIGINTERNINLINE int SWIG_AddCast(int r) {
function SWIGINTERNINLINE (line 400) | SWIGINTERNINLINE int SWIG_CheckState(int r) {
type swig_type_info (line 416) | struct swig_type_info
type swig_cast_info (line 423) | struct swig_cast_info
type swig_type_info (line 419) | struct swig_type_info {
type swig_cast_info (line 423) | struct swig_cast_info
type swig_cast_info (line 429) | struct swig_cast_info {
type swig_cast_info (line 432) | struct swig_cast_info
type swig_cast_info (line 433) | struct swig_cast_info
type swig_module_info (line 439) | struct swig_module_info {
type swig_module_info (line 442) | struct swig_module_info
function SWIGRUNTIME (line 455) | SWIGRUNTIME int
function SWIGRUNTIME (line 470) | SWIGRUNTIME int
function SWIGRUNTIME (line 489) | SWIGRUNTIME int
function SWIGRUNTIME (line 497) | SWIGRUNTIME swig_cast_info *
function SWIGRUNTIME (line 524) | SWIGRUNTIME swig_cast_info *
function SWIGRUNTIMEINLINE (line 551) | SWIGRUNTIMEINLINE void *
function SWIGRUNTIME (line 559) | SWIGRUNTIME swig_type_info *
function SWIGRUNTIMEINLINE (line 573) | SWIGRUNTIMEINLINE const char *
function SWIGRUNTIME (line 582) | SWIGRUNTIME const char *
function SWIGRUNTIME (line 603) | SWIGRUNTIME void
function SWIGRUNTIME (line 619) | SWIGRUNTIME void
function SWIGRUNTIME (line 633) | SWIGRUNTIME swig_type_info *
function SWIGRUNTIME (line 678) | SWIGRUNTIME swig_type_info *
function SWIGRUNTIME (line 707) | SWIGRUNTIME char *
function SWIGRUNTIME (line 723) | SWIGRUNTIME const char *
function SWIGRUNTIME (line 751) | SWIGRUNTIME char *
function SWIGRUNTIME (line 762) | SWIGRUNTIME const char *
function SWIGRUNTIME (line 775) | SWIGRUNTIME char *
function SWIGRUNTIME (line 790) | SWIGRUNTIME const char *
function SWIGINTERN (line 852) | SWIGINTERN const char *
function SWIGINTERN (line 875) | SWIGINTERN PyObject*
function SWIGRUNTIME (line 931) | SWIGRUNTIME PyObject*
function SWIGRUNTIME (line 978) | SWIGRUNTIME void
function SWIGRUNTIME (line 1005) | SWIGRUNTIME int
function SWIGRUNTIME (line 1015) | SWIGRUNTIME void
class SWIG_Python_Thread_Block (line 1058) | class SWIG_Python_Thread_Block {
method end (line 1062) | void end() { if (status) { PyGILState_Release(state); status = false;} }
method SWIG_Python_Thread_Block (line 1063) | SWIG_Python_Thread_Block() : status(true), state(PyGILState_Ensure()) {}
class SWIG_Python_Thread_Allow (line 1066) | class SWIG_Python_Thread_Allow {
method end (line 1070) | void end() { if (status) { status = false; PyEval_RestoreThread(save); }}
method SWIG_Python_Thread_Allow (line 1071) | SWIG_Python_Thread_Allow() : status(true), save(PyEval_SaveThread()) {}
type swig_const_info (line 1126) | struct swig_const_info {
function SWIGINTERN (line 1210) | SWIGINTERN void
function SWIGINTERN (line 1218) | SWIGINTERN void
function SWIGINTERN (line 1231) | SWIGINTERN void
function SWIGINTERN (line 1238) | SWIGINTERN void
function SWIGINTERN (line 1248) | SWIGINTERN void
function SWIGINTERN (line 1258) | SWIGINTERN PyObject*
function SWIGINTERN (line 1284) | SWIGINTERN Py_ssize_t
function SWIGINTERN (line 1330) | SWIGINTERN int
type swig_globalvar (line 1369) | struct swig_globalvar {
type swig_globalvar (line 1373) | struct swig_globalvar
type swig_varlinkobject (line 1376) | struct swig_varlinkobject {
function SWIGINTERN (line 1381) | SWIGINTERN PyObject *
function SWIGINTERN (line 1390) | SWIGINTERN PyObject *
function SWIGINTERN (line 1429) | SWIGINTERN void
function SWIGINTERN (line 1441) | SWIGINTERN PyObject *
function SWIGINTERN (line 1459) | SWIGINTERN int
function SWIGINTERN (line 1477) | SWIGINTERN PyTypeObject*
function SWIGINTERN (line 1573) | SWIGINTERN PyObject *
function SWIGINTERN (line 1582) | SWIGINTERN void
function SWIGINTERN (line 1602) | SWIGINTERN PyObject *
function SWIGRUNTIMEINLINE (line 1633) | SWIGRUNTIMEINLINE PyObject *
function SWIGRUNTIMEINLINE (line 1653) | SWIGRUNTIMEINLINE int
function SWIGRUNTIMEINLINE (line 1663) | SWIGRUNTIMEINLINE PyObject *
function SWIGRUNTIME (line 1671) | SWIGRUNTIME SwigPyClientData *
function SWIGRUNTIME (line 1721) | SWIGRUNTIME void
function SWIGRUNTIME (line 1747) | SWIGRUNTIME PyObject *
function SWIGRUNTIME (line 1761) | SWIGRUNTIME PyObject *
function SWIGRUNTIME (line 1767) | SWIGRUNTIME PyObject *
function SWIGRUNTIME (line 1792) | SWIGRUNTIME PyObject *
function SWIGRUNTIME (line 1798) | SWIGRUNTIME PyObject *
function SWIGRUNTIME (line 1804) | SWIGRUNTIME PyObject *
function SWIGRUNTIME (line 1830) | SWIGRUNTIME PyObject *
function SWIGRUNTIME (line 1836) | SWIGRUNTIME int
function SWIGRUNTIME (line 1845) | SWIGRUNTIME PyObject*
function SWIGRUNTIME (line 1864) | SWIGRUNTIME PyTypeObject*
function SWIGRUNTIME (line 1874) | SWIGRUNTIME PyTypeObject*
function SWIGRUNTIMEINLINE (line 1881) | SWIGRUNTIMEINLINE int
function SWIGRUNTIME (line 1914) | SWIGRUNTIME void
function SWIGRUNTIME (line 1973) | SWIGRUNTIME PyObject*
function SWIGRUNTIME (line 1987) | SWIGRUNTIME PyObject*
function SWIGINTERN (line 1999) | SWIGINTERN PyObject*
function SWIGINTERN (line 2007) | SWIGINTERN PyObject*
function SWIGINTERN (line 2015) | SWIGINTERN PyObject*
function SWIGRUNTIME (line 2046) | SWIGRUNTIME PyTypeObject*
function SWIGRUNTIME (line 2209) | SWIGRUNTIME PyObject *
function SWIGRUNTIME (line 2242) | SWIGRUNTIME PyObject *
function SWIGRUNTIME (line 2253) | SWIGRUNTIME PyObject *
function SWIGRUNTIME (line 2264) | SWIGRUNTIME int
function SWIGRUNTIME (line 2275) | SWIGRUNTIME PyTypeObject*
function SWIGRUNTIMEINLINE (line 2281) | SWIGRUNTIMEINLINE int
function SWIGRUNTIME (line 2302) | SWIGRUNTIME void
function SWIGRUNTIME (line 2312) | SWIGRUNTIME PyTypeObject*
function SWIGRUNTIME (line 2429) | SWIGRUNTIME PyObject *
function SWIGRUNTIME (line 2448) | SWIGRUNTIME swig_type_info *
function SWIGRUNTIME (line 2467) | SWIGRUNTIME PyObject *
function SWIGRUNTIME (line 2482) | SWIGRUNTIME SwigPyObject *
function SWIGRUNTIME (line 2553) | SWIGRUNTIME int
function SWIGRUNTIME (line 2568) | SWIGRUNTIME int
function SWIGRUNTIME (line 2680) | SWIGRUNTIME int
function SWIGRUNTIME (line 2719) | SWIGRUNTIME int
function SWIGRUNTIME (line 2742) | SWIGRUNTIME PyObject*
function SWIGRUNTIME (line 2808) | SWIGRUNTIME int
function SWIGINTERN (line 2830) | SWIGINTERN PyObject *
function SWIGRUNTIME (line 2849) | SWIGRUNTIME PyObject *
function SWIGRUNTIMEINLINE (line 2910) | SWIGRUNTIMEINLINE PyObject *
function SWIGRUNTIME (line 2926) | SWIGRUNTIME PyObject *
function SWIGRUNTIME (line 2934) | SWIGRUNTIME swig_module_info *
function SWIGRUNTIME (line 2955) | SWIGRUNTIME void
function SWIGRUNTIME (line 2980) | SWIGRUNTIME void
function SWIGRUNTIME (line 3002) | SWIGRUNTIME swig_type_info *
function SWIGRUNTIME (line 3033) | SWIGRUNTIME int
function SWIGRUNTIME (line 3062) | SWIGRUNTIME int
function SWIGRUNTIMEINLINE (line 3075) | SWIGRUNTIMEINLINE const char *
function SWIGRUNTIME (line 3083) | SWIGRUNTIME void
function SWIGRUNTIME (line 3127) | SWIGRUNTIME void *
class SwigValueWrapper (line 3250) | class SwigValueWrapper {
type SwigSmartPointer (line 3251) | struct SwigSmartPointer {
method SwigSmartPointer (line 3253) | SwigSmartPointer(T *p) : ptr(p) { }
method SwigSmartPointer (line 3255) | SwigSmartPointer& operator=(SwigSmartPointer& rhs) { T* oldptr = ptr...
method reset (line 3256) | void reset(T *p) { T* oldptr = ptr; ptr = 0; delete oldptr; ptr = p; }
method SwigValueWrapper (line 3261) | SwigValueWrapper() : pointer(0) { }
method SwigValueWrapper (line 3262) | SwigValueWrapper& operator=(const T& t) { SwigSmartPointer tmp(new T(t...
method SwigValueWrapper (line 3264) | SwigValueWrapper& operator=(T&& t) { SwigSmartPointer tmp(new T(std::m...
method T (line 3269) | T *operator&() const { return pointer.ptr; }
method reset (line 3270) | static void reset(SwigValueWrapper& t, T *p) { t.pointer.reset(p); }
function T (line 3282) | T SwigValueInit() {
type swig (line 3302) | namespace swig {
class SwigPtr_PyObject (line 3303) | class SwigPtr_PyObject {
method SwigPtr_PyObject (line 3308) | SwigPtr_PyObject() :_obj(0)
method SwigPtr_PyObject (line 3312) | SwigPtr_PyObject(const SwigPtr_PyObject& item) : _obj(item._obj)
method SwigPtr_PyObject (line 3319) | SwigPtr_PyObject(PyObject *obj, bool initial_ref = true) :_obj(obj)
method SwigPtr_PyObject (line 3328) | SwigPtr_PyObject & operator=(const SwigPtr_PyObject& item)
method PyObject (line 3350) | PyObject *operator->() const
type SwigVar_PyObject (line 3359) | struct SwigVar_PyObject : SwigPtr_PyObject {
method SwigVar_PyObject (line 3360) | SwigVar_PyObject(PyObject* obj = 0) : SwigPtr_PyObject(obj, false) { }
method SwigVar_PyObject (line 3362) | SwigVar_PyObject & operator = (PyObject* obj)
type swig (line 3358) | namespace swig {
class SwigPtr_PyObject (line 3303) | class SwigPtr_PyObject {
method SwigPtr_PyObject (line 3308) | SwigPtr_PyObject() :_obj(0)
method SwigPtr_PyObject (line 3312) | SwigPtr_PyObject(const SwigPtr_PyObject& item) : _obj(item._obj)
method SwigPtr_PyObject (line 3319) | SwigPtr_PyObject(PyObject *obj, bool initial_ref = true) :_obj(obj)
method SwigPtr_PyObject (line 3328) | SwigPtr_PyObject & operator=(const SwigPtr_PyObject& item)
method PyObject (line 3350) | PyObject *operator->() const
type SwigVar_PyObject (line 3359) | struct SwigVar_PyObject : SwigPtr_PyObject {
method SwigVar_PyObject (line 3360) | SwigVar_PyObject(PyObject* obj = 0) : SwigPtr_PyObject(obj, false) { }
method SwigVar_PyObject (line 3362) | SwigVar_PyObject & operator = (PyObject* obj)
function ReleaseResultObject (line 3390) | inline void ReleaseResultObject(PyObject *obj) {
class PyInputString (line 3396) | class PyInputString {
method PyInputString (line 3398) | explicit PyInputString(PyObject* obj) {
method str (line 3409) | absl::string_view str() const { return absl::string_view(data(), size(...
method Py_ssize_t (line 3411) | Py_ssize_t size() const { return size_; }
method IsAvalable (line 3412) | bool IsAvalable() const { return str_ != nullptr; }
method PyObject (line 3413) | PyObject *input_type() const { return input_type_; }
method IsUnicode (line 3415) | static bool IsUnicode(PyObject *resultobj) {
function PyObject (line 3425) | PyObject* MakePyOutputString(const std::string& output,
function PyObject (line 3433) | PyObject* MakePyOutputBytes(const sentencepiece::util::bytes& output) {
function ToSwigError (line 3437) | int ToSwigError(sentencepiece::util::StatusCode code) {
class PySentenceIterator (line 3451) | class PySentenceIterator : public sentencepiece::SentenceIterator {
method PySentenceIterator (line 3453) | PySentenceIterator(PyObject *iter) : iter_(iter) {
method done (line 3462) | bool done() const override {
method Next (line 3466) | void Next() override {
method status (line 3475) | sentencepiece::util::Status status() const override {
method CopyValue (line 3480) | void CopyValue() {
function RewriteIds (line 3505) | inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp,
function RewriteIds (line 3514) | inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp,
function RewriteIds (line 3532) | inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp,
function RewriteIds (line 3542) | inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp,
function CheckIds (line 3552) | inline void CheckIds(const std::vector<int> &ids, int num_pieces) {
function CheckIds (line 3562) | inline void CheckIds(const std::vector<absl::string_view> &ids, int num_...
function CheckIdsBatch (line 3564) | inline void CheckIdsBatch(const std::vector<std::vector<int>> &ids, int ...
function ConvertToUnicodeSpans (line 3569) | inline void ConvertToUnicodeSpans(T *proto) {}
function ConvertToUnicodeSpans (line 3572) | inline void ConvertToUnicodeSpans(sentencepiece::ImmutableSentencePieceT...
function ConvertToUnicodeSpans (line 3577) | inline void ConvertToUnicodeSpans(sentencepiece::ImmutableNBestSentenceP...
class ThreadPool (line 3581) | class ThreadPool {
method ThreadPool (line 3583) | explicit ThreadPool(size_t request_size) :
method Schedule (line 3592) | void Schedule(std::function<void()> closure) {
function InitNumThreads (line 3607) | inline void InitNumThreads(const std::vector<T> &ins, int *num_threads) {
function SWIGINTERNINLINE (line 3662) | SWIGINTERNINLINE PyObject*
function SWIGINTERN (line 3668) | SWIGINTERN sentencepiece::util::bytes const &sentencepiece_ImmutableSent...
function SWIGINTERN (line 3671) | SWIGINTERN sentencepiece::util::bytes const &sentencepiece_ImmutableSent...
function SWIGINTERNINLINE (line 3678) | SWIGINTERNINLINE PyObject*
function SWIGINTERNINLINE (line 3702) | SWIGINTERNINLINE PyObject*
function SWIGINTERNINLINE (line 3711) | SWIGINTERNINLINE PyObject *
function SWIGINTERN (line 3727) | SWIGINTERN int
function SWIGINTERNINLINE (line 3779) | SWIGINTERNINLINE int
function SWIGINTERN (line 3811) | SWIGINTERN int
function SWIGINTERN (line 3859) | SWIGINTERN int
function SWIGINTERNINLINE (line 3878) | SWIGINTERNINLINE PyObject *
function SWIGINTERN (line 3884) | SWIGINTERN sentencepiece::util::bytes const &sentencepiece_ImmutableSent...
function SWIGINTERN (line 3888) | SWIGINTERN swig_type_info*
function SWIGINTERN (line 3901) | SWIGINTERN int
type std (line 4003) | namespace std { }
function SWIG_isfinite_func (line 4004) | typename T>
function SWIGINTERN (line 4029) | SWIGINTERN int
function SWIGINTERNINLINE (line 4045) | SWIGINTERNINLINE PyObject*
function SWIGINTERNINLINE (line 4052) | SWIGINTERNINLINE PyObject*
function SWIGINTERN (line 4058) | SWIGINTERN sentencepiece::util::Status sentencepiece_SentencePieceProces...
function SWIGINTERN (line 4062) | SWIGINTERN int
function SWIGINTERN (line 4075) | SWIGINTERN std::vector< int > sentencepiece_SentencePieceProcessor__Enco...
function SWIGINTERN (line 4082) | SWIGINTERN std::vector< std::string > sentencepiece_SentencePieceProcess...
function SWIGINTERN (line 4089) | SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcess...
function SWIGINTERN (line 4096) | SWIGINTERN sentencepiece::ImmutableSentencePieceText sentencepiece_Sente...
function SWIGINTERN (line 4104) | SWIGINTERN std::vector< std::vector< int > > sentencepiece_SentencePiece...
function SWIGINTERN (line 4108) | SWIGINTERN std::vector< std::vector< std::string > > sentencepiece_Sente...
function SWIGINTERN (line 4112) | SWIGINTERN BytesArray sentencepiece_SentencePieceProcessor__EncodeAsSeri...
function SWIGINTERN (line 4117) | SWIGINTERN std::vector< sentencepiece::ImmutableSentencePieceText > sent...
function SWIGINTERN (line 4122) | SWIGINTERN std::string sentencepiece_SentencePieceProcessor__DecodeIds(s...
function SWIGINTERN (line 4126) | SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcess...
function SWIGINTERN (line 4130) | SWIGINTERN std::string sentencepiece_SentencePieceProcessor__DecodePiece...
function SWIGINTERN (line 4133) | SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcess...
function SWIGINTERN (line 4137) | SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcess...
function SWIGINTERN (line 4141) | SWIGINTERN sentencepiece::ImmutableSentencePieceText sentencepiece_Sente...
function SWIGINTERN (line 4147) | SWIGINTERN sentencepiece::ImmutableSentencePieceText sentencepiece_Sente...
function SWIGINTERN (line 4153) | SWIGINTERN std::vector< std::string > sentencepiece_SentencePieceProcess...
function SWIGINTERN (line 4157) | SWIGINTERN BytesArray sentencepiece_SentencePieceProcessor__DecodeIdsAsB...
function SWIGINTERN (line 4161) | SWIGINTERN BytesArray sentencepiece_SentencePieceProcessor__DecodeIdsAsS...
function SWIGINTERN (line 4166) | SWIGINTERN std::vector< sentencepiece::ImmutableSentencePieceText > sent...
function SWIGINTERN (line 4171) | SWIGINTERN std::vector< std::string > sentencepiece_SentencePieceProcess...
function SWIGINTERN (line 4174) | SWIGINTERN BytesArray sentencepiece_SentencePieceProcessor__DecodePieces...
function SWIGINTERN (line 4178) | SWIGINTERN std::vector< sentencepiece::ImmutableSentencePieceText > sent...
function SWIGINTERN (line 4182) | SWIGINTERN std::vector< std::vector< int > > sentencepiece_SentencePiece...
function SWIGINTERN (line 4189) | SWIGINTERN std::vector< std::vector< std::string > > sentencepiece_Sente...
function SWIGINTERN (line 4196) | SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcess...
function SWIGINTERN (line 4201) | SWIGINTERN sentencepiece::ImmutableNBestSentencePieceText sentencepiece_...
function SWIGINTERN (line 4208) | SWIGINTERN std::vector< std::pair< std::vector< int >,float > > sentence...
function SWIGINTERN (line 4216) | SWIGINTERN std::vector< std::pair< std::vector< std::string >,float > > ...
function SWIGINTERN (line 4224) | SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcess...
function SWIGINTERN (line 4230) | SWIGINTERN sentencepiece::ImmutableNBestSentencePieceText sentencepiece_...
function SWIGINTERN (line 4238) | SWIGINTERN std::string sentencepiece_SentencePieceProcessor__Normalize(s...
function SWIGINTERN (line 4241) | SWIGINTERN std::pair< std::string,std::vector< size_t > > sentencepiece_...
function SWIGINTERN (line 4246) | SWIGINTERN float sentencepiece_SentencePieceProcessor__CalculateEntropy(...
function SWIGINTERN (line 4249) | SWIGINTERN std::vector< float > sentencepiece_SentencePieceProcessor__Ca...
function SWIGINTERN (line 4266) | SWIGINTERN sentencepiece::util::Status sentencepiece_SentencePieceProces...
function SWIGINTERN (line 4277) | SWIGINTERN int
function SWIGINTERN (line 4329) | SWIGINTERN int
function SWIGINTERN (line 4344) | SWIGINTERN void sentencepiece_SentencePieceTrainer__TrainFromString(absl...
function SWIGINTERN (line 4349) | SWIGINTERN void sentencepiece_SentencePieceTrainer__TrainFromMap(std::un...
function SWIGINTERN (line 4354) | SWIGINTERN void sentencepiece_SentencePieceTrainer__TrainFromMap2(std::u...
function SWIGINTERN (line 4359) | SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceTrainer...
function SWIGINTERN (line 4365) | SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceTrainer...
function SWIGINTERN (line 4371) | SWIGINTERN sentencepiece::util::Status sentencepiece_SentencePieceNormal...
function SWIGINTERN (line 4374) | SWIGINTERN std::string sentencepiece_SentencePieceNormalizer__Normalize(...
function SWIGINTERN (line 4380) | SWIGINTERN std::pair< std::string,std::vector< size_t > > sentencepiece_...
function SWIGINTERN (line 4386) | SWIGINTERN void sentencepiece_SentencePieceNormalizer__SetProtoField(sen...
function SWIGINTERN (line 4395) | SWIGINTERN PyObject *_wrap_new_ImmutableSentencePieceText_ImmutableSente...
function SWIGINTERN (line 4417) | SWIGINTERN PyObject *_wrap_delete_ImmutableSentencePieceText_ImmutableSe...
function SWIGINTERN (line 4448) | SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentenceP...
function SWIGINTERN (line 4483) | SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentenceP...
function SWIGINTERN (line 4518) | SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentenceP...
function SWIGINTERN (line 4550) | SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentenceP...
function SWIGINTERN (line 4582) | SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentenceP...
function SWIGINTERN (line 4614) | SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentenceP...
function SWIGINTERN (line 4648) | SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentenceP...
function SWIGINTERN (line 4682) | SWIGINTERN PyObject *ImmutableSentencePieceText_ImmutableSentencePiece_s...
function SWIGINTERN (line 4689) | SWIGINTERN PyObject *ImmutableSentencePieceText_ImmutableSentencePiece_s...
function SWIGINTERN (line 4693) | SWIGINTERN PyObject *_wrap_new_ImmutableSentencePieceText(PyObject *self...
function SWIGINTERN (line 4715) | SWIGINTERN PyObject *_wrap_delete_ImmutableSentencePieceText(PyObject *s...
function SWIGINTERN (line 4746) | SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText__pieces_size(PyObj...
function SWIGINTERN (line 4778) | SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText__pieces(PyObject *...
function SWIGINTERN (line 4817) | SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText__text(PyObject *se...
function SWIGINTERN (line 4852) | SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText__score(PyObject *s...
function SWIGINTERN (line 4884) | SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_SerializeAsString(...
function SWIGINTERN (line 4918) | SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText__text_as_bytes(PyO...
function SWIGINTERN (line 4952) | SWIGINTERN PyObject *ImmutableSentencePieceText_swigregister(PyObject *S...
function SWIGINTERN (line 4959) | SWIGINTERN PyObject *ImmutableSentencePieceText_swiginit(PyObject *SWIGU...
function SWIGINTERN (line 4963) | SWIGINTERN PyObject *_wrap_new_ImmutableNBestSentencePieceText(PyObject ...
function SWIGINTERN (line 4985) | SWIGINTERN PyObject *_wrap_delete_ImmutableNBestSentencePieceText(PyObje...
function SWIGINTERN (line 5016) | SWIGINTERN PyObject *_wrap_ImmutableNBestSentencePieceText__nbests_size(...
function SWIGINTERN (line 5048) | SWIGINTERN PyObject *_wrap_ImmutableNBestSentencePieceText__nbests(PyObj...
function SWIGINTERN (line 5087) | SWIGINTERN PyObject *_wrap_ImmutableNBestSentencePieceText_SerializeAsSt...
function SWIGINTERN (line 5121) | SWIGINTERN PyObject *ImmutableNBestSentencePieceText_swigregister(PyObje...
function SWIGINTERN (line 5128) | SWIGINTERN PyObject *ImmutableNBestSentencePieceText_swiginit(PyObject *...
function SWIGINTERN (line 5132) | SWIGINTERN PyObject *_wrap_new_SentencePieceProcessor(PyObject *self, Py...
function SWIGINTERN (line 5154) | SWIGINTERN PyObject *_wrap_delete_SentencePieceProcessor(PyObject *self,...
function SWIGINTERN (line 5185) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_LoadFromSerializedProt...
function SWIGINTERN (line 5231) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SetEncodeExtraOptions(...
function SWIGINTERN (line 5277) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SetDecodeExtraOptions(...
function SWIGINTERN (line 5323) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SetVocabulary(PyObject...
function SWIGINTERN (line 5387) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_ResetVocabulary(PyObje...
function SWIGINTERN (line 5424) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_LoadVocabulary(PyObjec...
function SWIGINTERN (line 5478) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_CalculateEntropy__SWIG...
function SWIGINTERN (line 5539) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_CalculateEntropy__SWIG...
function SWIGINTERN (line 5587) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_CalculateEntropy(PyObj...
function SWIGINTERN (line 5648) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_GetPieceSize(PyObject ...
function SWIGINTERN (line 5680) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_PieceToId(PyObject *se...
function SWIGINTERN (line 5721) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IdToPiece(PyObject *se...
function SWIGINTERN (line 5763) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_GetScore(PyObject *sel...
function SWIGINTERN (line 5802) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsUnknown(PyObject *se...
function SWIGINTERN (line 5841) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsControl(PyObject *se...
function SWIGINTERN (line 5880) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsUnused(PyObject *sel...
function SWIGINTERN (line 5919) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsByte(PyObject *self,...
function SWIGINTERN (line 5958) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_unk_id(PyObject *self,...
function SWIGINTERN (line 5990) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_bos_id(PyObject *self,...
function SWIGINTERN (line 6022) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_eos_id(PyObject *self,...
function SWIGINTERN (line 6054) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_pad_id(PyObject *self,...
function SWIGINTERN (line 6086) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_serialized_model_proto...
function SWIGINTERN (line 6120) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor_LoadFromFile(PyObject ...
function SWIGINTERN (line 6166) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsIds(PyObject ...
function SWIGINTERN (line 6268) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsPieces(PyObje...
function SWIGINTERN (line 6371) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsSerializedPro...
function SWIGINTERN (line 6470) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsImmutableProt...
function SWIGINTERN (line 6567) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsIdsBatch(PyOb...
function SWIGINTERN (line 6699) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsPiecesBatch(P...
function SWIGINTERN (line 6832) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsSerializedPro...
function SWIGINTERN (line 6960) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsImmutableProt...
function SWIGINTERN (line 7089) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIds(PyObject *s...
function SWIGINTERN (line 7150) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsAsBytes(PyOb...
function SWIGINTERN (line 7210) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePieces(PyObject...
function SWIGINTERN (line 7272) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsAsSerialized...
function SWIGINTERN (line 7332) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesAsSeriali...
function SWIGINTERN (line 7393) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsAsImmutableP...
function SWIGINTERN (line 7451) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesAsImmutab...
function SWIGINTERN (line 7510) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsBatch(PyObje...
function SWIGINTERN (line 7592) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsAsBytesBatch...
function SWIGINTERN (line 7673) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsAsSerialized...
function SWIGINTERN (line 7754) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsAsImmutableP...
function SWIGINTERN (line 7836) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesBatch(PyO...
function SWIGINTERN (line 7913) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesAsSeriali...
function SWIGINTERN (line 7989) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesAsImmutab...
function SWIGINTERN (line 8066) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__NBestEncodeAsIds(PyOb...
function SWIGINTERN (line 8156) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__NBestEncodeAsPieces(P...
function SWIGINTERN (line 8247) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__NBestEncodeAsSerializ...
function SWIGINTERN (line 8330) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__NBestEncodeAsImmutabl...
function SWIGINTERN (line 8411) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__SampleEncodeAndScoreA...
function SWIGINTERN (line 8525) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__SampleEncodeAndScoreA...
function SWIGINTERN (line 8640) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__SampleEncodeAndScoreA...
function SWIGINTERN (line 8747) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__SampleEncodeAndScoreA...
function SWIGINTERN (line 8852) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__Normalize(PyObject *s...
function SWIGINTERN (line 8896) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__NormalizeWithOffsets(...
function SWIGINTERN (line 8947) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__CalculateEntropy(PyOb...
function SWIGINTERN (line 8996) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__CalculateEntropyBatch...
function SWIGINTERN (line 9076) | SWIGINTERN PyObject *_wrap_SentencePieceProcessor__OverrideNormalizerSpe...
function SWIGINTERN (line 9143) | SWIGINTERN PyObject *SentencePieceProcessor_swigregister(PyObject *SWIGU...
function SWIGINTERN (line 9150) | SWIGINTERN PyObject *SentencePieceProcessor_swiginit(PyObject *SWIGUNUSE...
function SWIGINTERN (line 9154) | SWIGINTERN PyObject *_wrap_SetRandomGeneratorSeed(PyObject *self, PyObje...
function SWIGINTERN (line 9185) | SWIGINTERN PyObject *_wrap_SetMinLogLevel(PyObject *self, PyObject *args) {
function SWIGINTERN (line 9216) | SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromString(PyObjec...
function SWIGINTERN (line 9249) | SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap(PyObject *...
function SWIGINTERN (line 9303) | SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap2(PyObject ...
function SWIGINTERN (line 9373) | SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap3(PyObject ...
function SWIGINTERN (line 9430) | SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap4(PyObject ...
function SWIGINTERN (line 9503) | SWIGINTERN PyObject *SentencePieceTrainer_swigregister(PyObject *SWIGUNU...
function SWIGINTERN (line 9510) | SWIGINTERN PyObject *_wrap_new_SentencePieceNormalizer(PyObject *self, P...
function SWIGINTERN (line 9532) | SWIGINTERN PyObject *_wrap_delete_SentencePieceNormalizer(PyObject *self...
function SWIGINTERN (line 9563) | SWIGINTERN PyObject *_wrap_SentencePieceNormalizer_LoadFromSerializedPro...
function SWIGINTERN (line 9609) | SWIGINTERN PyObject *_wrap_SentencePieceNormalizer_LoadFromRuleTSV(PyObj...
function SWIGINTERN (line 9655) | SWIGINTERN PyObject *_wrap_SentencePieceNormalizer_LoadFromRuleName(PyOb...
function SWIGINTERN (line 9701) | SWIGINTERN PyObject *_wrap_SentencePieceNormalizer_serialized_model_prot...
function SWIGINTERN (line 9736) | SWIGINTERN PyObject *_wrap_SentencePieceNormalizer_LoadFromFile(PyObject...
function SWIGINTERN (line 9782) | SWIGINTERN PyObject *_wrap_SentencePieceNormalizer__Normalize(PyObject *...
function SWIGINTERN (line 9826) | SWIGINTERN PyObject *_wrap_SentencePieceNormalizer__NormalizeWithOffsets...
function SWIGINTERN (line 9877) | SWIGINTERN PyObject *_wrap_SentencePieceNormalizer__SetProtoField(PyObje...
function SWIGINTERN (line 9925) | SWIGINTERN PyObject *SentencePieceNormalizer_swigregister(PyObject *SWIG...
function SWIGINTERN (line 9932) | SWIGINTERN PyObject *SentencePieceNormalizer_swiginit(PyObject *SWIGUNUS...
function SWIGINTERN (line 9936) | SWIGINTERN PyObject *_wrap_SetDataDir(PyObject *self, PyObject *args) {
function SWIGRUNTIME (line 10218) | SWIGRUNTIME void
function SWIGRUNTIME (line 10365) | SWIGRUNTIME void
function SWIGINTERN (line 10407) | SWIGINTERN void
function SWIGINTERN (line 10434) | SWIGINTERN void
FILE: python/test/sentencepiece_test.py
class TestSentencepieceProcessor (line 35) | class TestSentencepieceProcessor(unittest.TestCase):
method setUp (line 38) | def setUp(self):
method test_load (line 48) | def test_load(self):
method test_roundtrip (line 75) | def test_roundtrip(self):
method test_ja_load (line 134) | def test_ja_load(self):
method test_ja_roundtrip (line 157) | def test_ja_roundtrip(self):
method test_train (line 206) | def test_train(self):
method test_train_iterator (line 220) | def test_train_iterator(self):
method test_train_kwargs (line 269) | def test_train_kwargs(self):
method test_serialized_proto (line 289) | def test_serialized_proto(self):
method test_decode_bytes (line 335) | def test_decode_bytes(self):
method test_immutable_proto (line 369) | def test_immutable_proto(self):
method test_new_api (line 488) | def test_new_api(self):
method test_new_api_init (line 583) | def test_new_api_init(self):
method test_sampling (line 597) | def test_sampling(self):
method test_nbest (line 630) | def test_nbest(self):
method test_sample_and_score (line 689) | def test_sample_and_score(self):
method test_valid_range (line 723) | def test_valid_range(self):
method test_batch (line 745) | def test_batch(self):
method test_pickle (line 781) | def test_pickle(self):
method test_global_params (line 795) | def test_global_params(self):
method test_normalize (line 801) | def test_normalize(self):
method test_normalizer (line 844) | def test_normalizer(self):
method test_normalizer_rule (line 906) | def test_normalizer_rule(self):
method test_override_normalize_spec (line 913) | def test_override_normalize_spec(self):
function suite (line 931) | def suite():
FILE: src/bpe_model.cc
type sentencepiece (line 28) | namespace sentencepiece {
type bpe (line 29) | namespace bpe {
type SymbolPair (line 44) | struct SymbolPair {
class SymbolPairComparator (line 51) | class SymbolPairComparator {
type Symbol (line 59) | struct Symbol {
FILE: src/bpe_model.h
function namespace (line 21) | namespace sentencepiece {
FILE: src/bpe_model_test.cc
type sentencepiece (line 22) | namespace sentencepiece {
type bpe (line 23) | namespace bpe {
function ModelProto (line 26) | ModelProto MakeBaseModelProto() {
function AddPiece (line 42) | void AddPiece(ModelProto *model_proto, const std::string &piece,
function TEST (line 49) | TEST(BPEModelTest, EncodeTest) {
function TEST (line 143) | TEST(BPEModelTest, EncodeAmbiguousTest) {
function TEST (line 189) | TEST(BPEModelTest, NotSupportedTest) {
function TEST (line 195) | TEST(BPEModelTest, EncodeWithUnusedTest) {
function TEST (line 252) | TEST(SampleModelTest, EncodeTest) {
FILE: src/bpe_model_trainer.cc
type sentencepiece (line 28) | namespace sentencepiece {
type bpe (line 29) | namespace bpe {
FILE: src/bpe_model_trainer.h
function namespace (line 28) | namespace sentencepiece {
FILE: src/bpe_model_trainer_test.cc
type sentencepiece (line 27) | namespace sentencepiece {
type bpe (line 28) | namespace bpe {
function RunTrainer (line 34) | std::string RunTrainer(
function TEST (line 81) | TEST(BPETrainerTest, BasicTest) {
function TEST (line 94) | TEST(BPETrainerTest, EndToEndTest) {
FILE: src/builder.cc
type sentencepiece (line 47) | namespace sentencepiece {
type normalizer (line 48) | namespace normalizer {
function UnicodeNormalize (line 62) | Builder::Chars UnicodeNormalize(UNormalizationMode mode,
function ToNFKD (line 79) | Builder::Chars ToNFKD(const Builder::Chars &input) {
function ToNFKC (line 83) | Builder::Chars ToNFKC(const Builder::Chars &input) {
function ToNFC (line 87) | Builder::Chars ToNFC(const Builder::Chars &input) {
function ToNFD (line 91) | Builder::Chars ToNFD(const Builder::Chars &input) {
function ExpandUnnormalized (line 98) | std::vector<Builder::Chars> ExpandUnnormalized(
function Normalize (line 124) | Builder::Chars Normalize(const Builder::CharsMap &chars_map,
function IsValidNormalizerData (line 156) | util::Status IsValidNormalizerData(absl::string_view blob_data) {
function BuildMapInternal (line 332) | util::Status BuildMapInternal(
FILE: src/builder.h
function namespace (line 27) | namespace sentencepiece {
FILE: src/builder_test.cc
type sentencepiece (line 25) | namespace sentencepiece {
type normalizer (line 26) | namespace normalizer {
function TEST (line 31) | TEST(BuilderTest, RemoveRedundantMapTest) {
function TEST (line 48) | TEST(BuilderTest, GetPrecompiledCharsMapWithInvalidNameTest) {
function TEST (line 54) | TEST(BuilderTest, BuildNFKCMapTest) {
function TEST (line 64) | TEST(BuilderTest, GetPrecompiledCharsMapTest) {
function TEST (line 103) | TEST(BuilderTest, CompileCharsMap) {
function TEST (line 145) | TEST(BuilderTest, LoadCharsMapTest) {
function TEST (line 179) | TEST(BuilderTest, LoadCharsMapWithEmptyeTest) {
function TEST (line 211) | TEST(BuilderTest, ContainsTooManySharedPrefixTest) {
FILE: src/builtin_pb/sentencepiece.pb.cc
type sentencepiece (line 16) | namespace sentencepiece {
class SentencePieceText_SentencePieceDefaultTypeInternal (line 17) | class SentencePieceText_SentencePieceDefaultTypeInternal {
class SentencePieceTextDefaultTypeInternal (line 21) | class SentencePieceTextDefaultTypeInternal {
class NBestSentencePieceTextDefaultTypeInternal (line 25) | class NBestSentencePieceTextDefaultTypeInternal {
class SentencePieceText_SentencePiece::_Internal (line 75) | class SentencePieceText_SentencePiece::_Internal {
method set_has_piece (line 78) | static void set_has_piece(HasBits* has_bits) {
method set_has_id (line 81) | static void set_has_id(HasBits* has_bits) {
method set_has_surface (line 84) | static void set_has_surface(HasBits* has_bits) {
method set_has_begin (line 87) | static void set_has_begin(HasBits* has_bits) {
method set_has_end (line 90) | static void set_has_end(HasBits* has_bits) {
function SentencePieceText_SentencePiece (line 154) | const SentencePieceText_SentencePiece& SentencePieceText_SentencePiece...
class SentencePieceText::_Internal (line 439) | class SentencePieceText::_Internal {
method set_has_text (line 442) | static void set_has_text(HasBits* has_bits) {
method set_has_score (line 445) | static void set_has_score(HasBits* has_bits) {
function SentencePieceText (line 499) | const SentencePieceText& SentencePieceText::default_instance() {
class NBestSentencePieceText::_Internal (line 726) | class NBestSentencePieceText::_Internal {
function NBestSentencePieceText (line 767) | const NBestSentencePieceText& NBestSentencePieceText::default_instance...
function InitDefaultsscc_info_NBestSentencePieceText_sentencepiece_2eproto (line 30) | static void InitDefaultsscc_info_NBestSentencePieceText_sentencepiece_2e...
function InitDefaultsscc_info_SentencePieceText_sentencepiece_2eproto (line 44) | static void InitDefaultsscc_info_SentencePieceText_sentencepiece_2eproto...
function InitDefaultsscc_info_SentencePieceText_SentencePiece_sentencepiece_2eproto (line 58) | static void InitDefaultsscc_info_SentencePieceText_SentencePiece_sentenc...
type sentencepiece (line 71) | namespace sentencepiece {
class SentencePieceText_SentencePieceDefaultTypeInternal (line 17) | class SentencePieceText_SentencePieceDefaultTypeInternal {
class SentencePieceTextDefaultTypeInternal (line 21) | class SentencePieceTextDefaultTypeInternal {
class NBestSentencePieceTextDefaultTypeInternal (line 25) | class NBestSentencePieceTextDefaultTypeInternal {
class SentencePieceText_SentencePiece::_Internal (line 75) | class SentencePieceText_SentencePiece::_Internal {
method set_has_piece (line 78) | static void set_has_piece(HasBits* has_bits) {
method set_has_id (line 81) | static void set_has_id(HasBits* has_bits) {
method set_has_surface (line 84) | static void set_has_surface(HasBits* has_bits) {
method set_has_begin (line 87) | static void set_has_begin(HasBits* has_bits) {
method set_has_end (line 90) | static void set_has_end(HasBits* has_bits) {
function SentencePieceText_SentencePiece (line 154) | const SentencePieceText_SentencePiece& SentencePieceText_SentencePiece...
class SentencePieceText::_Internal (line 439) | class SentencePieceText::_Internal {
method set_has_text (line 442) | static void set_has_text(HasBits* has_bits) {
method set_has_score (line 445) | static void set_has_score(HasBits* has_bits) {
function SentencePieceText (line 499) | const SentencePieceText& SentencePieceText::default_instance() {
class NBestSentencePieceText::_Internal (line 726) | class NBestSentencePieceText::_Internal {
function NBestSentencePieceText (line 767) | const NBestSentencePieceText& NBestSentencePieceText::default_instance...
function PROTOBUF_NAMESPACE_OPEN (line 910) | PROTOBUF_NAMESPACE_OPEN
FILE: src/builtin_pb/sentencepiece.pb.h
function PROTOBUF_NAMESPACE_OPEN (line 35) | PROTOBUF_NAMESPACE_OPEN
function PROTOBUF_NAMESPACE_CLOSE (line 39) | PROTOBUF_NAMESPACE_CLOSE
function namespace (line 53) | namespace sentencepiece {
function PROTOBUF_NAMESPACE_CLOSE (line 68) | PROTOBUF_NAMESPACE_CLOSE
function std (line 101) | inline std::string* mutable_unknown_fields() {
function SentencePieceText_SentencePiece (line 107) | static inline const SentencePieceText_SentencePiece* internal_default_in...
function Swap (line 117) | inline void Swap(SentencePieceText_SentencePiece* other) {
function UnsafeArenaSwap (line 125) | void UnsafeArenaSwap(SentencePieceText_SentencePiece* other) {
function SentencePieceText_SentencePiece (line 133) | inline SentencePieceText_SentencePiece* New() const final {
function final (line 137) | const final {
function PROTOBUF_ATTRIBUTE_REINITIALIZES (line 144) | PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final;
function PROTOBUF_NAMESPACE_ID (line 160) | static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() {
function std (line 189) | const std::string& piece() const;
function std (line 311) | inline std::string* mutable_unknown_fields() {
function SentencePieceText (line 317) | static inline const SentencePieceText* internal_default_instance() {
function Swap (line 327) | inline void Swap(SentencePieceText* other) {
function UnsafeArenaSwap (line 335) | void UnsafeArenaSwap(SentencePieceText* other) {
function SentencePieceText (line 343) | inline SentencePieceText* New() const final {
function final (line 347) | const final {
function PROTOBUF_ATTRIBUTE_REINITIALIZES (line 354) | PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final;
function PROTOBUF_NAMESPACE_ID (line 370) | static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() {
type SentencePieceText_SentencePiece (line 384) | typedef SentencePieceText_SentencePiece SentencePiece;
function PROTOBUF_NAMESPACE_ID (line 408) | const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::Senten...
function std (line 491) | inline std::string* mutable_unknown_fields() {
function NBestSentencePieceText (line 497) | static inline const NBestSentencePieceText* internal_default_instance() {
function Swap (line 507) | inline void Swap(NBestSentencePieceText* other) {
function UnsafeArenaSwap (line 515) | void UnsafeArenaSwap(NBestSentencePieceText* other) {
function NBestSentencePieceText (line 523) | inline NBestSentencePieceText* New() const final {
function final (line 527) | const final {
function PROTOBUF_ATTRIBUTE_REINITIALIZES (line 534) | PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final;
function PROTOBUF_NAMESPACE_ID (line 550) | static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() {
function _internal_has_piece (line 610) | inline bool SentencePieceText_SentencePiece::_internal_has_piece() const {
function clear_piece (line 617) | inline void SentencePieceText_SentencePiece::clear_piece() {
function set_piece (line 625) | inline void SentencePieceText_SentencePiece::set_piece(const std::string...
function std (line 629) | inline std::string* SentencePieceText_SentencePiece::mutable_piece() {
function _internal_set_piece (line 636) | inline void SentencePieceText_SentencePiece::_internal_set_piece(const s...
function set_piece (line 640) | inline void SentencePieceText_SentencePiece::set_piece(std::string&& val...
function set_piece (line 646) | inline void SentencePieceText_SentencePiece::set_piece(const char* value) {
function set_piece (line 652) | inline void SentencePieceText_SentencePiece::set_piece(const char* value,
function std (line 659) | inline std::string* SentencePieceText_SentencePiece::_internal_mutable_p...
function std (line 663) | inline std::string* SentencePieceText_SentencePiece::release_piece() {
function set_allocated_piece (line 671) | inline void SentencePieceText_SentencePiece::set_allocated_piece(std::st...
function _internal_has_id (line 683) | inline bool SentencePieceText_SentencePiece::_internal_has_id() const {
function clear_id (line 690) | inline void SentencePieceText_SentencePiece::clear_id() {
function _internal_set_id (line 701) | inline void SentencePieceText_SentencePiece::_internal_set_id(::PROTOBUF...
function set_id (line 705) | inline void SentencePieceText_SentencePiece::set_id(::PROTOBUF_NAMESPACE...
function _internal_has_surface (line 711) | inline bool SentencePieceText_SentencePiece::_internal_has_surface() con...
function clear_surface (line 718) | inline void SentencePieceText_SentencePiece::clear_surface() {
function set_surface (line 726) | inline void SentencePieceText_SentencePiece::set_surface(const std::stri...
function std (line 730) | inline std::string* SentencePieceText_SentencePiece::mutable_surface() {
function _internal_set_surface (line 737) | inline void SentencePieceText_SentencePiece::_internal_set_surface(const...
function set_surface (line 741) | inline void SentencePieceText_SentencePiece::set_surface(std::string&& v...
function set_surface (line 747) | inline void SentencePieceText_SentencePiece::set_surface(const char* val...
function set_surface (line 753) | inline void SentencePieceText_SentencePiece::set_surface(const char* value,
function std (line 760) | inline std::string* SentencePieceText_SentencePiece::_internal_mutable_s...
function std (line 764) | inline std::string* SentencePieceText_SentencePiece::release_surface() {
function set_allocated_surface (line 772) | inline void SentencePieceText_SentencePiece::set_allocated_surface(std::...
function _internal_has_begin (line 784) | inline bool SentencePieceText_SentencePiece::_internal_has_begin() const {
function clear_begin (line 791) | inline void SentencePieceText_SentencePiece::clear_begin() {
function _internal_set_begin (line 802) | inline void SentencePieceText_SentencePiece::_internal_set_begin(::PROTO...
function set_begin (line 806) | inline void SentencePieceText_SentencePiece::set_begin(::PROTOBUF_NAMESP...
function _internal_has_end (line 812) | inline bool SentencePieceText_SentencePiece::_internal_has_end() const {
function clear_end (line 819) | inline void SentencePieceText_SentencePiece::clear_end() {
function _internal_set_end (line 830) | inline void SentencePieceText_SentencePiece::_internal_set_end(::PROTOBU...
function set_end (line 834) | inline void SentencePieceText_SentencePiece::set_end(::PROTOBUF_NAMESPAC...
function _internal_has_text (line 844) | inline bool SentencePieceText::_internal_has_text() const {
function clear_text (line 851) | inline void SentencePieceText::clear_text() {
function set_text (line 859) | inline void SentencePieceText::set_text(const std::string& value) {
function std (line 863) | inline std::string* SentencePieceText::mutable_text() {
function _internal_set_text (line 870) | inline void SentencePieceText::_internal_set_text(const std::string& val...
function set_text (line 874) | inline void SentencePieceText::set_text(std::string&& value) {
function set_text (line 880) | inline void SentencePieceText::set_text(const char* value) {
function set_text (line 886) | inline void SentencePieceText::set_text(const char* value,
function std (line 893) | inline std::string* SentencePieceText::_internal_mutable_text() {
function std (line 897) | inline std::string* SentencePieceText::release_text() {
function set_allocated_text (line 905) | inline void SentencePieceText::set_allocated_text(std::string* text) {
function clear_pieces (line 923) | inline void SentencePieceText::clear_pieces() {
function sentencepiece (line 926) | inline ::sentencepiece::SentencePieceText_SentencePiece* SentencePieceTe...
function PROTOBUF_NAMESPACE_ID (line 930) | inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::Sente...
function sentencepiece (line 935) | inline const ::sentencepiece::SentencePieceText_SentencePiece& SentenceP...
function sentencepiece (line 938) | inline const ::sentencepiece::SentencePieceText_SentencePiece& SentenceP...
function sentencepiece (line 942) | inline ::sentencepiece::SentencePieceText_SentencePiece* SentencePieceTe...
function sentencepiece (line 945) | inline ::sentencepiece::SentencePieceText_SentencePiece* SentencePieceTe...
function PROTOBUF_NAMESPACE_ID (line 949) | inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece:...
function clear_score (line 963) | inline void SentencePieceText::clear_score() {
function _internal_set_score (line 974) | inline void SentencePieceText::_internal_set_score(float value) {
function set_score (line 978) | inline void SentencePieceText::set_score(float value) {
function clear_nbests (line 994) | inline void NBestSentencePieceText::clear_nbests() {
function sentencepiece (line 997) | inline ::sentencepiece::SentencePieceText* NBestSentencePieceText::mutab...
function PROTOBUF_NAMESPACE_ID (line 1001) | inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::Sente...
function sentencepiece (line 1006) | inline const ::sentencepiece::SentencePieceText& NBestSentencePieceText:...
function sentencepiece (line 1009) | inline const ::sentencepiece::SentencePieceText& NBestSentencePieceText:...
function sentencepiece (line 1013) | inline ::sentencepiece::SentencePieceText* NBestSentencePieceText::_inte...
function sentencepiece (line 1016) | inline ::sentencepiece::SentencePieceText* NBestSentencePieceText::add_n...
FILE: src/builtin_pb/sentencepiece_model.pb.cc
type sentencepiece (line 19) | namespace sentencepiece {
class TrainerSpecDefaultTypeInternal (line 20) | class TrainerSpecDefaultTypeInternal {
class NormalizerSpecDefaultTypeInternal (line 24) | class NormalizerSpecDefaultTypeInternal {
class SelfTestData_SampleDefaultTypeInternal (line 28) | class SelfTestData_SampleDefaultTypeInternal {
class SelfTestDataDefaultTypeInternal (line 32) | class SelfTestDataDefaultTypeInternal {
class ModelProto_SentencePieceDefaultTypeInternal (line 36) | class ModelProto_SentencePieceDefaultTypeInternal {
class ModelProtoDefaultTypeInternal (line 40) | class ModelProtoDefaultTypeInternal {
function TrainerSpec_ModelType_IsValid (line 129) | bool TrainerSpec_ModelType_IsValid(int value) {
function TrainerSpec_ModelType_Parse (line 178) | bool TrainerSpec_ModelType_Parse(
function ModelProto_SentencePiece_Type_IsValid (line 197) | bool ModelProto_SentencePiece_Type_IsValid(int value) {
function ModelProto_SentencePiece_Type_Parse (line 254) | bool ModelProto_SentencePiece_Type_Parse(
class TrainerSpec::_Internal (line 278) | class TrainerSpec::_Internal {
method set_has_input_format (line 281) | static void set_has_input_format(HasBits* has_bits) {
method set_has_model_prefix (line 284) | static void set_has_model_prefix(HasBits* has_bits) {
method set_has_model_type (line 287) | static void set_has_model_type(HasBits* has_bits) {
method set_has_vocab_size (line 290) | static void set_has_vocab_size(HasBits* has_bits) {
method set_has_self_test_sample_size (line 293) | static void set_has_self_test_sample_size(HasBits* has_bits) {
method set_has_enable_differential_privacy (line 296) | static void set_has_enable_differential_privacy(HasBits* has_bits) {
method set_has_differential_privacy_noise_level (line 299) | static void set_has_differential_privacy_noise_level(HasBits* has_bi...
method set_has_differential_privacy_clipping_threshold (line 302) | static void set_has_differential_privacy_clipping_threshold(HasBits*...
method set_has_character_coverage (line 305) | static void set_has_character_coverage(HasBits* has_bits) {
method set_has_input_sentence_size (line 308) | static void set_has_input_sentence_size(HasBits* has_bits) {
method set_has_shuffle_input_sentence (line 311) | static void set_has_shuffle_input_sentence(HasBits* has_bits) {
method set_has_mining_sentence_size (line 314) | static void set_has_mining_sentence_size(HasBits* has_bits) {
method set_has_training_sentence_size (line 317) | static void set_has_training_sentence_size(HasBits* has_bits) {
method set_has_seed_sentencepiece_size (line 320) | static void set_has_seed_sentencepiece_size(HasBits* has_bits) {
method set_has_shrinking_factor (line 323) | static void set_has_shrinking_factor(HasBits* has_bits) {
method set_has_max_sentence_length (line 326) | static void set_has_max_sentence_length(HasBits* has_bits) {
method set_has_num_threads (line 329) | static void set_has_num_threads(HasBits* has_bits) {
method set_has_num_sub_iterations (line 332) | static void set_has_num_sub_iterations(HasBits* has_bits) {
method set_has_max_sentencepiece_length (line 335) | static void set_has_max_sentencepiece_length(HasBits* has_bits) {
method set_has_split_by_unicode_script (line 338) | static void set_has_split_by_unicode_script(HasBits* has_bits) {
method set_has_split_by_number (line 341) | static void set_has_split_by_number(HasBits* has_bits) {
method set_has_split_by_whitespace (line 344) | static void set_has_split_by_whitespace(HasBits* has_bits) {
method set_has_treat_whitespace_as_suffix (line 347) | static void set_has_treat_whitespace_as_suffix(HasBits* has_bits) {
method set_has_allow_whitespace_only_pieces (line 350) | static void set_has_allow_whitespace_only_pieces(HasBits* has_bits) {
method set_has_split_digits (line 353) | static void set_has_split_digits(HasBits* has_bits) {
method set_has_pretokenization_delimiter (line 356) | static void set_has_pretokenization_delimiter(HasBits* has_bits) {
method set_has_required_chars (line 359) | static void set_has_required_chars(HasBits* has_bits) {
method set_has_byte_fallback (line 362) | static void set_has_byte_fallback(HasBits* has_bits) {
method set_has_vocabulary_output_piece_score (line 365) | static void set_has_vocabulary_output_piece_score(HasBits* has_bits) {
method set_has_hard_vocab_limit (line 368) | static void set_has_hard_vocab_limit(HasBits* has_bits) {
method set_has_use_all_vocab (line 371) | static void set_has_use_all_vocab(HasBits* has_bits) {
method set_has_unk_id (line 374) | static void set_has_unk_id(HasBits* has_bits) {
method set_has_bos_id (line 377) | static void set_has_bos_id(HasBits* has_bits) {
method set_has_eos_id (line 380) | static void set_has_eos_id(HasBits* has_bits) {
method set_has_pad_id (line 383) | static void set_has_pad_id(HasBits* has_bits) {
method set_has_unk_piece (line 386) | static void set_has_unk_piece(HasBits* has_bits) {
method set_has_bos_piece (line 389) | static void set_has_bos_piece(HasBits* has_bits) {
method set_has_eos_piece (line 392) | static void set_has_eos_piece(HasBits* has_bits) {
method set_has_pad_piece (line 395) | static void set_has_pad_piece(HasBits* has_bits) {
method set_has_unk_surface (line 398) | static void set_has_unk_surface(HasBits* has_bits) {
method set_has_train_extremely_large_corpus (line 401) | static void set_has_train_extremely_large_corpus(HasBits* has_bits) {
method set_has_seed_sentencepieces_file (line 404) | static void set_has_seed_sentencepieces_file(HasBits* has_bits) {
function TrainerSpec (line 555) | const TrainerSpec& TrainerSpec::default_instance() {
class NormalizerSpec::_Internal (line 1933) | class NormalizerSpec::_Internal {
method set_has_name (line 1936) | static void set_has_name(HasBits* has_bits) {
method set_has_precompiled_charsmap (line 1939) | static void set_has_precompiled_charsmap(HasBits* has_bits) {
method set_has_add_dummy_prefix (line 1942) | static void set_has_add_dummy_prefix(HasBits* has_bits) {
method set_has_remove_extra_whitespaces (line 1945) | static void set_has_remove_extra_whitespaces(HasBits* has_bits) {
method set_has_escape_whitespaces (line 1948) | static void set_has_escape_whitespaces(HasBits* has_bits) {
method set_has_normalization_rule_tsv (line 1951) | static void set_has_normalization_rule_tsv(HasBits* has_bits) {
function NormalizerSpec (line 2021) | const NormalizerSpec& NormalizerSpec::default_instance() {
class SelfTestData_Sample::_Internal (line 2323) | class SelfTestData_Sample::_Internal {
method set_has_input (line 2326) | static void set_has_input(HasBits* has_bits) {
method set_has_expected (line 2329) | static void set_has_expected(HasBits* has_bits) {
function SelfTestData_Sample (line 2384) | const SelfTestData_Sample& SelfTestData_Sample::default_instance() {
class SelfTestData::_Internal (line 2566) | class SelfTestData::_Internal {
function SelfTestData (line 2609) | const SelfTestData& SelfTestData::default_instance() {
class ModelProto_SentencePiece::_Internal (line 2770) | class ModelProto_SentencePiece::_Internal {
method set_has_piece (line 2773) | static void set_has_piece(HasBits* has_bits) {
method set_has_score (line 2776) | static void set_has_score(HasBits* has_bits) {
method set_has_type (line 2779) | static void set_has_type(HasBits* has_bits) {
function ModelProto_SentencePiece (line 2834) | const ModelProto_SentencePiece& ModelProto_SentencePiece::default_inst...
class ModelProto::_Internal (line 3062) | class ModelProto::_Internal {
method set_has_trainer_spec (line 3066) | static void set_has_trainer_spec(HasBits* has_bits) {
method set_has_normalizer_spec (line 3070) | static void set_has_normalizer_spec(HasBits* has_bits) {
method set_has_self_test_data (line 3074) | static void set_has_self_test_data(HasBits* has_bits) {
method set_has_denormalizer_spec (line 3078) | static void set_has_denormalizer_spec(HasBits* has_bits) {
function ModelProto (line 3167) | const ModelProto& ModelProto::default_instance() {
function InitDefaultsscc_info_ModelProto_sentencepiece_5fmodel_2eproto (line 45) | static void InitDefaultsscc_info_ModelProto_sentencepiece_5fmodel_2eprot...
function InitDefaultsscc_info_ModelProto_SentencePiece_sentencepiece_5fmodel_2eproto (line 62) | static void InitDefaultsscc_info_ModelProto_SentencePiece_sentencepiece_...
function InitDefaultsscc_info_NormalizerSpec_sentencepiece_5fmodel_2eproto (line 75) | static void InitDefaultsscc_info_NormalizerSpec_sentencepiece_5fmodel_2e...
function InitDefaultsscc_info_SelfTestData_sentencepiece_5fmodel_2eproto (line 88) | static void InitDefaultsscc_info_SelfTestData_sentencepiece_5fmodel_2epr...
function InitDefaultsscc_info_SelfTestData_Sample_sentencepiece_5fmodel_2eproto (line 102) | static void InitDefaultsscc_info_SelfTestData_Sample_sentencepiece_5fmod...
function InitDefaultsscc_info_TrainerSpec_sentencepiece_5fmodel_2eproto (line 115) | static void InitDefaultsscc_info_TrainerSpec_sentencepiece_5fmodel_2epro...
type sentencepiece (line 128) | namespace sentencepiece {
class TrainerSpecDefaultTypeInternal (line 20) | class TrainerSpecDefaultTypeInternal {
class NormalizerSpecDefaultTypeInternal (line 24) | class NormalizerSpecDefaultTypeInternal {
class SelfTestData_SampleDefaultTypeInternal (line 28) | class SelfTestData_SampleDefaultTypeInternal {
class SelfTestDataDefaultTypeInternal (line 32) | class SelfTestDataDefaultTypeInternal {
class ModelProto_SentencePieceDefaultTypeInternal (line 36) | class ModelProto_SentencePieceDefaultTypeInternal {
class ModelProtoDefaultTypeInternal (line 40) | class ModelProtoDefaultTypeInternal {
function TrainerSpec_ModelType_IsValid (line 129) | bool TrainerSpec_ModelType_IsValid(int value) {
function TrainerSpec_ModelType_Parse (line 178) | bool TrainerSpec_ModelType_Parse(
function ModelProto_SentencePiece_Type_IsValid (line 197) | bool ModelProto_SentencePiece_Type_IsValid(int value) {
function ModelProto_SentencePiece_Type_Parse (line 254) | bool ModelProto_SentencePiece_Type_Parse(
class TrainerSpec::_Internal (line 278) | class TrainerSpec::_Internal {
method set_has_input_format (line 281) | static void set_has_input_format(HasBits* has_bits) {
method set_has_model_prefix (line 284) | static void set_has_model_prefix(HasBits* has_bits) {
method set_has_model_type (line 287) | static void set_has_model_type(HasBits* has_bits) {
method set_has_vocab_size (line 290) | static void set_has_vocab_size(HasBits* has_bits) {
method set_has_self_test_sample_size (line 293) | static void set_has_self_test_sample_size(HasBits* has_bits) {
method set_has_enable_differential_privacy (line 296) | static void set_has_enable_differential_privacy(HasBits* has_bits) {
method set_has_differential_privacy_noise_level (line 299) | static void set_has_differential_privacy_noise_level(HasBits* has_bi...
method set_has_differential_privacy_clipping_threshold (line 302) | static void set_has_differential_privacy_clipping_threshold(HasBits*...
method set_has_character_coverage (line 305) | static void set_has_character_coverage(HasBits* has_bits) {
method set_has_input_sentence_size (line 308) | static void set_has_input_sentence_size(HasBits* has_bits) {
method set_has_shuffle_input_sentence (line 311) | static void set_has_shuffle_input_sentence(HasBits* has_bits) {
method set_has_mining_sentence_size (line 314) | static void set_has_mining_sentence_size(HasBits* has_bits) {
method set_has_training_sentence_size (line 317) | static void set_has_training_sentence_size(HasBits* has_bits) {
method set_has_seed_sentencepiece_size (line 320) | static void set_has_seed_sentencepiece_size(HasBits* has_bits) {
method set_has_shrinking_factor (line 323) | static void set_has_shrinking_factor(HasBits* has_bits) {
method set_has_max_sentence_length (line 326) | static void set_has_max_sentence_length(HasBits* has_bits) {
method set_has_num_threads (line 329) | static void set_has_num_threads(HasBits* has_bits) {
method set_has_num_sub_iterations (line 332) | static void set_has_num_sub_iterations(HasBits* has_bits) {
method set_has_max_sentencepiece_length (line 335) | static void set_has_max_sentencepiece_length(HasBits* has_bits) {
method set_has_split_by_unicode_script (line 338) | static void set_has_split_by_unicode_script(HasBits* has_bits) {
method set_has_split_by_number (line 341) | static void set_has_split_by_number(HasBits* has_bits) {
method set_has_split_by_whitespace (line 344) | static void set_has_split_by_whitespace(HasBits* has_bits) {
method set_has_treat_whitespace_as_suffix (line 347) | static void set_has_treat_whitespace_as_suffix(HasBits* has_bits) {
method set_has_allow_whitespace_only_pieces (line 350) | static void set_has_allow_whitespace_only_pieces(HasBits* has_bits) {
method set_has_split_digits (line 353) | static void set_has_split_digits(HasBits* has_bits) {
method set_has_pretokenization_delimiter (line 356) | static void set_has_pretokenization_delimiter(HasBits* has_bits) {
method set_has_required_chars (line 359) | static void set_has_required_chars(HasBits* has_bits) {
method set_has_byte_fallback (line 362) | static void set_has_byte_fallback(HasBits* has_bits) {
method set_has_vocabulary_output_piece_score (line 365) | static void set_has_vocabulary_output_piece_score(HasBits* has_bits) {
method set_has_hard_vocab_limit (line 368) | static void set_has_hard_vocab_limit(HasBits* has_bits) {
method set_has_use_all_vocab (line 371) | static void set_has_use_all_vocab(HasBits* has_bits) {
method set_has_unk_id (line 374) | static void set_has_unk_id(HasBits* has_bits) {
method set_has_bos_id (line 377) | static void set_has_bos_id(HasBits* has_bits) {
method set_has_eos_id (line 380) | static void set_has_eos_id(HasBits* has_bits) {
method set_has_pad_id (line 383) | static void set_has_pad_id(HasBits* has_bits) {
method set_has_unk_piece (line 386) | static void set_has_unk_piece(HasBits* has_bits) {
method set_has_bos_piece (line 389) | static void set_has_bos_piece(HasBits* has_bits) {
method set_has_eos_piece (line 392) | static void set_has_eos_piece(HasBits* has_bits) {
method set_has_pad_piece (line 395) | static void set_has_pad_piece(HasBits* has_bits) {
method set_has_unk_surface (line 398) | static void set_has_unk_surface(HasBits* has_bits) {
method set_has_train_extremely_large_corpus (line 401) | static void set_has_train_extremely_large_corpus(HasBits* has_bits) {
method set_has_seed_sentencepieces_file (line 404) | static void set_has_seed_sentencepieces_file(HasBits* has_bits) {
function TrainerSpec (line 555) | const TrainerSpec& TrainerSpec::default_instance() {
class NormalizerSpec::_Internal (line 1933) | class NormalizerSpec::_Internal {
method set_has_name (line 1936) | static void set_has_name(HasBits* has_bits) {
method set_has_precompiled_charsmap (line 1939) | static void set_has_precompiled_charsmap(HasBits* has_bits) {
method set_has_add_dummy_prefix (line 1942) | static void set_has_add_dummy_prefix(HasBits* has_bits) {
method set_has_remove_extra_whitespaces (line 1945) | static void set_has_remove_extra_whitespaces(HasBits* has_bits) {
method set_has_escape_whitespaces (line 1948) | static void set_has_escape_whitespaces(HasBits* has_bits) {
method set_has_normalization_rule_tsv (line 1951) | static void set_has_normalization_rule_tsv(HasBits* has_bits) {
function NormalizerSpec (line 2021) | const NormalizerSpec& NormalizerSpec::default_instance() {
class SelfTestData_Sample::_Internal (line 2323) | class SelfTestData_Sample::_Internal {
method set_has_input (line 2326) | static void set_has_input(HasBits* has_bits) {
method set_has_expected (line 2329) | static void set_has_expected(HasBits* has_bits) {
function SelfTestData_Sample (line 2384) | const SelfTestData_Sample& SelfTestData_Sample::default_instance() {
class SelfTestData::_Internal (line 2566) | class SelfTestData::_Internal {
function SelfTestData (line 2609) | const SelfTestData& SelfTestData::default_instance() {
class ModelProto_SentencePiece::_Internal (line 2770) | class ModelProto_SentencePiece::_Internal {
method set_has_piece (line 2773) | static void set_has_piece(HasBits* has_bits) {
method set_has_score (line 2776) | static void set_has_score(HasBits* has_bits) {
method set_has_type (line 2779) | static void set_has_type(HasBits* has_bits) {
function ModelProto_SentencePiece (line 2834) | const ModelProto_SentencePiece& ModelProto_SentencePiece::default_inst...
class ModelProto::_Internal (line 3062) | class ModelProto::_Internal {
method set_has_trainer_spec (line 3066) | static void set_has_trainer_spec(HasBits* has_bits) {
method set_has_normalizer_spec (line 3070) | static void set_has_normalizer_spec(HasBits* has_bits) {
method set_has_self_test_data (line 3074) | static void set_has_self_test_data(HasBits* has_bits) {
method set_has_denormalizer_spec (line 3078) | static void set_has_denormalizer_spec(HasBits* has_bits) {
function ModelProto (line 3167) | const ModelProto& ModelProto::default_instance() {
function PROTOBUF_NAMESPACE_OPEN (line 3477) | PROTOBUF_NAMESPACE_OPEN
FILE: src/builtin_pb/sentencepiece_model.pb.h
function PROTOBUF_NAMESPACE_OPEN (line 36) | PROTOBUF_NAMESPACE_OPEN
function PROTOBUF_NAMESPACE_CLOSE (line 40) | PROTOBUF_NAMESPACE_CLOSE
function namespace (line 54) | namespace sentencepiece {
function PROTOBUF_NAMESPACE_CLOSE (line 81) | PROTOBUF_NAMESPACE_CLOSE
function std (line 158) | inline std::string* mutable_unknown_fields() {
function TrainerSpec (line 164) | static inline const TrainerSpec* internal_default_instance() {
function Swap (line 174) | inline void Swap(TrainerSpec* other) {
function UnsafeArenaSwap (line 182) | void UnsafeArenaSwap(TrainerSpec* other) {
function TrainerSpec (line 190) | inline TrainerSpec* New() const final {
function final (line 194) | const final {
function PROTOBUF_ATTRIBUTE_REINITIALIZES (line 201) | PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final;
function PROTOBUF_NAMESPACE_ID (line 217) | static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() {
type TrainerSpec_ModelType (line 231) | typedef TrainerSpec_ModelType ModelType;
function ModelType_IsValid (line 240) | static inline bool ModelType_IsValid(int value) {
function std (line 250) | inline const std::string& ModelType_Name(T enum_t_value) {
function ModelType_Parse (line 256) | static inline bool ModelType_Parse(::PROTOBUF_NAMESPACE_ID::ConstStringP...
function PROTOBUF_NAMESPACE_ID (line 328) | const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField<std::string>& input() co...
function std (line 1118) | inline std::string* mutable_unknown_fields() {
function NormalizerSpec (line 1124) | static inline const NormalizerSpec* internal_default_instance() {
function Swap (line 1134) | inline void Swap(NormalizerSpec* other) {
function UnsafeArenaSwap (line 1142) | void UnsafeArenaSwap(NormalizerSpec* other) {
function NormalizerSpec (line 1150) | inline NormalizerSpec* New() const final {
function final (line 1154) | const final {
function PROTOBUF_ATTRIBUTE_REINITIALIZES (line 1161) | PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final;
function PROTOBUF_NAMESPACE_ID (line 1177) | static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() {
function std (line 1207) | const std::string& name() const;
function std (line 1350) | inline std::string* mutable_unknown_fields() {
function SelfTestData_Sample (line 1356) | static inline const SelfTestData_Sample* internal_default_instance() {
function Swap (line 1366) | inline void Swap(SelfTestData_Sample* other) {
function UnsafeArenaSwap (line 1374) | void UnsafeArenaSwap(SelfTestData_Sample* other) {
function SelfTestData_Sample (line 1382) | inline SelfTestData_Sample* New() const final {
function final (line 1386) | const final {
function PROTOBUF_ATTRIBUTE_REINITIALIZES (line 1393) | PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final;
function PROTOBUF_NAMESPACE_ID (line 1409) | static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() {
function std (line 1435) | const std::string& input() const;
function std (line 1512) | inline std::string* mutable_unknown_fields() {
function SelfTestData (line 1518) | static inline const SelfTestData* internal_default_instance() {
function Swap (line 1528) | inline void Swap(SelfTestData* other) {
function UnsafeArenaSwap (line 1536) | void UnsafeArenaSwap(SelfTestData* other) {
function SelfTestData (line 1544) | inline SelfTestData* New() const final {
function final (line 1548) | const final {
function PROTOBUF_ATTRIBUTE_REINITIALIZES (line 1555) | PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final;
function PROTOBUF_NAMESPACE_ID (line 1571) | static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() {
type SelfTestData_Sample (line 1585) | typedef SelfTestData_Sample Sample;
function PROTOBUF_NAMESPACE_ID (line 1607) | const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SelfTe...
function std (line 1654) | inline std::string* mutable_unknown_fields() {
function ModelProto_SentencePiece (line 1660) | static inline const ModelProto_SentencePiece* internal_default_instance() {
function Swap (line 1670) | inline void Swap(ModelProto_SentencePiece* other) {
function UnsafeArenaSwap (line 1678) | void UnsafeArenaSwap(ModelProto_SentencePiece* other) {
function ModelProto_SentencePiece (line 1686) | inline ModelProto_SentencePiece* New() const final {
function final (line 1690) | const final {
function PROTOBUF_ATTRIBUTE_REINITIALIZES (line 1697) | PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final;
function PROTOBUF_NAMESPACE_ID (line 1713) | static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() {
type ModelProto_SentencePiece_Type (line 1727) | typedef ModelProto_SentencePiece_Type Type;
function Type_IsValid (line 1740) | static inline bool Type_IsValid(int value) {
function std (line 1750) | inline const std::string& Type_Name(T enum_t_value) {
function Type_Parse (line 1756) | static inline bool Type_Parse(::PROTOBUF_NAMESPACE_ID::ConstStringParam ...
function std (line 1774) | const std::string& piece() const;
function std (line 1861) | inline std::string* mutable_unknown_fields() {
function ModelProto (line 1867) | static inline const ModelProto* internal_default_instance() {
function Swap (line 1877) | inline void Swap(ModelProto* other) {
function UnsafeArenaSwap (line 1885) | void UnsafeArenaSwap(ModelProto* other) {
function ModelProto (line 1893) | inline ModelProto* New() const final {
function final (line 1897) | const final {
function PROTOBUF_ATTRIBUTE_REINITIALIZES (line 1904) | PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final;
function PROTOBUF_NAMESPACE_ID (line 1920) | static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() {
type ModelProto_SentencePiece (line 1934) | typedef ModelProto_SentencePiece SentencePiece;
function clear_input (line 2072) | inline void TrainerSpec::clear_input() {
function std (line 2075) | inline std::string* TrainerSpec::add_input() {
function std (line 2079) | inline const std::string& TrainerSpec::_internal_input(int index) const {
function std (line 2082) | inline const std::string& TrainerSpec::input(int index) const {
function std (line 2086) | inline std::string* TrainerSpec::mutable_input(int index) {
function set_input (line 2090) | inline void TrainerSpec::set_input(int index, const std::string& value) {
function set_input (line 2094) | inline void TrainerSpec::set_input(int index, std::string&& value) {
function set_input (line 2098) | inline void TrainerSpec::set_input(int index, const char* value) {
function set_input (line 2103) | inline void TrainerSpec::set_input(int index, const char* value, size_t ...
function std (line 2108) | inline std::string* TrainerSpec::_internal_add_input() {
function add_input (line 2111) | inline void TrainerSpec::add_input(const std::string& value) {
function add_input (line 2115) | inline void TrainerSpec::add_input(std::string&& value) {
function add_input (line 2119) | inline void TrainerSpec::add_input(const char* value) {
function add_input (line 2124) | inline void TrainerSpec::add_input(const char* value, size_t size) {
function PROTOBUF_NAMESPACE_ID (line 2133) | inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField<std::string>*
function _internal_has_input_format (line 2140) | inline bool TrainerSpec::_internal_has_input_format() const {
function clear_input_format (line 2147) | inline void TrainerSpec::clear_input_format() {
function set_input_format (line 2155) | inline void TrainerSpec::set_input_format(const std::string& value) {
function std (line 2159) | inline std::string* TrainerSpec::mutable_input_format() {
function _internal_set_input_format (line 2166) | inline void TrainerSpec::_internal_set_input_format(const std::string& v...
function set_input_format (line 2170) | inline void TrainerSpec::set_input_format(std::string&& value) {
function set_input_format (line 2176) | inline void TrainerSpec::set_input_format(const char* value) {
function set_input_format (line 2182) | inline void TrainerSpec::set_input_format(const char* value,
function std (line 2189) | inline std::string* TrainerSpec::_internal_mutable_input_format() {
function std (line 2193) | inline std::string* TrainerSpec::release_input_format() {
function set_allocated_input_format (line 2201) | inline void TrainerSpec::set_allocated_input_format(std::string* input_f...
function _internal_has_model_prefix (line 2213) | inline bool TrainerSpec::_internal_has_model_prefix() const {
function clear_model_prefix (line 2220) | inline void TrainerSpec::clear_model_prefix() {
function set_model_prefix (line 2228) | inline void TrainerSpec::set_model_prefix(const std::string& value) {
function std (line 2232) | inline std::string* TrainerSpec::mutable_model_prefix() {
function _internal_set_model_prefix (line 2239) | inline void TrainerSpec::_internal_set_model_prefix(const std::string& v...
function set_model_prefix (line 2243) | inline void TrainerSpec::set_model_prefix(std::string&& value) {
function set_model_prefix (line 2249) | inline void TrainerSpec::set_model_prefix(const char* value) {
function set_model_prefix (line 2255) | inline void TrainerSpec::set_model_prefix(const char* value,
function std (line 2262) | inline std::string* TrainerSpec::_internal_mutable_model_prefix() {
function std (line 2266) | inline std::string* TrainerSpec::release_model_prefix() {
function set_allocated_model_prefix (line 2274) | inline void TrainerSpec::set_allocated_model_prefix(std::string* model_p...
function _internal_has_model_type (line 2286) | inline bool TrainerSpec::_internal_has_model_type() const {
function clear_model_type (line 2293) | inline void TrainerSpec::clear_model_type() {
function _internal_set_model_type (line 2304) | inline void TrainerSpec::_internal_set_model_type(::sentencepiece::Train...
function set_model_type (line 2309) | inline void TrainerSpec::set_model_type(::sentencepiece::TrainerSpec_Mod...
function _internal_has_vocab_size (line 2315) | inline bool TrainerSpec::_internal_has_vocab_size() const {
function clear_vocab_size (line 2322) | inline void TrainerSpec::clear_vocab_size() {
function _internal_set_vocab_size (line 2333) | inline void TrainerSpec::_internal_set_vocab_size(::PROTOBUF_NAMESPACE_I...
function set_vocab_size (line 2337) | inline void TrainerSpec::set_vocab_size(::PROTOBUF_NAMESPACE_ID::int32 v...
function clear_accept_language (line 2349) | inline void TrainerSpec::clear_accept_language() {
function std (line 2352) | inline std::string* TrainerSpec::add_accept_language() {
function std (line 2356) | inline const std::string& TrainerSpec::_internal_accept_language(int ind...
function std (line 2359) | inline const std::string& TrainerSpec::accept_language(int index) const {
function std (line 2363) | inline std::string* TrainerSpec::mutable_accept_language(int index) {
function set_accept_language (line 2367) | inline void TrainerSpec::set_accept_language(int index, const std::strin...
function set_accept_language (line 2371) | inline void TrainerSpec::set_accept_language(int index, std::string&& va...
function set_accept_language (line 2375) | inline void TrainerSpec::set_accept_language(int index, const char* valu...
function set_accept_language (line 2380) | inline void TrainerSpec::set_accept_language(int index, const char* valu...
function std (line 2385) | inline std::string* TrainerSpec::_internal_add_accept_language() {
function add_accept_language (line 2388) | inline void TrainerSpec::add_accept_language(const std::string& value) {
function add_accept_language (line 2392) | inline void TrainerSpec::add_accept_language(std::string&& value) {
function add_accept_language (line 2396) | inline void TrainerSpec::add_accept_language(const char* value) {
function add_accept_language (line 2401) | inline void TrainerSpec::add_accept_language(const char* value, size_t s...
function PROTOBUF_NAMESPACE_ID (line 2410) | inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField<std::string>*
function _internal_has_self_test_sample_size (line 2417) | inline bool TrainerSpec::_internal_has_self_test_sample_size() const {
function clear_self_test_sample_size (line 2424) | inline void TrainerSpec::clear_self_test_sample_size() {
function _internal_set_self_test_sample_size (line 2435) | inline void TrainerSpec::_internal_set_self_test_sample_size(::PROTOBUF_...
function set_self_test_sample_size (line 2439) | inline void TrainerSpec::set_self_test_sample_size(::PROTOBUF_NAMESPACE_...
function _internal_has_enable_differential_privacy (line 2445) | inline bool TrainerSpec::_internal_has_enable_differential_privacy() con...
function clear_enable_differential_privacy (line 2452) | inline void TrainerSpec::clear_enable_differential_privacy() {
function _internal_set_enable_differential_privacy (line 2463) | inline void TrainerSpec::_internal_set_enable_differential_privacy(bool ...
function set_enable_differential_privacy (line 2467) | inline void TrainerSpec::set_enable_differential_privacy(bool value) {
function _internal_has_differential_privacy_noise_level (line 2473) | inline bool TrainerSpec::_internal_has_differential_privacy_noise_level(...
function clear_differential_privacy_noise_level (line 2480) | inline void TrainerSpec::clear_differential_privacy_noise_level() {
function _internal_set_differential_privacy_noise_level (line 2491) | inline void TrainerSpec::_internal_set_differential_privacy_noise_level(...
function set_differential_privacy_noise_level (line 2495) | inline void TrainerSpec::set_differential_privacy_noise_level(float valu...
function _internal_has_differential_privacy_clipping_threshold (line 2501) | inline bool TrainerSpec::_internal_has_differential_privacy_clipping_thr...
function clear_differential_privacy_clipping_threshold (line 2508) | inline void TrainerSpec::clear_differential_privacy_clipping_threshold() {
function _internal_set_differential_privacy_clipping_threshold (line 2519) | inline void TrainerSpec::_internal_set_differential_privacy_clipping_thr...
function set_differential_privacy_clipping_threshold (line 2523) | inline void TrainerSpec::set_differential_privacy_clipping_threshold(::P...
function _internal_has_character_coverage (line 2529) | inline bool TrainerSpec::_internal_has_character_coverage() const {
function clear_character_coverage (line 2536) | inline void TrainerSpec::clear_character_coverage() {
function _internal_set_character_coverage (line 2547) | inline void TrainerSpec::_internal_set_character_coverage(float value) {
function set_character_coverage (line 2551) | inline void TrainerSpec::set_character_coverage(float value) {
function _internal_has_input_sentence_size (line 2557) | inline bool TrainerSpec::_internal_has_input_sentence_size() const {
function clear_input_sentence_size (line 2564) | inline void TrainerSpec::clear_input_sentence_size() {
function _internal_set_input_sentence_size (line 2575) | inline void TrainerSpec::_internal_set_input_sentence_size(::PROTOBUF_NA...
function set_input_sentence_size (line 2579) | inline void TrainerSpec::set_input_sentence_size(::PROTOBUF_NAMESPACE_ID...
function _internal_has_shuffle_input_sentence (line 2585) | inline bool TrainerSpec::_internal_has_shuffle_input_sentence() const {
function clear_shuffle_input_sentence (line 2592) | inline void TrainerSpec::clear_shuffle_input_sentence() {
function _internal_set_shuffle_input_sentence (line 2603) | inline void TrainerSpec::_internal_set_shuffle_input_sentence(bool value) {
function set_shuffle_input_sentence (line 2607) | inline void TrainerSpec::set_shuffle_input_sentence(bool value) {
function _internal_has_mining_sentence_size (line 2613) | inline bool TrainerSpec::_internal_has_mining_sentence_size() const {
function clear_mining_sentence_size (line 2620) | inline void TrainerSpec::clear_mining_sentence_size() {
function _internal_set_mining_sentence_size (line 2631) | inline void TrainerSpec::_internal_set_mining_sentence_size(::PROTOBUF_N...
function set_mining_sentence_size (line 2635) | inline void TrainerSpec::set_mining_sentence_size(::PROTOBUF_NAMESPACE_I...
function _internal_has_training_sentence_size (line 2641) | inline bool TrainerSpec::_internal_has_training_sentence_size() const {
function clear_training_sentence_size (line 2648) | inline void TrainerSpec::clear_training_sentence_size() {
function _internal_set_training_sentence_size (line 2659) | inline void TrainerSpec::_internal_set_training_sentence_size(::PROTOBUF...
function set_training_sentence_size (line 2663) | inline void TrainerSpec::set_training_sentence_size(::PROTOBUF_NAMESPACE...
function _internal_has_seed_sentencepiece_size (line 2669) | inline bool TrainerSpec::_internal_has_seed_sentencepiece_size() const {
function clear_seed_sentencepiece_size (line 2676) | inline void TrainerSpec::clear_seed_sentencepiece_size() {
function _internal_set_seed_sentencepiece_size (line 2687) | inline void TrainerSpec::_internal_set_seed_sentencepiece_size(::PROTOBU...
function set_seed_sentencepiece_size (line 2691) | inline void TrainerSpec::set_seed_sentencepiece_size(::PROTOBUF_NAMESPAC...
function _internal_has_shrinking_factor (line 2697) | inline bool TrainerSpec::_internal_has_shrinking_factor() const {
function clear_shrinking_factor (line 2704) | inline void TrainerSpec::clear_shrinking_factor() {
function _internal_set_shrinking_factor (line 2715) | inline void TrainerSpec::_internal_set_shrinking_factor(float value) {
function set_shrinking_factor (line 2719) | inline void TrainerSpec::set_shrinking_factor(float value) {
function _internal_has_max_sentence_length (line 2725) | inline bool TrainerSpec::_internal_has_max_sentence_length() const {
function clear_max_sentence_length (line 2732) | inline void TrainerSpec::clear_max_sentence_length() {
function _internal_set_max_sentence_length (line 2743) | inline void TrainerSpec::_internal_set_max_sentence_length(::PROTOBUF_NA...
function set_max_sentence_length (line 2747) | inline void TrainerSpec::set_max_sentence_length(::PROTOBUF_NAMESPACE_ID...
function _internal_has_num_threads (line 2753) | inline bool TrainerSpec::_internal_has_num_threads() const {
function clear_num_threads (line 2760) | inline void TrainerSpec::clear_num_threads() {
function _internal_set_num_threads (line 2771) | inline void TrainerSpec::_internal_set_num_threads(::PROTOBUF_NAMESPACE_...
function set_num_threads (line 2775) | inline void TrainerSpec::set_num_threads(::PROTOBUF_NAMESPACE_ID::int32 ...
function _internal_has_num_sub_iterations (line 2781) | inline bool TrainerSpec::_internal_has_num_sub_iterations() const {
function clear_num_sub_iterations (line 2788) | inline void TrainerSpec::clear_num_sub_iterations() {
function _internal_set_num_sub_iterations (line 2799) | inline void TrainerSpec::_internal_set_num_sub_iterations(::PROTOBUF_NAM...
function set_num_sub_iterations (line 2803) | inline void TrainerSpec::set_num_sub_iterations(::PROTOBUF_NAMESPACE_ID:...
function _internal_has_max_sentencepiece_length (line 2809) | inline bool TrainerSpec::_internal_has_max_sentencepiece_length() const {
function clear_max_sentencepiece_length (line 2816) | inline void TrainerSpec::clear_max_sentencepiece_length() {
function _internal_set_max_sentencepiece_length (line 2827) | inline void TrainerSpec::_internal_set_max_sentencepiece_length(::PROTOB...
function set_max_sentencepiece_length (line 2831) | inline void TrainerSpec::set_max_sentencepiece_length(::PROTOBUF_NAMESPA...
function _internal_has_split_by_unicode_script (line 2837) | inline bool TrainerSpec::_internal_has_split_by_unicode_script() const {
function clear_split_by_unicode_script (line 2844) | inline void TrainerSpec::clear_split_by_unicode_script() {
function _internal_set_split_by_unicode_script (line 2855) | inline void TrainerSpec::_internal_set_split_by_unicode_script(bool valu...
function set_split_by_unicode_script (line 2859) | inline void TrainerSpec::set_split_by_unicode_script(bool value) {
function _internal_has_split_by_number (line 2865) | inline bool TrainerSpec::_internal_has_split_by_number() const {
function clear_split_by_number (line 2872) | inline void TrainerSpec::clear_split_by_number() {
function _internal_set_split_by_number (line 2883) | inline void TrainerSpec::_internal_set_split_by_number(bool value) {
function set_split_by_number (line 2887) | inline void TrainerSpec::set_split_by_number(bool value) {
function _internal_has_split_by_whitespace (line 2893) | inline bool TrainerSpec::_internal_has_split_by_whitespace() const {
function clear_split_by_whitespace (line 2900) | inline void TrainerSpec::clear_split_by_whitespace() {
function _internal_set_split_by_whitespace (line 2911) | inline void TrainerSpec::_internal_set_split_by_whitespace(bool value) {
function set_split_by_whitespace (line 2915) | inline void TrainerSpec::set_split_by_whitespace(bool value) {
function _internal_has_treat_whitespace_as_suffix (line 2921) | inline bool TrainerSpec::_internal_has_treat_whitespace_as_suffix() const {
function clear_treat_whitespace_as_suffix (line 2928) | inline void TrainerSpec::clear_treat_whitespace_as_suffix() {
function _internal_set_treat_whitespace_as_suffix (line 2939) | inline void TrainerSpec::_internal_set_treat_whitespace_as_suffix(bool v...
function set_treat_whitespace_as_suffix (line 2943) | inline void TrainerSpec::set_treat_whitespace_as_suffix(bool value) {
function _internal_has_allow_whitespace_only_pieces (line 2949) | inline bool TrainerSpec::_internal_has_allow_whitespace_only_pieces() co...
function clear_allow_whitespace_only_pieces (line 2956) | inline void TrainerSpec::clear_allow_whitespace_only_pieces() {
function _internal_set_allow_whitespace_only_pieces (line 2967) | inline void TrainerSpec::_internal_set_allow_whitespace_only_pieces(bool...
function set_allow_whitespace_only_pieces (line 2971) | inline void TrainerSpec::set_allow_whitespace_only_pieces(bool value) {
function _internal_has_split_digits (line 2977) | inline bool TrainerSpec::_internal_has_split_digits() const {
function clear_split_digits (line 2984) | inline void TrainerSpec::clear_split_digits() {
function _internal_set_split_digits (line 2995) | inline void TrainerSpec::_internal_set_split_digits(bool value) {
function set_split_digits (line 2999) | inline void TrainerSpec::set_split_digits(bool value) {
function _internal_has_pretokenization_delimiter (line 3005) | inline bool TrainerSpec::_internal_has_pretokenization_delimiter() const {
function clear_pretokenization_delimiter (line 3012) | inline void TrainerSpec::clear_pretokenization_delimiter() {
function set_pretokenization_delimiter (line 3020) | inline void TrainerSpec::set_pretokenization_delimiter(const std::string...
function std (line 3024) | inline std::string* TrainerSpec::mutable_pretokenization_delimiter() {
function _internal_set_pretokenization_delimiter (line 3031) | inline void TrainerSpec::_internal_set_pretokenization_delimiter(const s...
function set_pretokenization_delimiter (line 3035) | inline void TrainerSpec::set_pretokenization_delimiter(std::string&& val...
function set_pretokenization_delimiter (line 3041) | inline void TrainerSpec::set_pretokenization_delimiter(const char* value) {
function set_pretokenization_delimiter (line 3047) | inline void TrainerSpec::set_pretokenization_delimiter(const char* value,
function std (line 3054) | inline std::string* TrainerSpec::_internal_mutable_pretokenization_delim...
function std (line 3058) | inline std::string* TrainerSpec::release_pretokenization_delimiter() {
function set_allocated_pretokenization_delimiter (line 3066) | inline void TrainerSpec::set_allocated_pretokenization_delimiter(std::st...
function clear_control_symbols (line 3084) | inline void TrainerSpec::clear_control_symbols() {
function std (line 3087) | inline std::string* TrainerSpec::add_control_symbols() {
function std (line 3091) | inline const std::string& TrainerSpec::_internal_control_symbols(int ind...
function std (line 3094) | inline const std::string& TrainerSpec::control_symbols(int index) const {
function std (line 3098) | inline std::string* TrainerSpec::mutable_control_symbols(int index) {
function set_control_symbols (line 3102) | inline void TrainerSpec::set_control_symbols(int index, const std::strin...
function set_control_symbols (line 3106) | inline void TrainerSpec::set_control_symbols(int index, std::string&& va...
function set_control_symbols (line 3110) | inline void TrainerSpec::set_control_symbols(int index, const char* valu...
function set_control_symbols (line 3115) | inline void TrainerSpec::set_control_symbols(int index, const char* valu...
function std (line 3120) | inline std::string* TrainerSpec::_internal_add_control_symbols() {
function add_control_symbols (line 3123) | inline void TrainerSpec::add_control_symbols(const std::string& value) {
function add_control_symbols (line 3127) | inline void TrainerSpec::add_control_symbols(std::string&& value) {
function add_control_symbols (line 3131) | inline void TrainerSpec::add_control_symbols(const char* value) {
function add_control_symbols (line 3136) | inline void TrainerSpec::add_control_symbols(const char* value, size_t s...
function PROTOBUF_NAMESPACE_ID (line 3145) | inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField<std::string>*
function clear_user_defined_symbols (line 3158) | inline void TrainerSpec::clear_user_defined_symbols() {
function std (line 3161) | inline std::string* TrainerSpec::add_user_defined_symbols() {
function std (line 3165) | inline const std::string& TrainerSpec::_internal_user_defined_symbols(in...
function std (line 3168) | inline const std::string& TrainerSpec::user_defined_symbols(int index) c...
function std (line 3172) | inline std::string* TrainerSpec::mutable_user_defined_symbols(int index) {
function set_user_defined_symbols (line 3176) | inline void TrainerSpec::set_user_defined_symbols(int index, const std::...
function set_user_defined_symbols (line 3180) | inline void TrainerSpec::set_user_defined_symbols(int index, std::string...
function set_user_defined_symbols (line 3184) | inline void TrainerSpec::set_user_defined_symbols(int index, const char*...
function set_user_defined_symbols (line 3189) | inline void TrainerSpec::set_user_defined_symbols(int index, const char*...
function std (line 3194) | inline std::string* TrainerSpec::_internal_add_user_defined_symbols() {
function add_user_defined_symbols (line 3197) | inline void TrainerSpec::add_user_defined_symbols(const std::string& val...
function add_user_defined_symbols (line 3201) | inline void TrainerSpec::add_user_defined_symbols(std::string&& value) {
function add_user_defined_symbols (line 3205) | inline void TrainerSpec::add_user_defined_symbols(const char* value) {
function add_user_defined_symbols (line 3210) | inline void TrainerSpec::add_user_defined_symbols(const char* value, siz...
function PROTOBUF_NAMESPACE_ID (line 3219) | inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField<std::string>*
function _internal_has_required_chars (line 3226) | inline bool TrainerSpec::_internal_has_required_chars() const {
function clear_required_chars (line 3233) | inline void TrainerSpec::clear_required_chars() {
function set_required_chars (line 3241) | inline void TrainerSpec::set_required_chars(const std::string& value) {
function std (line 3245) | inline std::string* TrainerSpec::mutable_required_chars() {
function _internal_set_required_chars (line 3252) | inline void TrainerSpec::_internal_set_required_chars(const std::string&...
function set_required_chars (line 3256) | inline void TrainerSpec::set_required_chars(std::string&& value) {
function set_required_chars (line 3262) | inline void TrainerSpec::set_required_chars(const char* value) {
function set_required_chars (line 3268) | inline void TrainerSpec::set_required_chars(const char* value,
function std (line 3275) | inline std::string* TrainerSpec::_internal_mutable_required_chars() {
function std (line 3279) | inline std::string* TrainerSpec::release_required_chars() {
function set_allocated_required_chars (line 3287) | inline void TrainerSpec::set_allocated_required_chars(std::string* requi...
function _internal_has_byte_fallback (line 3299) | inline bool TrainerSpec::_internal_has_byte_fallback() const {
function clear_byte_fallback (line 3306) | inline void TrainerSpec::clear_byte_fallback() {
function _internal_set_byte_fallback (line 3317) | inline void TrainerSpec::_internal_set_byte_fallback(bool value) {
function set_byte_fallback (line 3321) | inline void TrainerSpec::set_byte_fallback(bool value) {
function _internal_has_vocabulary_output_piece_score (line 3327) | inline bool TrainerSpec::_internal_has_vocabulary_output_piece_score() c...
function clear_vocabulary_output_piece_score (line 3334) | inline void TrainerSpec::clear_vocabulary_output_piece_score() {
function _internal_set_vocabulary_output_piece_score (line 3345) | inline void TrainerSpec::_internal_set_vocabulary_output_piece_score(boo...
function set_vocabulary_output_piece_score (line 3349) | inline void TrainerSpec::set_vocabulary_output_piece_score(bool value) {
function _internal_has_hard_vocab_limit (line 3355) | inline bool TrainerSpec::_internal_has_hard_vocab_limit() const {
function clear_hard_vocab_limit (line 3362) | inline void TrainerSpec::clear_hard_vocab_limit() {
function _internal_set_hard_vocab_limit (line 3373) | inline void TrainerSpec::_internal_set_hard_vocab_limit(bool value) {
function set_hard_vocab_limit (line 3377) | inline void TrainerSpec::set_hard_vocab_limit(bool value) {
function _internal_has_use_all_vocab (line 3383) | inline bool TrainerSpec::_internal_has_use_all_vocab() const {
function clear_use_all_vocab (line 3390) | inline void TrainerSpec::clear_use_all_vocab() {
function _internal_set_use_all_vocab (line 3401) | inline void TrainerSpec::_internal_set_use_all_vocab(bool value) {
function set_use_all_vocab (line 3405) | inline void TrainerSpec::set_use_all_vocab(bool value) {
function _internal_has_unk_id (line 3411) | inline bool TrainerSpec::_internal_has_unk_id() const {
function clear_unk_id (line 3418) | inline void TrainerSpec::clear_unk_id() {
function _internal_set_unk_id (line 3429) | inline void TrainerSpec::_internal_set_unk_id(::PROTOBUF_NAMESPACE_ID::i...
function set_unk_id (line 3433) | inline void TrainerSpec::set_unk_id(::PROTOBUF_NAMESPACE_ID::int32 value) {
function _internal_has_bos_id (line 3439) | inline bool TrainerSpec::_internal_has_bos_id() const {
function clear_bos_id (line 3446) | inline void TrainerSpec::clear_bos_id() {
function _internal_set_bos_id (line 3457) | inline void TrainerSpec::_internal_set_bos_id(::PROTOBUF_NAMESPACE_ID::i...
function set_bos_id (line 3461) | inline void TrainerSpec::set_bos_id(::PROTOBUF_NAMESPACE_ID::int32 value) {
function _internal_has_eos_id (line 3467) | inline bool TrainerSpec::_internal_has_eos_id() const {
function clear_eos_id (line 3474) | inline void TrainerSpec::clear_eos_id() {
function _internal_set_eos_id (line 3485) | inline void TrainerSpec::_internal_set_eos_id(::PROTOBUF_NAMESPACE_ID::i...
function set_eos_id (line 3489) | inline void TrainerSpec::set_eos_id(::PROTOBUF_NAMESPACE_ID::int32 value) {
function _internal_has_pad_id (line 3495) | inline bool TrainerSpec::_internal_has_pad_id() const {
function clear_pad_id (line 3502) | inline void TrainerSpec::clear_pad_id() {
function _internal_set_pad_id (line 3513) | inline void TrainerSpec::_internal_set_pad_id(::PROTOBUF_NAMESPACE_ID::i...
function set_pad_id (line 3517) | inline void TrainerSpec::set_pad_id(::PROTOBUF_NAMESPACE_ID::int32 value) {
function _internal_has_unk_piece (line 3523) | inline bool TrainerSpec::_internal_has_unk_piece() const {
function clear_unk_piece (line 3530) | inline void TrainerSpec::clear_unk_piece() {
function set_unk_piece (line 3539) | inline void TrainerSpec::set_unk_piece(const std::string& value) {
function std (line 3543) | inline std::string* TrainerSpec::mutable_unk_piece() {
function _internal_set_unk_piece (line 3550) | inline void TrainerSpec::_internal_set_unk_piece(const std::string& valu...
function set_unk_piece (line 3554) | inline void TrainerSpec::set_unk_piece(std::string&& value) {
function set_unk_piece (line 3560) | inline void TrainerSpec::set_unk_piece(const char* value) {
function set_unk_piece (line 3566) | inline void TrainerSpec::set_unk_piece(const char* value,
function std (line 3573) | inline std::string* TrainerSpec::_internal_mutable_unk_piece() {
function std (line 3577) | inline std::string* TrainerSpec::release_unk_piece() {
function set_allocated_unk_piece (line 3585) | inline void TrainerSpec::set_allocated_unk_piece(std::string* unk_piece) {
function _internal_has_bos_piece (line 3597) | inline bool TrainerSpec::_internal_has_bos_piece() const {
function clear_bos_piece (line 3604) | inline void TrainerSpec::clear_bos_piece() {
function set_bos_piece (line 3613) | inline void TrainerSpec::set_bos_piece(const std::string& value) {
function std (line 3617) | inline std::string* TrainerSpec::mutable_bos_piece() {
function _internal_set_bos_piece (line 3624) | inline void TrainerSpec::_internal_set_bos_piece(const std::string& valu...
function set_bos_piece (line 3628) | inline void TrainerSpec::set_bos_piece(std::string&& value) {
function set_bos_piece (line 3634) | inline void TrainerSpec::set_bos_piece(const char* value) {
function set_bos_piece (line 3640) | inline void TrainerSpec::set_bos_piece(const char* value,
function std (line 3647) | inline std::string* TrainerSpec::_internal_mutable_bos_piece() {
function std (line 3651) | inline std::string* TrainerSpec::release_bos_piece() {
function set_allocated_bos_piece (line 3659) | inline void TrainerSpec::set_allocated_bos_piece(std::string* bos_piece) {
function _internal_has_eos_piece (line 3671) | inline bool TrainerSpec::_internal_has_eos_piece() const {
function clear_eos_piece (line 3678) | inline void TrainerSpec::clear_eos_piece() {
function set_eos_piece (line 3687) | inline void TrainerSpec::set_eos_piece(const std::string& value) {
function std (line 3691) | inline std::string* TrainerSpec::mutable_eos_piece() {
function _internal_set_eos_piece (line 3698) | inline void TrainerSpec::_internal_set_eos_piece(const std::string& valu...
function set_eos_piece (line 3702) | inline void TrainerSpec::set_eos_piece(std::string&& value) {
function set_eos_piece (line 3708) | inline void TrainerSpec::set_eos_piece(const char* value) {
function set_eos_piece (line 3714) | inline void TrainerSpec::set_eos_piece(const char* value,
function std (line 3721) | inline std::string* TrainerSpec::_internal_mutable_eos_piece() {
function std (line 3725) | inline std::string* TrainerSpec::release_eos_piece() {
function set_allocated_eos_piece (line 3733) | inline void TrainerSpec::set_allocated_eos_piece(std::string* eos_piece) {
function _internal_has_pad_piece (line 3745) | inline bool TrainerSpec::_internal_has_pad_piece() const {
function clear_pad_piece (line 3752) | inline void TrainerSpec::clear_pad_piece() {
function set_pad_piece (line 3761) | inline void TrainerSpec::set_pad_piece(const std::string& value) {
function std (line 3765) | inline std::string* TrainerSpec::mutable_pad_piece() {
function _internal_set_pad_piece (line 3772) | inline void TrainerSpec::_internal_set_pad_piece(const std::string& valu...
function set_pad_piece (line 3776) | inline void TrainerSpec::set_pad_piece(std::string&& value) {
function set_pad_piece (line 3782) | inline void TrainerSpec::set_pad_piece(const char* value) {
function set_pad_piece (line 3788) | inline void TrainerSpec::set_pad_piece(const char* value,
function std (line 3795) | inline std::string* TrainerSpec::_internal_mutable_pad_piece() {
function std (line 3799) | inline std::string* TrainerSpec::release_pad_piece() {
function set_allocated_pad_piece (line 3807) | inline void TrainerSpec::set_allocated_pad_piece(std::string* pad_piece) {
function _internal_has_unk_surface (line 3819) | inline bool TrainerSpec::_internal_has_unk_surface() const {
function clear_unk_surface (line 3826) | inline void TrainerSpec::clear_unk_surface() {
function set_unk_surface (line 3835) | inline void TrainerSpec::set_unk_surface(const std::string& value) {
function std (line 3839) | inline std::string* TrainerSpec::mutable_unk_surface() {
function _internal_set_unk_surface (line 3846) | inline void TrainerSpec::_internal_set_unk_surface(const std::string& va...
function set_unk_surface (line 3850) | inline void TrainerSpec::set_unk_surface(std::string&& value) {
function set_unk_surface (line 3856) | inline void TrainerSpec::set_unk_surface(const char* value) {
function set_unk_surface (line 3862) | inline void TrainerSpec::set_unk_surface(const char* value,
function std (line 3869) | inline std::string* TrainerSpec::_internal_mutable_unk_surface() {
function std (line 3873) | inline std::string* TrainerSpec::release_unk_surface() {
function set_allocated_unk_surface (line 3881) | inline void TrainerSpec::set_allocated_unk_surface(std::string* unk_surf...
function _internal_has_train_extremely_large_corpus (line 3893) | inline bool TrainerSpec::_internal_has_train_extremely_large_corpus() co...
function clear_train_extremely_large_corpus (line 3900) | inline void TrainerSpec::clear_train_extremely_large_corpus() {
function _internal_set_train_extremely_large_corpus (line 3911) | inline void TrainerSpec::_internal_set_train_extremely_large_corpus(bool...
function set_train_extremely_large_corpus (line 3915) | inline void TrainerSpec::set_train_extremely_large_corpus(bool value) {
function _internal_has_seed_sentencepieces_file (line 3921) | inline bool TrainerSpec::_internal_has_seed_sentencepieces_file() const {
function clear_seed_sentencepieces_file (line 3928) | inline void TrainerSpec::clear_seed_sentencepieces_file() {
function set_seed_sentencepieces_file (line 3936) | inline void TrainerSpec::set_seed_sentencepieces_file(const std::string&...
function std (line 3940) | inline std::string* TrainerSpec::mutable_seed_sentencepieces_file() {
function _internal_set_seed_sentencepieces_file (line 3947) | inline void TrainerSpec::_internal_set_seed_sentencepieces_file(const st...
function set_seed_sentencepieces_file (line 3951) | inline void TrainerSpec::set_seed_sentencepieces_file(std::string&& valu...
function set_seed_sentencepieces_file (line 3957) | inline void TrainerSpec::set_seed_sentencepieces_file(const char* value) {
function set_seed_sentencepieces_file (line 3963) | inline void TrainerSpec::set_seed_sentencepieces_file(const char* value,
function std (line 3970) | inline std::string* TrainerSpec::_internal_mutable_seed_sentencepieces_f...
function std (line 3974) | inline std::string* TrainerSpec::release_seed_sentencepieces_file() {
function set_allocated_seed_sentencepieces_file (line 3982) | inline void TrainerSpec::set_allocated_seed_sentencepieces_file(std::str...
function _internal_has_name (line 3998) | inline bool NormalizerSpec::_internal_has_name() const {
function clear_name (line 4005) | inline void NormalizerSpec::clear_name() {
function set_name (line 4013) | inline void NormalizerSpec::set_name(const std::string& value) {
function std (line 4017) | inline std::string* NormalizerSpec::mutable_name() {
function _internal_set_name (line 4024) | inline void NormalizerSpec::_internal_set_name(const std::string& value) {
function set_name (line 4028) | inline void NormalizerSpec::set_name(std::string&& value) {
function set_name (line 4034) | inline void NormalizerSpec::set_name(const char* value) {
function set_name (line 4040) | inline void NormalizerSpec::set_name(const char* value,
function std (line 4047) | inline std::string* NormalizerSpec::_internal_mutable_name() {
function std (line 4051) | inline std::string* NormalizerSpec::release_name() {
function set_allocated_name (line 4059) | inline void NormalizerSpec::set_allocated_name(std::string* name) {
function _internal_has_precompiled_charsmap (line 4071) | inline bool NormalizerSpec::_internal_has_precompiled_charsmap() const {
function clear_precompiled_charsmap (line 4078) | inline void NormalizerSpec::clear_precompiled_charsmap() {
function set_precompiled_charsmap (line 4086) | inline void NormalizerSpec::set_precompiled_charsmap(const std::string& ...
function std (line 4090) | inline std::string* NormalizerSpec::mutable_precompiled_charsmap() {
function _internal_set_precompiled_charsmap (line 4097) | inline void NormalizerSpec::_internal_set_precompiled_charsmap(const std...
function set_precompiled_charsmap (line 4101) | inline void NormalizerSpec::set_precompiled_charsmap(std::string&& value) {
function set_precompiled_charsmap (line 4107) | inline void NormalizerSpec::set_precompiled_charsmap(const char* value) {
function set_precompiled_charsmap (line 4113) | inline void NormalizerSpec::set_precompiled_charsmap(const void* value,
function std (line 4120) | inline std::string* NormalizerSpec::_internal_mutable_precompiled_charsm...
function std (line 4124) | inline std::string* NormalizerSpec::release_precompiled_charsmap() {
function set_allocated_precompiled_charsmap (line 4132) | inline void NormalizerSpec::set_allocated_precompiled_charsmap(std::stri...
function _internal_has_add_dummy_prefix (line 4144) | inline bool NormalizerSpec::_internal_has_add_dummy_prefix() const {
function clear_add_dummy_prefix (line 4151) | inline void NormalizerSpec::clear_add_dummy_prefix() {
function _internal_set_add_dummy_prefix (line 4162) | inline void NormalizerSpec::_internal_set_add_dummy_prefix(bool value) {
function set_add_dummy_prefix (line 4166) | inline void NormalizerSpec::set_add_dummy_prefix(bool value) {
function _internal_has_remove_extra_whitespaces (line 4172) | inline bool NormalizerSpec::_internal_has_remove_extra_whitespaces() con...
function clear_remove_extra_whitespaces (line 4179) | inline void NormalizerSpec::clear_remove_extra_whitespaces() {
function _internal_set_remove_extra_whitespaces (line 4190) | inline void NormalizerSpec::_internal_set_remove_extra_whitespaces(bool ...
function set_remove_extra_whitespaces (line 4194) | inline void NormalizerSpec::set_remove_extra_whitespaces(bool value) {
function _internal_has_escape_whitespaces (line 4200) | inline bool NormalizerSpec::_internal_has_escape_whitespaces() const {
function clear_escape_whitespaces (line 4207) | inline void NormalizerSpec::clear_escape_whitespaces() {
function _internal_set_escape_whitespaces (line 4218) | inline void NormalizerSpec::_internal_set_escape_whitespaces(bool value) {
function set_escape_whitespaces (line 4222) | inline void NormalizerSpec::set_escape_whitespaces(bool value) {
function _internal_has_normalization_rule_tsv (line 4228) | inline bool NormalizerSpec::_internal_has_normalization_rule_tsv() const {
function clear_normalization_rule_tsv (line 4235) | inline void NormalizerSpec::clear_normalization_rule_tsv() {
function set_normalization_rule_tsv (line 4243) | inline void NormalizerSpec::set_normalization_rule_tsv(const std::string...
function std (line 4247) | inline std::string* NormalizerSpec::mutable_normalization_rule_tsv() {
function _internal_set_normalization_rule_tsv (line 4254) | inline void NormalizerSpec::_internal_set_normalization_rule_tsv(const s...
function set_normalization_rule_tsv (line 4258) | inline void NormalizerSpec::set_normalization_rule_tsv(std::string&& val...
function set_normalization_rule_tsv (line 4264) | inline void NormalizerSpec::set_normalization_rule_tsv(const char* value) {
function set_normalization_rule_tsv (line 4270) | inline void NormalizerSpec::set_normalization_rule_tsv(const char* value,
function std (line 4277) | inline std::string* NormalizerSpec::_internal_mutable_normalization_rule...
function std (line 4281) | inline std::string* NormalizerSpec::release_normalization_rule_tsv() {
function set_allocated_normalization_rule_tsv (line 4289) | inline void NormalizerSpec::set_allocated_normalization_rule_tsv(std::st...
function _internal_has_input (line 4305) | inline bool SelfTestData_Sample::_internal_has_input() const {
function clear_input (line 4312) | inline void SelfTestData_Sample::clear_input() {
function set_input (line 4320) | inline void SelfTestData_Sample::set_input(const std::string& value) {
function std (line 4324) | inline std::string* SelfTestData_Sample::mutable_input() {
function _internal_set_input (line 4331) | inline void SelfTestData_Sample::_internal_set_input(const std::string& ...
function set_input (line 4335) | inline void SelfTestData_Sample::set_input(std::string&& value) {
function set_input (line 4341) | inline void SelfTestData_Sample::set_input(const char* value) {
function set_input (line 4347) | inline void SelfTestData_Sample::set_input(const char* value,
function std (line 4354) | inline std::string* SelfTestData_Sample::_internal_mutable_input() {
function std (line 4358) | inline std::string* SelfTestData_Sample::release_input() {
function set_allocated_input (line 4366) | inline void SelfTestData_Sample::set_allocated_input(std::string* input) {
function _internal_has_expected (line 4378) | inline bool SelfTestData_Sample::_internal_has_expected() const {
function clear_expected (line 4385) | inline void SelfTestData_Sample::clear_expected() {
function set_expected (line 4393) | inline void SelfTestData_Sample::set_expected(const std::string& value) {
function std (line 4397) | inline std::string* SelfTestData_Sample::mutable_expected() {
function _internal_set_expected (line 4404) | inline void SelfTestData_Sample::_internal_set_expected(const std::strin...
function set_expected (line 4408) | inline void SelfTestData_Sample::set_expected(std::string&& value) {
function set_expected (line 4414) | inline void SelfTestData_Sample::set_expected(const char* value) {
function set_expected (line 4420) | inline void SelfTestData_Sample::set_expected(const char* value,
function std (line 4427) | inline std::string* SelfTestData_Sample::_internal_mutable_expected() {
function std (line 4431) | inline std::string* SelfTestData_Sample::release_expected() {
function set_allocated_expected (line 4439) | inline void SelfTestData_Sample::set_allocated_expected(std::string* exp...
function clear_samples (line 4461) | inline void SelfTestData::clear_samples() {
function sentencepiece (line 4464) | inline ::sentencepiece::SelfTestData_Sample* SelfTestData::mutable_sampl...
function PROTOBUF_NAMESPACE_ID (line 4468) | inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SelfT...
function sentencepiece (line 4473) | inline const ::sentencepiece::SelfTestData_Sample& SelfTestData::_intern...
function sentencepiece (line 4476) | inline const ::sentencepiece::SelfTestData_Sample& SelfTestData::samples...
function sentencepiece (line 4480) | inline ::sentencepiece::SelfTestData_Sample* SelfTestData::_internal_add...
function sentencepiece (line 4483) | inline ::sentencepiece::SelfTestData_Sample* SelfTestData::add_samples() {
function PROTOBUF_NAMESPACE_ID (line 4487) | inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece:...
function clear_piece (line 4505) | inline void ModelProto_SentencePiece::clear_piece() {
function set_piece (line 4513) | inline void ModelProto_SentencePiece::set_piece(const std::string& value) {
function std (line 4517) | inline std::string* ModelProto_SentencePiece::mutable_piece() {
function _internal_set_piece (line 4524) | inline void ModelProto_SentencePiece::_internal_set_piece(const std::str...
function set_piece (line 4528) | inline void ModelProto_SentencePiece::set_piece(std::string&& value) {
function set_piece (line 4534) | inline void ModelProto_SentencePiece::set_piece(const char* value) {
function set_piece (line 4540) | inline void ModelProto_SentencePiece::set_piece(const char* value,
function std (line 4547) | inline std::string* ModelProto_SentencePiece::_internal_mutable_piece() {
function std (line 4551) | inline std::string* ModelProto_SentencePiece::release_piece() {
function set_allocated_piece (line 4559) | inline void ModelProto_SentencePiece::set_allocated_piece(std::string* p...
function _internal_has_score (line 4571) | inline bool ModelProto_SentencePiece::_internal_has_score() const {
function clear_score (line 4578) | inline void ModelProto_SentencePiece::clear_score() {
function _internal_set_score (line 4589) | inline void ModelProto_SentencePiece::_internal_set_score(float value) {
function set_score (line 4593) | inline void ModelProto_SentencePiece::set_score(float value) {
function _internal_has_type (line 4599) | inline bool ModelProto_SentencePiece::_internal_has_type() const {
function clear_type (line 4606) | inline void ModelProto_SentencePiece::clear_type() {
function _internal_set_type (line 4617) | inline void ModelProto_SentencePiece::_internal_set_type(::sentencepiece...
function set_type (line 4622) | inline void ModelProto_SentencePiece::set_type(::sentencepiece::ModelPro...
function clear_pieces (line 4638) | inline void ModelProto::clear_pieces() {
function sentencepiece (line 4641) | inline ::sentencepiece::ModelProto_SentencePiece* ModelProto::mutable_pi...
function PROTOBUF_NAMESPACE_ID (line 4645) | inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::Model...
function sentencepiece (line 4650) | inline const ::sentencepiece::ModelProto_SentencePiece& ModelProto::_int...
function sentencepiece (line 4653) | inline const ::sentencepiece::ModelProto_SentencePiece& ModelProto::piec...
function sentencepiece (line 4657) | inline ::sentencepiece::ModelProto_SentencePiece* ModelProto::_internal_...
function sentencepiece (line 4660) | inline ::sentencepiece::ModelProto_SentencePiece* ModelProto::add_pieces...
function PROTOBUF_NAMESPACE_ID (line 4664) | inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece:...
function clear_trainer_spec (line 4679) | inline void ModelProto::clear_trainer_spec() {
function sentencepiece (line 4683) | inline const ::sentencepiece::TrainerSpec& ModelProto::_internal_trainer...
function unsafe_arena_set_allocated_trainer_spec (line 4692) | inline void ModelProto::unsafe_arena_set_allocated_trainer_spec(
function sentencepiece (line 4705) | inline ::sentencepiece::TrainerSpec* ModelProto::release_trainer_spec() {
function sentencepiece (line 4714) | inline ::sentencepiece::TrainerSpec* ModelProto::unsafe_arena_release_tr...
function sentencepiece (line 4721) | inline ::sentencepiece::TrainerSpec* ModelProto::_internal_mutable_train...
function sentencepiece (line 4729) | inline ::sentencepiece::TrainerSpec* ModelProto::mutable_trainer_spec() {
function set_allocated_trainer_spec (line 4733) | inline void ModelProto::set_allocated_trainer_spec(::sentencepiece::Trai...
function _internal_has_normalizer_spec (line 4754) | inline bool ModelProto::_internal_has_normalizer_spec() const {
function clear_normalizer_spec (line 4762) | inline void ModelProto::clear_normalizer_spec() {
function sentencepiece (line 4766) | inline const ::sentencepiece::NormalizerSpec& ModelProto::_internal_norm...
function unsafe_arena_set_allocated_normalizer_spec (line 4775) | inline void ModelProto::unsafe_arena_set_allocated_normalizer_spec(
function sentencepiece (line 4788) | inline ::sentencepiece::NormalizerSpec* ModelProto::release_normalizer_s...
function sentencepiece (line 4797) | inline ::sentencepiece::NormalizerSpec* ModelProto::unsafe_arena_release...
function sentencepiece (line 4804) | inline ::sentencepiece::NormalizerSpec* ModelProto::_internal_mutable_no...
function sentencepiece (line 4812) | inline ::sentencepiece::NormalizerSpec* ModelProto::mutable_normalizer_s...
function set_allocated_normalizer_spec (line 4816) | inline void ModelProto::set_allocated_normalizer_spec(::sentencepiece::N...
function _internal_has_self_test_data (line 4837) | inline bool ModelProto::_internal_has_self_test_data() const {
function clear_self_test_data (line 4845) | inline void ModelProto::clear_self_test_data() {
function sentencepiece (line 4849) | inline const ::sentencepiece::SelfTestData& ModelProto::_internal_self_t...
function unsafe_arena_set_allocated_self_test_data (line 4858) | inline void ModelProto::unsafe_arena_set_allocated_self_test_data(
function sentencepiece (line 4871) | inline ::sentencepiece::SelfTestData* ModelProto::release_self_test_data...
function sentencepiece (line 4880) | inline ::sentencepiece::SelfTestData* ModelProto::unsafe_arena_release_s...
function sentencepiece (line 4887) | inline ::sentencepiece::SelfTestData* ModelProto::_internal_mutable_self...
function sentencepiece (line 4895) | inline ::sentencepiece::SelfTestData* ModelProto::mutable_self_test_data...
function set_allocated_self_test_data (line 4899) | inline void ModelProto::set_allocated_self_test_data(::sentencepiece::Se...
function _internal_has_denormalizer_spec (line 4920) | inline bool ModelProto::_internal_has_denormalizer_spec() const {
function clear_denormalizer_spec (line 4928) | inline void ModelProto::clear_denormalizer_spec() {
function sentencepiece (line 4932) | inline const ::sentencepiece::NormalizerSpec& ModelProto::_internal_deno...
function unsafe_arena_set_allocated_denormalizer_spec (line 4941) | inline void ModelProto::unsafe_arena_set_allocated_denormalizer_spec(
function sentencepiece (line 4954) | inline ::sentencepiece::NormalizerSpec* ModelProto::release_denormalizer...
function sentencepiece (line 4963) | inline ::sentencepiece::NormalizerSpec* ModelProto::unsafe_arena_release...
function sentencepiece (line 4970) | inline ::sentencepiece::NormalizerSpec* ModelProto::_internal_mutable_de...
function sentencepiece (line 4978) | inline ::sentencepiece::NormalizerSpec* ModelProto::mutable_denormalizer...
function set_allocated_denormalizer_spec (line 4982) | inline void ModelProto::set_allocated_denormalizer_spec(::sentencepiece:...
function PROTOBUF_NAMESPACE_OPEN (line 5020) | PROTOBUF_NAMESPACE_OPEN
function true_type (line 5023) | struct is_proto_enum< ::sentencepiece::ModelProto_SentencePiece_Type> : ...
FILE: src/char_model.cc
type sentencepiece (line 18) | namespace sentencepiece {
type character (line 19) | namespace character {
function EncodeResult (line 28) | EncodeResult Model::Encode(absl::string_view normalized) const {
FILE: src/char_model.h
function namespace (line 21) | namespace sentencepiece {
FILE: src/char_model_test.cc
type sentencepiece (line 21) | namespace sentencepiece {
type character (line 22) | namespace character {
function ModelProto (line 28) | ModelProto MakeBaseModelProto() {
function AddPiece (line 44) | void AddPiece(ModelProto *model_proto, const std::string &piece,
function TEST (line 51) | TEST(ModelTest, EncodeTest) {
function TEST (line 109) | TEST(CharModelTest, NotSupportedTest) {
FILE: src/char_model_trainer.cc
type sentencepiece (line 21) | namespace sentencepiece {
type character (line 22) | namespace character {
FILE: src/char_model_trainer.h
function namespace (line 21) | namespace sentencepiece {
FILE: src/char_model_trainer_test.cc
type sentencepiece (line 26) | namespace sentencepiece {
type character (line 27) | namespace character {
function RunTrainer (line 33) | std::string RunTrainer(const std::vector<std::string> &input, int si...
function TEST (line 73) | TEST(TrainerTest, BasicTest) {
FILE: src/compile_charsmap_main.cc
type sentencepiece (line 36) | namespace sentencepiece {
function ToHexUInt64Array (line 39) | std::string ToHexUInt64Array(
function ToHexData (line 73) | std::string ToHexData(absl::string_view data) {
function MakeHeader (line 103) | std::string MakeHeader(
function main (line 161) | int main(int argc, char **argv) {
FILE: src/error.cc
type sentencepiece (line 20) | namespace sentencepiece {
type util (line 21) | namespace util {
type Status::Rep (line 26) | struct Status::Rep {
function StatusCode (line 58) | StatusCode Status::code() const { return ok() ? StatusCode::kOk : re...
FILE: src/filesystem.cc
type sentencepiece (line 29) | namespace sentencepiece {
type filesystem (line 30) | namespace filesystem {
class PosixReadableFile (line 32) | class PosixReadableFile : public ReadableFile {
method PosixReadableFile (line 34) | PosixReadableFile(absl::string_view filename, bool is_binary = false)
method status (line 50) | util::Status status() const { return status_; }
method ReadLine (line 52) | bool ReadLine(std::string *line) {
method ReadAll (line 56) | bool ReadAll(std::string *line) {
class PosixWritableFile (line 71) | class PosixWritableFile : public WritableFile {
method PosixWritableFile (line 73) | PosixWritableFile(absl::string_view filename, bool is_binary = false)
method status (line 89) | util::Status status() const { return status_; }
method Write (line 91) | bool Write(absl::string_view text) {
method WriteLine (line 96) | bool WriteLine(absl::string_view text) { return Write(text) && Wri...
function NewReadableFile (line 106) | std::unique_ptr<ReadableFile> NewReadableFile(absl::string_view file...
function NewWritableFile (line 111) | std::unique_ptr<WritableFile> NewWritableFile(absl::string_view file...
FILE: src/filesystem.h
function namespace (line 28) | namespace sentencepiece {
FILE: src/filesystem_test.cc
type sentencepiece (line 20) | namespace sentencepiece {
function TEST (line 22) | TEST(UtilTest, FilesystemTest) {
function TEST (line 49) | TEST(UtilTest, FilesystemInvalidFileTest) {
FILE: src/freelist.h
function namespace (line 22) | namespace sentencepiece {
FILE: src/freelist_test.cc
type sentencepiece (line 18) | namespace sentencepiece {
type model (line 19) | namespace model {
function TEST (line 21) | TEST(FreeListTest, BasicTest) {
FILE: src/init.cc
function ABSL_FLAG (line 32) | ABSL_FLAG(bool, quiet, false, "Suppress logging message.");
function ShutdownLibrary (line 61) | void ShutdownLibrary() { google::protobuf::ShutdownProtobufLibrary(); }
FILE: src/init.h
function namespace (line 20) | namespace sentencepiece {
FILE: src/init_test.cc
function TEST (line 44) | TEST(FlagsTest, ParseCommandLineFlagsTest) {
function TEST (line 66) | TEST(FlagsTest, ParseCommandLineFlagsTest2) {
function TEST (line 81) | TEST(FlagsTest, ParseCommandLineFlagsTest3) {
function TEST (line 92) | TEST(FlagsTest, ParseCommandLineFlagsEmptyStringArgs) {
function TEST (line 101) | TEST(FlagsTest, ParseCommandLineFlagsEmptyBoolArgs) {
FILE: src/model_factory.cc
type sentencepiece (line 22) | namespace sentencepiece {
FILE: src/model_factory.h
function namespace (line 23) | namespace sentencepiece {
FILE: src/model_factory_test.cc
type sentencepiece (line 18) | namespace sentencepiece {
function TEST (line 20) | TEST(ModelFactoryTest, BasicTest) {
FILE: src/model_interface.cc
type sentencepiece (line 23) | namespace sentencepiece {
function SplitIntoWords (line 153) | std::vector<absl::string_view> SplitIntoWords(absl::string_view text,
function ByteToPiece (line 210) | std::string ByteToPiece(unsigned char c) {
function PieceToByte (line 214) | int PieceToByte(absl::string_view piece) {
FILE: src/model_interface.h
function namespace (line 33) | namespace sentencepiece {
function virtual (line 114) | virtual float CalculateEntropy(absl::string_view normalized,
function virtual (line 138) | virtual const std::string &IdToPiece(int id) const {
function virtual (line 152) | virtual float GetScore(int id) const {
function virtual (line 157) | virtual bool IsUnknown(int id) const {
function virtual (line 163) | virtual bool IsControl(int id) const {
function virtual (line 169) | virtual bool IsUnused(int id) const {
function virtual (line 175) | virtual bool IsUserDefined(int id) const {
function virtual (line 181) | virtual bool IsByte(int id) const {
function virtual (line 194) | virtual bool VerifyOutputsEquivalent(absl::string_view expected,
function GetScoreInlined (line 203) | inline float GetScoreInlined(int id) const {
function IsUnknownInlined (line 207) | inline bool IsUnknownInlined(int id) const {
function IsControlInlined (line 212) | inline bool IsControlInlined(int id) const {
function IsUnusedInlined (line 217) | inline bool IsUnusedInlined(int id) const {
function IsUserDefinedInlined (line 222) | inline bool IsUserDefinedInlined(int id) const {
function IsByteInlined (line 227) | inline bool IsByteInlined(int id) const {
FILE: src/model_interface_test.cc
type sentencepiece (line 22) | namespace sentencepiece {
function ModelProto (line 31) | ModelProto MakeBaseModelProto(TrainerSpec::ModelType type,
function AddPiece (line 50) | void AddPiece(ModelProto *model_proto, const std::string &piece,
function AddBytePiece (line 57) | void AddBytePiece(ModelProto *model_proto, unsigned char byte) {
function TEST (line 63) | TEST(ModelInterfaceTest, GetDefaultPieceTest) {
function TEST (line 111) | TEST(ModelInterfaceTest, SetModelInterfaceTest) {
function TEST (line 125) | TEST(ModelInterfaceTest, PieceToIdTest) {
function TEST (line 210) | TEST(ModelInterfaceTest, InvalidModelTest) {
function TEST (line 245) | TEST(ModelInterfaceTest, ByteFallbackModelTest) {
function RandomString (line 279) | std::string RandomString(int length) {
function TEST (line 294) | TEST(ModelInterfaceTest, PieceToIdStressTest) {
function TEST (line 321) | TEST(ModelInterfaceTest, SplitIntoWordsTest) {
function TEST (line 360) | TEST(ModelInterfaceTest, SplitIntoWordsSuffixTest) {
function TEST (line 416) | TEST(ModelInterfaceTest, SplitIntoWordsWhiteSpaceOnly) {
function TEST (line 460) | TEST(ModelInterfaceTest, ByteToPieceTest) {
function TEST (line 468) | TEST(ModelInterfaceTest, PieceToByteTest) {
function TEST (line 485) | TEST(ModelInterfaceTest, VerifyOutputsEquivalent) {
FILE: src/normalization_rule.h
function namespace (line 4) | namespace sentencepiece {
FILE: src/normalizer.cc
type sentencepiece (line 28) | namespace sentencepiece {
type normalizer (line 29) | namespace normalizer {
FILE: src/normalizer.h
function namespace (line 30) | namespace sentencepiece {
FILE: src/normalizer_test.cc
type sentencepiece (line 24) | namespace sentencepiece {
type normalizer (line 25) | namespace normalizer {
function NormalizerSpec (line 33) | NormalizerSpec MakeDefaultSpec() {
function TEST (line 38) | TEST(NormalizerTest, NormalizeTest) {
function TEST (line 78) | TEST(NormalizerTest, NormalizeWithoutDummyPrefixTest) {
function TEST (line 98) | TEST(NormalizerTest, NormalizeTreatWSAsSuffixTest) {
function TEST (line 114) | TEST(NormalizerTest, NormalizeWithoutRemoveExtraWhitespacesTest) {
function TEST (line 131) | TEST(NormalizerTest, NormalizeWithoutEscapeWhitespacesTest) {
function TEST (line 150) | TEST(NormalizeTest, NomalizeWithSpaceContainedRules) {
function TEST (line 267) | TEST(NormalizerTest, NormalizeReplacementChar) {
function TEST (line 278) | TEST(NormalizerTest, NormalizeFullTest) {
function TEST (line 360) | TEST(NormalizerTest, EncodeDecodePrecompiledCharsMapTest) {
function TEST (line 396) | TEST(NormalizerTest, StatusTest) {
function TEST (line 416) | TEST(NormalizerTest, PrefixMatcherTest) {
function TEST (line 439) | TEST(NormalizerTest, PrefixMatcherWithEmptyTest) {
FILE: src/pretokenizer_for_training.cc
type sentencepiece (line 20) | namespace sentencepiece {
type pretokenizer (line 21) | namespace pretokenizer {
FILE: src/pretokenizer_for_training.h
function namespace (line 26) | namespace sentencepiece {
FILE: src/pretokenizer_for_training_test.cc
type sentencepiece (line 22) | namespace sentencepiece {
type pretokenizer (line 23) | namespace pretokenizer {
class MockPretokenizer (line 25) | class MockPretokenizer : public PretokenizerForTrainingInterface {
method MockPretokenizer (line 27) | MockPretokenizer() {}
method SentencePieceText (line 30) | SentencePieceText Tokenize(absl::string_view text) const override {
method status (line 34) | util::Status status() const override { return util::OkStatus(); }
method SetOutput (line 36) | void SetOutput(const SentencePieceText &spt) { spt_ = spt; }
function TEST (line 42) | TEST(PretokenizerForTrainingTest, BaseTest) {
FILE: src/sentencepiece_processor.cc
type sentencepiece (line 43) | namespace sentencepiece {
function ToPieceArray (line 57) | std::vector<absl::string_view> ToPieceArray(const std::vector<std::str...
function ConvertToUnicodeSpansInternal (line 63) | void ConvertToUnicodeSpansInternal(SentencePieceText *spt) {
function ImmutableSentencePieceText_ImmutableSentencePiece (line 147) | ImmutableSentencePieceText_ImmutableSentencePiece
function SentencePieceText (line 160) | SentencePieceText *ImmutableSentencePieceText::mutable_proto() {
function ImmutableSentencePieceText (line 183) | ImmutableSentencePieceText ImmutableNBestSentencePieceText::nbests(
function NBestSentencePieceText (line 197) | NBestSentencePieceText *ImmutableNBestSentencePieceText::mutable_proto...
function ModelProto (line 1117) | const ModelProto &SentencePieceProcessor::model_proto() const {
function NormalizerSpec (line 1125) | NormalizerSpec *SentencePieceProcessor::mutable_normalizer_spec() const {
type io (line 1135) | namespace io {
function LoadModelProto (line 1136) | util::Status LoadModelProto(absl::string_view filename,
function SaveModelProto (line 1156) | util::Status SaveModelProto(absl::string_view filename,
FILE: src/sentencepiece_processor.h
function namespace (line 30) | namespace absl {
function namespace (line 36) | namespace sentencepiece {
FILE: src/sentencepiece_processor_test.cc
type sentencepiece (line 32) | namespace sentencepiece {
class MockModel (line 37) | class MockModel : public ModelInterface {
method SetEncodeResult (line 39) | void SetEncodeResult(absl::string_view input, const EncodeResult &ou...
method SetNBestEncodeResult (line 44) | void SetNBestEncodeResult(absl::string_view input,
method EncodeResult (line 50) | EncodeResult Encode(absl::string_view normalized) const {
method EncodeResult (line 55) | EncodeResult SampleEncode(absl::string_view normalized, float alpha)...
method NBestEncodeResult (line 60) | NBestEncodeResult NBestEncode(absl::string_view normalized,
method IsSampleEncodeAvailable (line 66) | bool IsSampleEncodeAvailable() const override { return true; }
method IsNBestEncodeAvailable (line 68) | bool IsNBestEncodeAvailable() const override { return true; }
method IsControl (line 70) | bool IsControl(int id) const { return id == 1 || id == 2; }
method IsUnknown (line 72) | bool IsUnknown(int id) const { return id == 0; }
method GetPieceSize (line 74) | int GetPieceSize() const { return 10; }
method PieceToId (line 76) | int PieceToId(absl::string_view piece) const { return 0; }
method GetScore (line 80) | float GetScore(int id) const { return 0.0; }
class ByteFallbackMockModel (line 89) | class ByteFallbackMockModel : public MockModel {
method ByteFallbackEnabled (line 91) | bool ByteFallbackEnabled() const override { return true; }
function GetSpVec (line 94) | std::vector<std::string> GetSpVec(const EncodeResult &pieces) {
function GetIdVec (line 102) | std::vector<int> GetIdVec(const EncodeResult &pieces) {
function GetSpVec (line 110) | std::vector<std::string> GetSpVec(const SentencePieceText &spt) {
function NormalizerSpec (line 118) | NormalizerSpec MakeDefaultNormalizerSpec() {
function TEST (line 122) | TEST(SentencepieceProcessorTest, StatusTest) {
function TEST (line 130) | TEST(SentencepieceProcessorTest, EncodeTest) {
function TEST (line 418) | TEST(SentencepieceProcessorTest, NBestEncodeTest) {
function TEST (line 472) | TEST(SentencepieceProcessorTest, SampleEncodeTest) {
function TEST (line 544) | TEST(SentencepieceProcessorTest, DecodeTest) {
function TEST (line 711) | TEST(SentencepieceProcessorTest, DummyPrefixDecodeTest) {
function TEST (line 791) | TEST(SentencepieceProcessorTest, ByteFallbackDecodeTest) {
function AddPiece (line 946) | void AddPiece(ModelProto *model_proto, absl::string_view piece,
function TEST (line 953) | TEST(SentencePieceProcessorTest, LoadInvalidModelTest) {
function TEST (line 959) | TEST(SentencePieceProcessorTest, LoadSerializedProtoTest) {
function TEST (line 974) | TEST(SentencePieceProcessorTest, EndToEndTest) {
function TEST (line 1375) | TEST(SentencePieceProcessorTest, SkipNormalizationTest) {
function TEST (line 1407) | TEST(SentencePieceProcessorTest, ExtraOptionsUndefinedTest) {
function TEST (line 1427) | TEST(SentencePieceProcessorTest, OverrideSpecialPieceTest) {
function TEST (line 1461) | TEST(SentencePieceProcessorTest, VocabularyTest) {
function TEST (line 1564) | TEST(SentencePieceProcessorTest, ImmutableSentencePieceTextTest) {
function TEST (line 1624) | TEST(SentencePieceProcessorTest, ImmutableNBestSentencePieceTextTest) {
function TEST (line 1657) | TEST(SentencePieceProcessorTest, ConvertToUnicodeSpansTest) {
FILE: src/sentencepiece_trainer.cc
type sentencepiece (line 35) | namespace sentencepiece {
function NormalizerSpec (line 92) | NormalizerSpec SentencePieceTrainer::GetNormalizerSpec(absl::string_vi...
class VectorSentenceIterator (line 202) | class VectorSentenceIterator : public SentenceIterator {
method VectorSentenceIterator (line 204) | explicit VectorSentenceIterator(const std::vector<std::string> &values)
method done (line 207) | virtual bool done() const { return iter_ == end_; }
method Next (line 208) | void Next() override { ++iter_; }
method status (line 210) | util::Status status() const override { return util::OkStatus(); }
function NormalizerSpec (line 361) | NormalizerSpec *SentencePieceNormalizer::mutable_normalizer_spec() con...
function ConvertToUnicodeAlignment (line 369) | void ConvertToUnicodeAlignment(absl::string_view orig, absl::string_vi...
FILE: src/sentencepiece_trainer.h
function namespace (line 30) | namespace pretokenizer {
function namespace (line 34) | namespace normalizer {
function class (line 46) | class SentenceIterator {
FILE: src/sentencepiece_trainer_test.cc
type sentencepiece (line 23) | namespace sentencepiece {
function CheckVocab (line 32) | void CheckVocab(absl::string_view filename, int expected_vocab_size) {
function CheckNormalizer (line 40) | void CheckNormalizer(absl::string_view filename, bool expected_has_nor...
function TEST (line 52) | TEST(SentencePieceTrainerTest, TrainFromArgsTest) {
function TEST (line 102) | TEST(SentencePieceTrainerTest, TrainFromIterator) {
function TEST (line 151) | TEST(SentencePieceTrainerTest, TrainWithCustomNormalizationRule) {
function TEST (line 164) | TEST(SentencePieceTrainerTest, TrainWithCustomDenormalizationRule) {
function TEST (line 183) | TEST(SentencePieceTrainerTest, TrainErrorTest) {
function TEST (line 191) | TEST(SentencePieceTrainerTest, TrainTest) {
function TEST (line 201) | TEST(SentencePieceTrainerTest, SetProtoFieldTest) {
function TEST (line 278) | TEST(SentencePieceTrainerTest, MergeSpecsFromArgs) {
function TEST (line 339) | TEST(SentencePieceTrainerTest, PopulateModelTypeFromStringTest) {
function TEST (line 357) | TEST(SentencePieceTrainerTest, NormalizationTest) {
FILE: src/spec_parser.h
function namespace (line 26) | namespace sentencepiece {
FILE: src/spm_decode_main.cc
function main (line 36) | int main(int argc, char *argv[]) {
FILE: src/spm_encode_main.cc
function main (line 55) | int main(int argc, char *argv[]) {
FILE: src/spm_export_vocab_main.cc
function main (line 30) | int main(int argc, char *argv[]) {
FILE: src/spm_normalize_main.cc
function main (line 48) | int main(int argc, char *argv[]) {
FILE: src/test_main.cc
function main (line 26) | int main(int argc, char **argv) {
FILE: src/testharness.cc
type sentencepiece (line 32) | namespace sentencepiece {
type test (line 33) | namespace test {
type Test (line 36) | struct Test {
function RegisterTest (line 44) | bool RegisterTest(const char *base, const char *name, void (*func)()) {
function RunAllTests (line 56) | int RunAllTests() {
FILE: src/testharness.h
function namespace (line 31) | namespace testing {
function namespace (line 36) | namespace sentencepiece {
FILE: src/trainer_factory.cc
type sentencepiece (line 22) | namespace sentencepiece {
FILE: src/trainer_factory.h
function namespace (line 23) | namespace sentencepiece {
FILE: src/trainer_factory_test.cc
type sentencepiece (line 18) | namespace sentencepiece {
function TEST (line 20) | TEST(TrainerFactoryTest, BasicTest) {
FILE: src/trainer_interface.cc
type sentencepiece (line 41) | namespace sentencepiece {
function VerifySpec (line 53) | util::Status VerifySpec(const TrainerSpec &trainer_spec) {
function is_unicode_decimal_number (line 97) | bool is_unicode_decimal_number(char32 c) {
class SentenceSelector (line 101) | class SentenceSelector {
method SentenceSelector (line 107) | SentenceSelector(TrainerInterface::Sentences *sentences,
method Finish (line 123) | void Finish() const {
method Add (line 135) | bool Add(const std::pair<std::string, int64_t> &sentence) {
method total_size (line 154) | size_t total_size() const {
function AddDPNoise (line 310) | void AddDPNoise(const TrainerSpec &trainer_spec, std::mt19937 *generator,
FILE: src/trainer_interface.h
function namespace (line 33) | namespace sentencepiece {
FILE: src/trainer_interface_test.cc
type sentencepiece (line 25) | namespace sentencepiece {
function char32 (line 31) | static char32 ToChar32(absl::string_view str) {
function TEST (line 36) | TEST(TrainerInterfaceTest, IsValidSentencePieceTest) {
function TEST (line 202) | TEST(TrainerInterfaceTest, OverrideSpecialPiecesTest) {
function TEST (line 419) | TEST(TrainerInterfaceTest, BytePiecesTest) {
function TEST (line 444) | TEST(TrainerInterfaceTest, SerializeTest) {
function TEST (line 494) | TEST(TrainerInterfaceTest, CharactersTest) {
function TEST (line 558) | TEST(TrainerInterfaceTest, MultiFileSentenceIteratorTest) {
function TEST (line 581) | TEST(TrainerInterfaceTest, MultiFileSentenceIteratorErrorTest) {
FILE: src/unicode_script.cc
type sentencepiece (line 22) | namespace sentencepiece {
type unicode_script (line 23) | namespace unicode_script {
class GetScriptInternal (line 25) | class GetScriptInternal {
method GetScriptInternal (line 27) | GetScriptInternal() { InitTable(&smap_); }
method ScriptType (line 29) | ScriptType GetScript(char32 c) const {
function ScriptType (line 38) | ScriptType GetScript(char32 c) {
FILE: src/unicode_script.h
function namespace (line 20) | namespace sentencepiece {
FILE: src/unicode_script_map.h
function namespace (line 18) | namespace sentencepiece {
FILE: src/unicode_script_test.cc
type sentencepiece (line 21) | namespace sentencepiece {
type unicode_script (line 22) | namespace unicode_script {
function ScriptType (line 23) | ScriptType GetScriptType(absl::string_view s) {
function TEST (line 29) | TEST(UnicodeScript, GetScriptTypeTest) {
FILE: src/unigram_model.cc
type sentencepiece (line 32) | namespace sentencepiece {
type unigram (line 33) | namespace unigram {
function LogSumExp (line 47) | inline float LogSumExp(float x, float y, bool init_mode) {
function Gumbel (line 63) | inline float Gumbel() {
type Hypothesis (line 296) | struct Hypothesis {
function Hypothesis (line 313) | Hypothesis *CloneHypAndDependents(
class HypothesisComparator (line 369) | class HypothesisComparator {
function EncodeResult (line 674) | EncodeResult Model::Encode(absl::string_view normalized) const {
function NBestEncodeResult (line 695) | NBestEncodeResult Model::NBestEncode(absl::string_view normalized,
function EncodeResult (line 723) | EncodeResult Model::SampleEncode(absl::string_view normalized,
function NBestEncodeResult (line 741) | NBestEncodeResult Model::SampleEncodeAndScore(absl::string_view norm...
function EncodeResult (line 889) | EncodeResult Model::EncodeOptimized(absl::string_view normalized) co...
FILE: src/unigram_model.h
function namespace (line 30) | namespace unigram {
FILE: src/unigram_model_test.cc
type sentencepiece (line 29) | namespace sentencepiece {
type unigram (line 30) | namespace unigram {
function TEST (line 32) | TEST(LatticeTest, SetSentenceTest) {
function TEST (line 76) | TEST(LatticeTest, InsertTest) {
function TEST (line 162) | TEST(LatticeTest, ViterbiFromIncompleteLatticeTest) {
function GetTokenized (line 176) | std::string GetTokenized(const std::vector<Lattice::Node *> &nodes) {
function InsertWithScore (line 184) | void InsertWithScore(Lattice *lattice, int pos, int length, float sc...
function InsertWithScoreAndId (line 188) | void InsertWithScoreAndId(Lattice *lattice, int pos, int length, flo...
function TEST (line 195) | TEST(LatticeTest, ViterbiTest) {
function TEST (line 214) | TEST(LatticeTest, NBestTest) {
function TEST (line 240) | TEST(LatticeTest, NBestSampleTest) {
function TEST (line 321) | TEST(LatticeTest, CalculateEntropyTest) {
function TEST (line 357) | TEST(LatticeTest, ForwardAlgorithmTest) {
function TEST (line 394) | TEST(LatticeTest, PopulateMarginalTest) {
function TEST (line 429) | TEST(LatticeTest, SampleTest) {
function ModelProto (line 468) | ModelProto MakeBaseModelProto() {
class UnigramModelTest (line 492) | class UnigramModelTest : public test::TestWithParam<Model::EncoderVe...
method SetUp (line 494) | void SetUp() override { encoder_version_ = GetParam(); }
method TearDown (line 495) | void TearDown() override {}
function AddPiece (line 499) | void AddPiece(ModelProto *model_proto, const std::string &piece,
function TEST (line 506) | TEST(UnigramModelTest, SetUnigramModelTest) {
function TEST (line 519) | TEST(UnigramModelTest, SampleEncodeAndScoreTest) {
function TEST_P (line 615) | TEST_P(UnigramModelTest, PieceToIdTest) {
function TEST_P (line 678) | TEST_P(UnigramModelTest, PopulateNodesAllUnknownsTest) {
function TEST_P (line 697) | TEST_P(UnigramModelTest, PopulateNodesTest) {
function TEST_P (line 729) | TEST_P(UnigramModelTest, PopulateNodesWithUnusedTest) {
function TEST_P (line 756) | TEST_P(UnigramModelTest, ModelNBestTest) {
function TEST_P (line 782) | TEST_P(UnigramModelTest, EncodeTest) {
function TEST_P (line 873) | TEST_P(UnigramModelTest, EncodeWithUnusedTest) {
function TEST_P (line 930) | TEST_P(UnigramModelTest, VerifyOutputsEquivalent) {
FILE: src/unigram_model_trainer.cc
type sentencepiece (line 40) | namespace sentencepiece {
type unigram (line 41) | namespace unigram {
function Digamma (line 46) | double Digamma(double x) {
function ToLogProb (line 59) | void ToLogProb(IT begin, IT end) {
class BoundedPriorityQueue (line 71) | class BoundedPriorityQueue {
method BoundedPriorityQueue (line 73) | explicit BoundedPriorityQueue(size_t size) : size_(size) {}
method push (line 76) | void push(T elem, int64_t score) {
method resize (line 89) | void resize() {
FILE: src/unigram_model_trainer.h
function namespace (line 29) | namespace sentencepiece {
FILE: src/unigram_model_trainer_test.cc
type sentencepiece (line 29) | namespace sentencepiece {
type unigram (line 30) | namespace unigram {
function TEST (line 35) | TEST(UnigramTrainerTest, TrainerModelTest) {
type TrainerResult (line 42) | struct TrainerResult {
function TrainerResult (line 47) | TrainerResult RunTrainer(const std::vector<std::string>& input, int ...
function TEST (line 114) | TEST(UnigramTrainerTest, BasicTest) {
function TEST (line 128) | TEST(UnigramTrainerTest, BasicDPTest) {
function TEST (line 155) | TEST(UnigramTrainerTest, EndToEndTest) {
FILE: src/util.cc
type sentencepiece (line 22) | namespace sentencepiece {
function SetRandomGeneratorSeed (line 29) | void SetRandomGeneratorSeed(uint32_t seed) {
function GetRandomGeneratorSeed (line 33) | uint32_t GetRandomGeneratorSeed() {
function GetDataDir (line 48) | std::string GetDataDir() {
function SetDataDir (line 53) | void SetDataDir(absl::string_view data_dir) {
function SetMinLogLevel (line 59) | void SetMinLogLevel(int v) {
type string_util (line 63) | namespace string_util {
function char32 (line 66) | char32 DecodeUTF8(const char *begin, const char *end, size_t *mblen) {
function IsStructurallyValid (line 101) | bool IsStructurallyValid(absl::string_view str) {
function EncodeUTF8 (line 114) | size_t EncodeUTF8(char32 c, char *output) {
function UnicodeCharToUTF8 (line 151) | std::string UnicodeCharToUTF8(const char32 c) { return UnicodeTextTo...
function UnicodeText (line 153) | UnicodeText UTF8ToUnicodeText(absl::string_view utf8) {
function UnicodeTextToUTF8 (line 166) | std::string UnicodeTextToUTF8(const UnicodeText &utext) {
type random (line 177) | namespace random {
type util (line 194) | namespace util {
function StrError (line 196) | std::string StrError(int errnum) {
function StrSplitAsCSV (line 214) | std::vector<std::string> StrSplitAsCSV(absl::string_view text) {
function Utf8ToWide (line 247) | std::wstring Utf8ToWide(absl::string_view input) {
type log_domain (line 262) | namespace log_domain {
function LogSum (line 263) | double LogSum(const std::vector<double> &xs) {
FILE: src/util.h
function namespace (line 36) | namespace sentencepiece {
function namespace (line 54) | namespace string_util {
function namespace (line 207) | namespace port {
function mix (line 259) | inline void mix(uint64_t &a, uint64_t &b, uint64_t &c) { // 64bit version
function FingerprintCat (line 298) | inline uint64_t FingerprintCat(uint64_t x, uint64_t y) {
function namespace (line 306) | namespace random {
function namespace (line 344) | namespace util {
function namespace (line 461) | namespace port {
FILE: src/util_test.cc
type sentencepiece (line 23) | namespace sentencepiece {
function TEST (line 28) | TEST(UtilTest, LexicalCastTest) {
function TEST (line 53) | TEST(UtilTest, Hex) {
function TEST (line 64) | TEST(UtilTest, StringViewTest) {
function TEST (line 69) | TEST(UtilTest, EncodePODTet) {
function TEST (line 114) | TEST(UtilTest, ItoaTest) {
function TEST (line 128) | TEST(UtilTest, OneCharLenTest) {
function TEST (line 133) | TEST(UtilTest, DecodeUTF8Test) {
function TEST (line 228) | TEST(UtilTest, EncodeUTF8Test) {
function TEST (line 257) | TEST(UtilTest, UnicodeCharToUTF8Test) {
function TEST (line 267) | TEST(UtilTest, IsStructurallyValidTest) {
function TEST (line 292) | TEST(UtilTest, UnicodeTextToUTF8Test) {
function TEST (line 305) | TEST(UtilTest, MapUtilTest) {
function TEST (line 324) | TEST(UtilTest, InputOutputBufferTest) {
function TEST (line 351) | TEST(UtilTest, InputOutputBufferInvalidFileTest) {
function TEST (line 356) | TEST(UtilTest, STLDeleteELementsTest) {
function TEST (line 376) | TEST(UtilTest, StatusTest) {
function TEST (line 403) | TEST(UtilTest, JoinPathTest) {
function TEST (line 415) | TEST(UtilTest, ReservoirSamplerTest) {
function TEST (line 425) | TEST(UtilTest, StrSplitAsCSVTest) {
function TEST (line 450) | TEST(SentencePieceTrainerTest, DataDirTest) {
FILE: src/word_model.cc
type sentencepiece (line 18) | namespace sentencepiece {
type word (line 19) | namespace word {
function EncodeResult (line 28) | EncodeResult Model::Encode(absl::string_view normalized) const {
FILE: src/word_model.h
function namespace (line 21) | namespace sentencepiece {
FILE: src/word_model_test.cc
type sentencepiece (line 22) | namespace sentencepiece {
type word (line 23) | namespace word {
function ModelProto (line 29) | ModelProto MakeBaseModelProto() {
function AddPiece (line 45) | void AddPiece(ModelProto *model_proto, const std::string &piece,
function TEST (line 52) | TEST(WordModelTest, EncodeTest) {
function TEST (line 83) | TEST(WordModelTest, NotSupportedTest) {
FILE: src/word_model_trainer.cc
type sentencepiece (line 24) | namespace sentencepiece {
type word (line 25) | namespace word {
FILE: src/word_model_trainer.h
function namespace (line 21) | namespace sentencepiece {
FILE: src/word_model_trainer_test.cc
type sentencepiece (line 26) | namespace sentencepiece {
type word (line 27) | namespace word {
function RunTrainer (line 33) | std::string RunTrainer(const std::vector<std::string> &input, int si...
function TEST (line 75) | TEST(TrainerTest, BasicTest) {
FILE: third_party/absl/container/btree_set.h
function namespace (line 20) | namespace absl {
FILE: third_party/absl/container/flat_hash_map.h
function namespace (line 20) | namespace absl {
FILE: third_party/absl/container/flat_hash_set.h
function namespace (line 20) | namespace absl {
FILE: third_party/absl/flags/flag.cc
type FlagFunc (line 52) | struct FlagFunc {
function FlagMap (line 65) | FlagMap &GetFlagMap() {
function FlagList (line 70) | FlagList &GetFlagList() {
function FlagsUsageConfig (line 80) | FlagsUsageConfig &GetFlagsUsageConfig() {
function CommandLineGetFlag (line 85) | bool CommandLineGetFlag(int argc, char **argv, std::string *key,
function PrintHelp (line 117) | std::string PrintHelp() {
function RegisterFlag (line 133) | void RegisterFlag(const std::string &name, std::shared_ptr<FlagFunc> fun...
function T (line 157) | const T &Flag<T>::value() const {
class Flag<std::string> (line 179) | class Flag<std::string>
class Flag<int32_t> (line 180) | class Flag<int32_t>
class Flag<uint32_t> (line 181) | class Flag<uint32_t>
class Flag<double> (line 182) | class Flag<double>
class Flag<float> (line 183) | class Flag<float>
class Flag<bool> (line 184) | class Flag<bool>
class Flag<int64_t> (line 185) | class Flag<int64_t>
class Flag<uint64_t> (line 186) | class Flag<uint64_t>
function ParseCommandLine (line 188) | std::vector<char *> ParseCommandLine(int argc, char *argv[]) {
function SetProgramUsageMessage (line 236) | void SetProgramUsageMessage(absl::string_view new_usage_message) {
function SetFlagsUsageConfig (line 241) | void SetFlagsUsageConfig(FlagsUsageConfig usage_config) {
FILE: third_party/absl/flags/flag.h
function namespace (line 24) | namespace internal {
function T (line 37) | const T &value() const;
FILE: third_party/absl/flags/parse.h
function namespace (line 20) | namespace absl {
FILE: third_party/absl/flags/usage.h
function namespace (line 20) | namespace absl {
FILE: third_party/absl/flags/usage_config.h
function namespace (line 21) | namespace absl {
FILE: third_party/absl/log/globals.h
function namespace (line 20) | namespace absl {
FILE: third_party/absl/log/log.cc
type absl (line 19) | namespace absl {
function LogSeverityAtLeast (line 24) | LogSeverityAtLeast MinLogLevel() {
function SetMinLogLevel (line 28) | void SetMinLogLevel(LogSeverityAtLeast v) {
FILE: third_party/absl/log/log.h
function namespace (line 22) | namespace absl {
FILE: third_party/absl/strings/ascii.h
function namespace (line 25) | namespace absl {
FILE: third_party/absl/strings/match.h
function namespace (line 23) | namespace absl {
FILE: third_party/absl/strings/numbers.h
function namespace (line 23) | namespace absl {
FILE: third_party/absl/strings/str_cat.h
function namespace (line 25) | namespace absl {
FILE: third_party/absl/strings/str_format.h
function namespace (line 25) | namespace absl {
FILE: third_party/absl/strings/str_join.h
function namespace (line 23) | namespace absl {
FILE: third_party/absl/strings/str_replace.h
function namespace (line 23) | namespace absl {
FILE: third_party/absl/strings/str_split.h
function namespace (line 24) | namespace absl {
FILE: third_party/absl/strings/string_view.h
function namespace (line 33) | namespace absl {
function string_view (line 51) | inline string_view NullSafeStringView(const char* p) {
FILE: third_party/absl/strings/strip.h
function namespace (line 23) | namespace absl {
FILE: third_party/darts_clone/darts.h
function namespace (line 21) | namespace Darts {
type DoubleArrayImpl (line 316) | typedef DoubleArrayImpl<void, void, int, void> DoubleArray;
function namespace (line 549) | namespace Details {
function T (line 607) | const T &operator[](std::size_t id) const {
function clear (line 621) | void clear() {
function push_back (line 628) | void push_back(const T &value) {
function pop_back (line 631) | void pop_back() {
function append (line 635) | void append() {
function append (line 640) | void append(const T &value) {
function resize (line 646) | void resize(std::size_t size) {
function resize (line 657) | void resize(std::size_t size, const T &value) {
function reserve (line 669) | void reserve(std::size_t size) {
function push (line 745) | void push(const T &value) {
function pop (line 748) | void pop() {
function clear (line 752) | void clear() {
function class (line 768) | class BitVector {
function build (line 838) | inline void BitVector::build() {
function char_type (line 866) | const char_type *keys(std::size_t id) const {
function uchar_type (line 869) | uchar_type keys(std::size_t key_id, std::size_t char_id) const {
function value_type (line 892) | const value_type values(std::size_t id) const {
function set_sibling (line 922) | void set_sibling(id_type sibling) {
function set_value (line 925) | void set_value(value_type value) {
function set_label (line 928) | void set_label(uchar_type label) {
function set_is_state (line 931) | void set_is_state(bool is_state) {
function set_has_sibling (line 934) | void set_has_sibling(bool has_sibling) {
function class (line 978) | class DawgUnit {
function class (line 1015) | class DawgBuilder {
function id_type (line 1027) | id_type child(id_type id) const {
function id_type (line 1030) | id_type sibling(id_type id) const {
function value (line 1033) | int value(id_type id) const {
function is_leaf (line 1037) | bool is_leaf(id_type id) const {
function uchar_type (line 1040) | uchar_type label(id_type id) const {
function is_intersection (line 1044) | bool is_intersection(id_type id) const {
function id_type (line 1047) | id_type intersection_id(id_type id) const {
function free_node (line 1097) | void free_node(id_type id) {
function id_type (line 1101) | static id_type hash(id_type key) {
function init (line 1112) | inline void DawgBuilder::init() {
function finish (line 1124) | inline void DawgBuilder::finish() {
function insert (line 1138) | inline void DawgBuilder::insert(const char *key, std::size_t length,
function clear (line 1193) | inline void DawgBuilder::clear() {
function flush (line 1204) | inline void DawgBuilder::flush(id_type id) {
function expand_table (line 1247) | inline void DawgBuilder::expand_table() {
function id_type (line 1262) | inline id_type DawgBuilder::find_unit(id_type id, id_type *hash_id) const {
function id_type (line 1275) | inline id_type DawgBuilder::find_node(id_type node_id,
function are_equal (line 1291) | inline bool DawgBuilder::are_equal(id_type node_id, id_type unit_id) con...
function id_type (line 1312) | inline id_type DawgBuilder::hash_unit(id_type id) const {
function id_type (line 1326) | inline id_type DawgBuilder::hash_node(id_type id) const {
function id_type (line 1336) | inline id_type DawgBuilder::append_unit() {
function id_type (line 1344) | inline id_type DawgBuilder::append_node() {
function class (line 1361) | class DoubleArrayBuilderUnit {
function class (line 1400) | class DoubleArrayBuilderExtraUnit {
function class (line 1444) | class DoubleArrayBuilder {
type DoubleArrayBuilderUnit (line 1467) | typedef DoubleArrayBuilderUnit unit_type;
type DoubleArrayBuilderExtraUnit (line 1468) | typedef DoubleArrayBuilderExtraUnit extra_type;
function extra_type (line 1485) | const extra_type &extras(id_type id) const {
function copy (line 1531) | inline void DoubleArrayBuilder::copy(std::size_t *size_ptr,
function clear (line 1545) | inline void DoubleArrayBuilder::clear() {
function build_from_dawg (line 1566) | inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg) {
function build_from_dawg (line 1596) | inline void DoubleArrayBuilder::build_from_dawg(const DawgBuilder &dawg,
function id_type (line 1629) | inline id_type DoubleArrayBuilder::arrange_from_dawg(const DawgBuilder &...
function id_type (line 1767) | inline id_type DoubleArrayBuilder::find_valid_offset(id_type id) const {
function is_valid_offset (line 1784) | inline bool DoubleArrayBuilder::is_valid_offset(id_type id,
function reserve_id (line 1804) | inline void DoubleArrayBuilder::reserve_id(id_type id) {
function expand_units (line 1820) | inline void DoubleArrayBuilder::expand_units() {
function fix_all_blocks (line 1855) | inline void DoubleArrayBuilder::fix_all_blocks() {
function fix_block (line 1867) | inline void DoubleArrayBuilder::fix_block(id_type block_id) {
FILE: third_party/esaxx/esa.hxx
type esaxx_private (line 35) | namespace esaxx_private {
function index_type (line 37) | index_type suffixtree(string_type T, sarray_type SA, sarray_type L, sa...
function esaxx (line 113) | int esaxx(string_type T, sarray_type SA, sarray_type L, sarray_type R, s...
FILE: third_party/esaxx/sais.hxx
type saisxx_private (line 42) | namespace saisxx_private {
function getCounts (line 46) | void
function getBuckets (line 77) | void
function induceSA (line 87) | void
function computeBWT (line 120) | int
function suffixsort (line 161) | int
function saisxx (line 325) | int
function index_type (line 345) | index_type
FILE: third_party/protobuf-lite/arena.cc
type google (line 48) | namespace google {
type protobuf (line 49) | namespace protobuf {
type internal (line 54) | namespace internal {
function ArenaFree (line 75) | void ArenaFree(void* object, size_t size) {
function uint64 (line 190) | uint64 ArenaImpl::Reset() {
function PROTOBUF_NOINLINE (line 263) | PROTOBUF_NOINLINE
function PROTOBUF_NOINLINE (line 298) | PROTOBUF_NOINLINE
function PROTOBUF_NOINLINE (line 303) | PROTOBUF_NOINLINE
function PROTOBUF_NOINLINE (line 310) | PROTOBUF_NOINLINE
function PROTOBUF_NOINLINE (line 315) | PROTOBUF_NOINLINE
function uint64 (line 331) | uint64 ArenaImpl::SpaceAllocated() const {
function uint64 (line 335) | uint64 ArenaImpl::SpaceUsed() const {
function uint64 (line 348) | uint64 SerialArena::SpaceUsed() const {
function SerialArena (line 396) | SerialArena* SerialArena::New(Block* b, void* owner, ArenaImpl* ar...
function PROTOBUF_NOINLINE (line 412) | PROTOBUF_NOINLINE
FILE: third_party/protobuf-lite/arenastring.cc
type google (line 46) | namespace google {
type protobuf (line 47) | namespace protobuf {
type internal (line 48) | namespace internal {
FILE: third_party/protobuf-lite/bytestream.cc
type google (line 38) | namespace google {
type protobuf (line 39) | namespace protobuf {
type strings (line 40) | namespace strings {
function StringPiece (line 152) | StringPiece ArrayByteSource::Peek() {
function StringPiece (line 175) | StringPiece LimitByteSource::Peek() {
FILE: third_party/protobuf-lite/coded_stream.cc
type google (line 59) | namespace google {
type protobuf (line 60) | namespace protobuf {
type io (line 61) | namespace io {
function NextNonEmpty (line 69) | inline bool NextNonEmpty(ZeroCopyInputStream* input, const void** ...
function uint8 (line 351) | const uint8* DecodeVarint64KnownSize(const uint8* buffer, uint64* ...
function ReadVarint32FromArray (line 369) | inline ::std::pair<bool, const uint8*> ReadVarint32FromArray(
function ReadVarint64FromArray (line 414) | inline ::std::pair<bool, const uint8*> ReadVarint64FromArray(
function int64 (line 457) | int64 CodedInputStream::ReadVarint32Fallback(uint32 first_byte_or_...
function uint32 (line 505) | uint32 CodedInputStream::ReadTagSlow() {
function uint32 (line 531) | uint32 CodedInputStream::ReadTagFallback(uint32 first_byte_or_zero) {
function int64 (line 673) | int64 EpsCopyOutputStream::ByteCount(uint8* ptr) const {
function uint8 (line 704) | uint8* EpsCopyOutputStream::Trim(uint8* ptr) {
function uint8 (line 714) | uint8* EpsCopyOutputStream::FlushAndResetBuffer(uint8* ptr) {
function uint8 (line 766) | uint8* EpsCopyOutputStream::GetDirectBufferForNBytesAndAdvance(int...
function uint8 (line 787) | uint8* EpsCopyOutputStream::Next() {
function uint8 (line 825) | uint8* EpsCopyOutputStream::EnsureSpaceFallback(uint8* ptr) {
function uint8 (line 837) | uint8* EpsCopyOutputStream::WriteRawFallback(const void* data, int...
function uint8 (line 851) | uint8* EpsCopyOutputStream::WriteAliasedRaw(const void* data, int ...
function uint8 (line 864) | uint8* EpsCopyOutputStream::WriteRawLittleEndian32(const void* dat...
function uint8 (line 887) | uint8* EpsCopyOutputStream::WriteRawLittleEndian64(const void* dat...
function uint8 (line 912) | uint8* EpsCopyOutputStream::WriteStringMaybeAliasedOutline(uint32 ...
function uint8 (line 921) | uint8* EpsCopyOutputStream::WriteStringOutline(uint32 num, const s...
function uint8 (line 947) | uint8* CodedOutputStream::WriteStringWithSizeToArray(const std::st...
FILE: third_party/protobuf-lite/common.cc
type google (line 66) | namespace google {
type protobuf (line 67) | namespace protobuf {
type internal (line 69) | namespace internal {
function VerifyVersion (line 71) | void VerifyVersion(int headerVersion,
function VersionString (line 99) | std::string VersionString(int version) {
function DefaultLogHandler (line 129) | inline void DefaultLogHandler(LogLevel level, const char* filename...
function DefaultLogHandler (line 164) | void DefaultLogHandler(LogLevel level, const char* filename, int l...
function NullLogHandler (line 179) | void NullLogHandler(LogLevel /* level */, const char* /* filename */,
function LogMessage (line 187) | LogMessage& LogMessage::operator<<(const std::string& value) {
function LogMessage (line 192) | LogMessage& LogMessage::operator<<(const char* value) {
function LogMessage (line 197) | LogMessage& LogMessage::operator<<(const StringPiece& value) {
function LogMessage (line 202) | LogMessage& LogMessage::operator<<(const util::Status& status) {
function LogMessage (line 207) | LogMessage& LogMessage::operator<<(const uint128& value) {
type internal (line 126) | namespace internal {
function VerifyVersion (line 71) | void VerifyVersion(int headerVersion,
function VersionString (line 99) | std::string VersionString(int version) {
function DefaultLogHandler (line 129) | inline void DefaultLogHandler(LogLevel level, const char* filename...
function DefaultLogHandler (line 164) | void DefaultLogHandler(LogLevel level, const char* filename, int l...
function NullLogHandler (line 179) | void NullLogHandler(LogLevel /* level */, const char* /* filename */,
function LogMessage (line 187) | LogMessage& LogMessage::operator<<(const std::string& value) {
function LogMessage (line 192) | LogMessage& LogMessage::operator<<(const char* value) {
function LogMessage (line 197) | LogMessage& LogMessage::operator<<(const StringPiece& value) {
function LogMessage (line 202) | LogMessage& LogMessage::operator<<(const util::Status& status) {
function LogMessage (line 207) | LogMessage& LogMessage::operator<<(const uint128& value) {
function LogHandler (line 272) | LogHandler* SetLogHandler(LogHandler* new_func) {
type internal (line 298) | namespace internal { FunctionClosure0::~FunctionClosure0() {} }
function DoNothing (line 300) | void DoNothing() {}
function uint32 (line 308) | uint32 ghtonl(uint32 x) {
FILE: third_party/protobuf-lite/extension_set.cc
type google (line 53) | namespace google {
type protobuf (line 54) | namespace protobuf {
type internal (line 55) | namespace internal {
function real_type (line 59) | inline WireFormatLite::FieldType real_type(FieldType type) {
function cpp_type (line 64) | inline WireFormatLite::CppType cpp_type(FieldType type) {
function is_packable (line 68) | inline bool is_packable(WireFormatLite::WireType type) {
type ExtensionHasher (line 87) | struct ExtensionHasher {
function Register (line 102) | void Register(const MessageLite* containing_type, int number,
function ExtensionInfo (line 114) | const ExtensionInfo* FindRegisteredExtension(const MessageLite* co...
function CallNoArgValidityFunc (line 147) | static bool CallNoArgValidityFunc(const void* arg, int number) {
function FieldType (line 250) | FieldType ExtensionSet::ExtensionType(int number) const {
function MessageLite (line 564) | const MessageLite& ExtensionSet::GetMessage(
function MessageLite (line 585) | MessageLite* ExtensionSet::MutableMessage(int number, FieldType type,
function MessageLite (line 686) | MessageLite* ExtensionSet::ReleaseMessage(int number,
function MessageLite (line 715) | MessageLite* ExtensionSet::UnsafeArenaReleaseMessage(
function MessageLite (line 741) | const MessageLite& ExtensionSet::GetRepeatedMessage(int number,
function MessageLite (line 749) | MessageLite* ExtensionSet::MutableRepeatedMessage(int number, int ...
function MessageLite (line 756) | MessageLite* ExtensionSet::AddMessage(int number, FieldType type,
function MessageLite (line 828) | MessageLite* ExtensionSet::ReleaseLast(int number) {
function SizeOfUnion (line 884) | size_t SizeOfUnion(ItX it_xs, ItX end_xs, ItY it_ys, ItY end_ys) {
type MSLite (line 1428) | struct MSLite {
method ParseField (line 1429) | bool ParseField(int type_id, io::CodedInputStream* input) {
method SkipField (line 1435) | bool SkipField(uint32 tag, io::CodedInputStream* input) {
function uint8 (line 1458) | uint8* ExtensionSet::_InternalSerialize(int start_field_number,
function uint8 (line 1480) | uint8* ExtensionSet::InternalSerializeMessageSetWithCachedSizesToA...
function RepeatedPrimitiveDefaults (line 1914) | const RepeatedPrimitiveDefaults* RepeatedPrimitiveDefaults::defaul...
function uint8 (line 1925) | uint8* ExtensionSet::Extension::InternalSerializeFieldWithCachedSi...
function uint8 (line 2071) | uint8*
FILE: third_party/protobuf-lite/generated_enum_util.cc
type google (line 37) | namespace google {
type protobuf (line 38) | namespace protobuf {
type internal (line 39) | namespace internal {
function EnumCompareByName (line 42) | bool EnumCompareByName(const EnumEntry& a, const EnumEntry& b) {
function GetValue (line 49) | int GetValue(const EnumEntry* enums, int i, int target) {
function LookUpEnumValue (line 59) | bool LookUpEnumValue(const EnumEntry* enums, size_t size,
function LookUpEnumName (line 70) | int LookUpEnumName(const EnumEntry* enums, const int* sorted_indices,
function InitializeEnumStrings (line 83) | bool InitializeEnumStrings(
FILE: third_party/protobuf-lite/generated_message_table_driven_lite.cc
type google (line 40) | namespace google {
type protobuf (line 41) | namespace protobuf {
type internal (line 42) | namespace internal {
type UnknownFieldHandlerLite (line 51) | struct UnknownFieldHandlerLite {
method IsLite (line 54) | static constexpr bool IsLite() { return true; }
method Skip (line 56) | static bool Skip(MessageLite* msg, const ParseTable& table,
method Varint (line 67) | static void Varint(MessageLite* msg, const ParseTable& table, in...
method ParseExtension (line 78) | static bool ParseExtension(MessageLite* msg, const ParseTable& t...
function MergePartialFromCodedStreamLite (line 98) | bool MergePartialFromCodedStreamLite(MessageLite* msg, const Parse...
FILE: third_party/protobuf-lite/generated_message_util.cc
type google (line 60) | namespace google {
type protobuf (line 61) | namespace protobuf {
type internal (line 62) | namespace internal {
function DestroyMessage (line 64) | void DestroyMessage(const void* message) {
function DestroyString (line 67) | void DestroyString(const void* s) {
function InitProtobufDefaultsImpl (line 76) | static bool InitProtobufDefaultsImpl() {
function InitProtobufDefaultsSlow (line 89) | void InitProtobufDefaultsSlow() {
function StringSpaceUsedExcludingSelfLong (line 94) | size_t StringSpaceUsedExcludingSelfLong(const std::string& str) {
function T (line 106) | const T& Get(const void* ptr) {
type PrimitiveTypeHelper (line 115) | struct PrimitiveTypeHelper
type PrimitiveTypeHelper<WireFormatLite::TYPE_BOOL> (line 118) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_BOOL> {
method Serialize (line 120) | static void Serialize(const void* ptr, io::CodedOutputStream* ou...
method uint8 (line 123) | static uint8* SerializeToArray(const void* ptr, uint8* buffer) {
type PrimitiveTypeHelper<WireFormatLite::TYPE_INT32> (line 129) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_INT32> {
method Serialize (line 131) | static void Serialize(const void* ptr, io::CodedOutputStream* ou...
method uint8 (line 134) | static uint8* SerializeToArray(const void* ptr, uint8* buffer) {
type PrimitiveTypeHelper<WireFormatLite::TYPE_SINT32> (line 140) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_SINT32> {
method Serialize (line 142) | static void Serialize(const void* ptr, io::CodedOutputStream* ou...
method uint8 (line 145) | static uint8* SerializeToArray(const void* ptr, uint8* buffer) {
type PrimitiveTypeHelper<WireFormatLite::TYPE_UINT32> (line 151) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_UINT32> {
method Serialize (line 153) | static void Serialize(const void* ptr, io::CodedOutputStream* ou...
method uint8 (line 156) | static uint8* SerializeToArray(const void* ptr, uint8* buffer) {
type PrimitiveTypeHelper<WireFormatLite::TYPE_INT64> (line 161) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_INT64> {
method Serialize (line 163) | static void Serialize(const void* ptr, io::CodedOutputStream* ou...
method uint8 (line 166) | static uint8* SerializeToArray(const void* ptr, uint8* buffer) {
type PrimitiveTypeHelper<WireFormatLite::TYPE_SINT64> (line 172) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_SINT64> {
method Serialize (line 174) | static void Serialize(const void* ptr, io::CodedOutputStream* ou...
method uint8 (line 177) | static uint8* SerializeToArray(const void* ptr, uint8* buffer) {
type PrimitiveTypeHelper<WireFormatLite::TYPE_UINT64> (line 182) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_UINT64> {
method Serialize (line 184) | static void Serialize(const void* ptr, io::CodedOutputStream* ou...
method uint8 (line 187) | static uint8* SerializeToArray(const void* ptr, uint8* buffer) {
type PrimitiveTypeHelper<WireFormatLite::TYPE_FIXED32> (line 193) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_FIXED32> {
method Serialize (line 195) | static void Serialize(const void* ptr, io::CodedOutputStream* ou...
method uint8 (line 198) | static uint8* SerializeToArray(const void* ptr, uint8* buffer) {
type PrimitiveTypeHelper<WireFormatLite::TYPE_FIXED64> (line 204) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_FIXED64> {
method Serialize (line 206) | static void Serialize(const void* ptr, io::CodedOutputStream* ou...
method uint8 (line 209) | static uint8* SerializeToArray(const void* ptr, uint8* buffer) {
type PrimitiveTypeHelper<WireFormatLite::TYPE_ENUM> (line 215) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_ENUM>
type PrimitiveTypeHelper<WireFormatLite::TYPE_SFIXED32> (line 219) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_SFIXED32>
type PrimitiveTypeHelper<WireFormatLite::TYPE_SFIXED64> (line 224) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_SFIXED64>
type PrimitiveTypeHelper<WireFormatLite::TYPE_FLOAT> (line 229) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_FLOAT>
type PrimitiveTypeHelper<WireFormatLite::TYPE_DOUBLE> (line 234) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_DOUBLE>
type PrimitiveTypeHelper<WireFormatLite::TYPE_STRING> (line 240) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_STRING> {
method Serialize (line 242) | static void Serialize(const void* ptr, io::CodedOutputStream* ou...
method uint8 (line 247) | static uint8* SerializeToArray(const void* ptr, uint8* buffer) {
type PrimitiveTypeHelper<WireFormatLite::TYPE_BYTES> (line 254) | struct PrimitiveTypeHelper<WireFormatLite::TYPE_BYTES>
type OutputHelper (line 262) | struct OutputHelper
function SerializeTo (line 265) | void SerializeTo(const void* ptr, O* output) {
function WriteTagTo (line 270) | void WriteTagTo(uint32 tag, O* output) {
function WriteLengthTo (line 275) | void WriteLengthTo(uint32 length, O* output) {
type OutputHelper<io::CodedOutputStream, type> (line 281) | struct OutputHelper<io::CodedOutputStream, type> {
method Serialize (line 282) | static void Serialize(const void* ptr, io::CodedOutputStream* ou...
type ArrayOutput (line 288) | struct ArrayOutput {
type OutputHelper<ArrayOutput, type> (line 294) | struct OutputHelper<ArrayOutput, type> {
method Serialize (line 295) | static void Serialize(const void* ptr, ArrayOutput* output) {
function SerializeMessageNoTable (line 300) | void SerializeMessageNoTable(const MessageLite* msg,
function SerializeMessageNoTable (line 305) | void SerializeMessageNoTable(const MessageLite* msg, ArrayOutput* ...
function SerializeMessageDispatch (line 314) | void SerializeMessageDispatch(const MessageLite& msg,
function SerializeMessageDispatch (line 323) | void SerializeMessageDispatch(const MessageLite& msg,
function SerializeMessageTo (line 334) | void SerializeMessageTo(const MessageLite* msg, const void* table_...
function SerializeGroupTo (line 355) | void SerializeGroupTo(const MessageLite* msg, const void* table_ptr,
type SingularFieldHelper (line 373) | struct SingularFieldHelper {
method Serialize (line 375) | static void Serialize(const void* field, const FieldMetadata& md...
type SingularFieldHelper<WireFormatLite::TYPE_STRING> (line 382) | struct SingularFieldHelper<WireFormatLite::TYPE_STRING> {
method Serialize (line 384) | static void Serialize(const void* field, const FieldMetadata& md...
type SingularFieldHelper<WireFormatLite::TYPE_BYTES> (line 392) | struct SingularFieldHelper<WireFormatLite::TYPE_BYTES>
type SingularFieldHelper<WireFormatLite::TYPE_GROUP> (line 396) | struct SingularFieldHelper<WireFormatLite::TYPE_GROUP> {
method Serialize (line 398) | static void Serialize(const void* field, const FieldMetadata& md...
type SingularFieldHelper<WireFormatLite::TYPE_MESSAGE> (line 407) | struct SingularFieldHelper<WireFormatLite::TYPE_MESSAGE> {
method Serialize (line 409) | static void Serialize(const void* field, const FieldMetadata& md...
type RepeatedFieldHelper (line 417) | struct RepeatedFieldHelper {
method Serialize (line 419) | static void Serialize(const void* field, const FieldMetadata& md...
class AccessorHelper (line 430) | class AccessorHelper {
method Size (line 432) | static int Size(const RepeatedPtrFieldBase& x) { return x.size(); }
type RepeatedFieldHelper<WireFormatLite::TYPE_STRING> (line 439) | struct RepeatedFieldHelper<WireFormatLite::TYPE_STRING> {
method Serialize (line 441) | static void Serialize(const void* field, const FieldMetadata& md...
type RepeatedFieldHelper<WireFormatLite::TYPE_BYTES> (line 453) | struct RepeatedFieldHelper<WireFormatLite::TYPE_BYTES>
type RepeatedFieldHelper<WireFormatLite::TYPE_GROUP> (line 457) | struct RepeatedFieldHelper<WireFormatLite::TYPE_GROUP> {
method Serialize (line 459) | static void Serialize(const void* field, const FieldMetadata& md...
type RepeatedFieldHelper<WireFormatLite::TYPE_MESSAGE> (line 473) | struct RepeatedFieldHelper<WireFormatLite::TYPE_MESSAGE> {
method Serialize (line 475) | static void Serialize(const void* field, const FieldMetadata& md...
type PackedFieldHelper (line 489) | struct PackedFieldHelper {
method Serialize (line 491) | static void Serialize(const void* field, const FieldMetadata& md...
type PackedFieldHelper<WireFormatLite::TYPE_STRING> (line 506) | struct PackedFieldHelper<WireFormatLite::TYPE_STRING> {
method Serialize (line 508) | static void Serialize(const void* field, const FieldMetadata& md...
type PackedFieldHelper<WireFormatLite::TYPE_BYTES> (line 515) | struct PackedFieldHelper<WireFormatLite::TYPE_BYTES>
type PackedFieldHelper<WireFormatLite::TYPE_GROUP> (line 518) | struct PackedFieldHelper<WireFormatLite::TYPE_GROUP>
type PackedFieldHelper<WireFormatLite::TYPE_MESSAGE> (line 521) | struct PackedFieldHelper<WireFormatLite::TYPE_MESSAGE>
type OneOfFieldHelper (line 525) | struct OneOfFieldHelper {
method Serialize (line 527) | static void Serialize(const void* field, const FieldMetadata& md...
function SerializeNotImplemented (line 533) | void SerializeNotImplemented(int field) {
function IsNull (line 547) | bool IsNull(const void* ptr) {
function SerializeInternal (line 594) | void SerializeInternal(const uint8* base,
function uint8 (line 635) | uint8
Copy disabled (too large)
Download .json
Condensed preview — 261 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (48,634K chars).
[
{
"path": ".github/ISSUE_TEMPLATE/bug_report.md",
"chars": 931,
"preview": "---\nname: Bug report\nabout: Create a report to help us improve\ntitle: ''\nlabels: bug\nassignees: ''\n\n---\n\n**Describe the "
},
{
"path": ".github/ISSUE_TEMPLATE/feature_request.md",
"chars": 505,
"preview": "---\nname: Feature request\nabout: Suggest an idea for this project\ntitle: ''\nlabels: feature request\nassignees: ''\n\n---\n\n"
},
{
"path": ".github/dependabot.yml",
"chars": 697,
"preview": "# To get started with Dependabot version updates, you'll need to specify which\n# package ecosystems to update and where "
},
{
"path": ".github/pull_request_template.md",
"chars": 2721,
"preview": "## Thank you for your contribution\n\nWe sincerely appreciate your interest in contributing to this project. Our goal is t"
},
{
"path": ".github/workflows/cifuzz.yml",
"chars": 743,
"preview": "name: CIFuzz\non: [pull_request]\n\npermissions:\n contents: read\n\njobs:\n Fuzzing:\n runs-on: ubuntu-latest\n steps:\n "
},
{
"path": ".github/workflows/cmake.yml",
"chars": 2483,
"preview": "name: CI for general build\n\non:\n push:\n branches: [ master ]\n tags:\n - 'v*'\n pull_request:\n branches: [ "
},
{
"path": ".github/workflows/cross_build.yml",
"chars": 1762,
"preview": "name: CrossBuild\n\non:\n push:\n branches: [ master ]\n tags:\n - 'v*'\n pull_request:\n branches: [ master ]\n "
},
{
"path": ".github/workflows/requirements/base.in",
"chars": 57,
"preview": "pip\nsetuptools\nwheel\ntwine\npytest\nbuild\npackaging >= 25.0"
},
{
"path": ".github/workflows/requirements/base.txt",
"chars": 27973,
"preview": "#\n# This file is autogenerated by pip-compile with Python 3.11\n# by the following command:\n#\n# pip-compile --allow-un"
},
{
"path": ".github/workflows/requirements/cibuildwheel.in",
"chars": 25,
"preview": "-c base.txt\ncibuildwheel\n"
},
{
"path": ".github/workflows/requirements/cibuildwheel.txt",
"chars": 3896,
"preview": "#\n# This file is autogenerated by pip-compile with Python 3.11\n# by the following command:\n#\n# pip-compile --allow-un"
},
{
"path": ".github/workflows/wheel.yml",
"chars": 7143,
"preview": "name: Build Wheels\n\non:\n push:\n branches: [ master ]\n tags:\n - 'v*'\n pull_request:\n branches: [ master ]"
},
{
"path": ".gitignore",
"chars": 794,
"preview": "Makefile\nMakefile.in\n/ar-lib\n/mdate-sh\n/py-compile\n/test-driver\n/ylwrap\n/build\n\n/autom4te.cache\n/autoscan.log\n/autoscan-"
},
{
"path": "CMakeLists.txt",
"chars": 8162,
"preview": "# Copyright 2018 Google Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this "
},
{
"path": "CONTRIBUTING.md",
"chars": 1448,
"preview": "Want to contribute? Great! First, read this page (including the small print at the end).\n\n### Before you contribute\nBefo"
},
{
"path": "LICENSE",
"chars": 11358,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 18505,
"preview": "# SentencePiece\n\n[](https://g"
},
{
"path": "VERSION.txt",
"chars": 6,
"preview": "0.2.2\n"
},
{
"path": "cmake/ios.toolchain.cmake",
"chars": 46672,
"preview": "# This file is part of the ios-cmake project. It was retrieved from\n# https://github.com/leetal/ios-cmake.git, which is "
},
{
"path": "config.h.in",
"chars": 213,
"preview": "#ifndef CONFIG_H_\n#define CONFIG_H_\n\n#define VERSION \"@PROJECT_VERSION@\"\n#define PACKAGE \"@PROJECT_NAME@\"\n#define PACKAG"
},
{
"path": "contrib/docker/Dockerfile",
"chars": 621,
"preview": "FROM alpine:3.23 AS build\n\nRUN apk add --no-cache \\\n cmake \\\n make \\\n g++ \\\n pkgconf \\\n && apk add --no-c"
},
{
"path": "contrib/docker/README.md",
"chars": 673,
"preview": "# Dockerfile\n\nThis directory contains a convenient Dockerfile to build and run sentencepiece C++ command line tools in a"
},
{
"path": "data/Scripts.txt",
"chars": 160022,
"preview": "# Scripts-9.0.0.txt\n# Date: 2016-06-01, 10:34:37 GMT\n# © 2016 Unicode®, Inc.\n# Unicode and the Unicode Logo are register"
},
{
"path": "data/botchan.txt",
"chars": 278777,
"preview": "Project Gutenberg's Botchan (Master Darling), by Kin-nosuke Natsume\r\nThis eBook is for the use of anyone anywhere at no"
},
{
"path": "data/extract_headers.pl",
"chars": 1068,
"preview": "#!/usr/bin/perl\n\n# Copyright 2018 Google Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you "
},
{
"path": "data/gen_spec_parser.pl",
"chars": 6066,
"preview": "#!/usr/bin/perl\n\n# Copyright 2018 Google Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you "
},
{
"path": "data/gen_unicode_scripts_code.pl",
"chars": 1507,
"preview": "#!/usr/bin/perl\n\n# Copyright 2016 Google Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you "
},
{
"path": "data/ids_denorm.tsv",
"chars": 1031580,
"preview": "U+2FF0 U+5FC4 U+2FF3 U+4E3F U+2FF4 U+53E3 U+30BF U+5FC3\tU+6181\t# ⿰忄⿳丿⿴口タ心 => 憁\nU+2FF0 U+2FF1 U+2FF1 U+2E8C U+4E00 U+513F"
},
{
"path": "data/ids_norm.tsv",
"chars": 1031580,
"preview": "U+6181\tU+2FF0 U+5FC4 U+2FF3 U+4E3F U+2FF4 U+53E3 U+30BF U+5FC3\t# 憁 => ⿰忄⿳丿⿴口タ心\nU+8000\tU+2FF0 U+2FF1 U+2FF1 U+2E8C U+4E00"
},
{
"path": "data/nfc.tsv",
"chars": 400633,
"preview": "3C 338\t226E\t# ≮ => ≮\n3D 338\t2260\t# ≠ => ≠\n3E 338\t226F\t# ≯ => ≯\n41 300\tC0\t# À => À\n41 301\tC1\t# Á => Á\n41 302\tC2\t# Â"
},
{
"path": "data/nfc_cf.tsv",
"chars": 487383,
"preview": "41\t61\t# A => a\n42\t62\t# B => b\n43\t63\t# C => c\n44\t64\t# D => d\n45\t65\t# E => e\n46\t66\t# F => f\n47\t67\t# G => g\n48\t68\t# H => h\n"
},
{
"path": "data/nfd.tsv",
"chars": 389378,
"preview": "C0\t41 300\t# À => À\nC1\t41 301\t# Á => Á\nC2\t41 302\t# Â => Â\nC3\t41 303\t# Ã => Ã\nC4\t41 308\t# Ä => Ä\nC5\t41 30A\t# Å => Å\n"
},
{
"path": "data/nfd_cf.tsv",
"chars": 410132,
"preview": "41\t61\t# A => a\n42\t62\t# B => b\n43\t63\t# C => c\n44\t64\t# D => d\n45\t65\t# E => e\n46\t66\t# F => f\n47\t67\t# G => g\n48\t68\t# H => h\n"
},
{
"path": "data/nfkc.tsv",
"chars": 6830141,
"preview": "3C 338\t226E\t# ≮ => ≮\n3D 338\t2260\t# ≠ => ≠\n3E 338\t226F\t# ≯ => ≯\n41 300\tC0\t# À => À\n41 301\tC1\t# Á => Á\n41 302\tC2\t# Â"
},
{
"path": "data/nfkc_cf.tsv",
"chars": 6855850,
"preview": "3C 338\t226E\t# ≮ => ≮\n3D 338\t2260\t# ≠ => ≠\n3E 338\t226F\t# ≯ => ≯\n41\t61\t# A => a\n41 300\tE0\t# À => à\n41 301\tE1\t# Á => á"
},
{
"path": "data/nfkd.tsv",
"chars": 468294,
"preview": "A0\t20\t# => \nA8\t20 308\t# ¨ => ̈\nAA\t61\t# ª => a\nAF\t20 304\t# ¯ => ̄\nB2\t32\t# ² => 2\nB3\t33\t# ³ => 3\nB4\t20 301\t# ´ => ́\n"
},
{
"path": "data/nfkd_cf.tsv",
"chars": 487383,
"preview": "41\t61\t# A => a\n42\t62\t# B => b\n43\t63\t# C => c\n44\t64\t# D => d\n45\t65\t# E => e\n46\t66\t# F => f\n47\t67\t# G => g\n48\t68\t# H => h\n"
},
{
"path": "data/nmt_nfkc.tsv",
"chars": 6830699,
"preview": "1\t\t# \u0001 => \n2\t\t# \u0002 => \n3\t\t# \u0003 => \n4\t\t# \u0004 => \n5\t\t# \u0005 => \n6\t\t# \u0006 => \n7\t\t# \u0007 => \n8\t\t# => \n9\t20\t# \t => \nA\t20\t# => \nB\t\t#"
},
{
"path": "data/nmt_nfkc_cf.tsv",
"chars": 6856408,
"preview": "1\t\t# \u0001 => \n2\t\t# \u0002 => \n3\t\t# \u0003 => \n4\t\t# \u0004 => \n5\t\t# \u0005 => \n6\t\t# \u0006 => \n7\t\t# \u0007 => \n8\t\t# => \n9\t20\t# \t => \nA\t20\t# => \nB\t\t#"
},
{
"path": "data/wagahaiwa_nekodearu.txt",
"chars": 377212,
"preview": "吾輩は猫である\r\n夏目漱石\r\n-------------------------------------------------------\r\n【テキスト中に現れる記号について】\r\n《》:ルビ\r\n(例)吾輩《わがはい》は猫である\r\n|:ルビ"
},
{
"path": "doc/api.md",
"chars": 4961,
"preview": "# SentencePieceProcessor C++ API\n\n## Load SentencePiece model\nTo start working with the SentencePiece model, you will wa"
},
{
"path": "doc/experiments.md",
"chars": 9085,
"preview": "# SentencePiece Experiments\n\n## Experiments 1 (subword vs word-based model)\n### Experimental settings\n\n* Segmentation "
},
{
"path": "doc/normalization.md",
"chars": 3181,
"preview": "# Use custom normalization rule\nBy default, SentencePiece normalizes the input sentence with a variant of Unicode\n[NFKC]"
},
{
"path": "doc/options.md",
"chars": 5495,
"preview": "# Training options\n\nThe training options for the `spm_train` can be listed using `spm_train --help`. Since the standard "
},
{
"path": "doc/special_symbols.md",
"chars": 1095,
"preview": "# Use custom symbols\nSentencePiece model supports two types of special symbols.\n\n## Control symbol\nControl symbols are u"
},
{
"path": "python/.gitignore",
"chars": 75,
"preview": "/*.so\n/build\n/*.pickle\n/m*.model\n/m*.vocab\n/src/sentencepiece/package_data\n"
},
{
"path": "python/MANIFEST.in",
"chars": 136,
"preview": "recursive-include test *.py *.model botchan.txt\nrecursive-include src *\nrecursive-include sentencepiece *\ninclude *.md b"
},
{
"path": "python/README.md",
"chars": 8456,
"preview": "# SentencePiece Python Wrapper\n\nPython wrapper for SentencePiece. This API will offer the encoding, decoding and trainin"
},
{
"path": "python/add_new_vocab.ipynb",
"chars": 2817,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"#### You can add new special tokens"
},
{
"path": "python/build_bundled.sh",
"chars": 657,
"preview": "#!/bin/sh\n\nVERSION=\"$1\"\n\nmkdir -p build\n\nBUILD_DIR=./build\nINSTALL_DIR=./build/root\n\nif [ -f ./sentencepiece/src/CMakeLi"
},
{
"path": "python/build_sdist.sh",
"chars": 256,
"preview": "#!/bin/sh\n\nmkdir -p sentencepiece\n\nfor i in CMakeLists.txt LICENSE README.md VERSION.txt cmake config.h.in sentencepiec"
},
{
"path": "python/pyproject.toml",
"chars": 1551,
"preview": "[build-system]\nrequires = [\"setuptools>=61.0\", \"wheel\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[project]\nname = \"sente"
},
{
"path": "python/sentencepiece_python_module_example.ipynb",
"chars": 45596,
"preview": "{\n \"nbformat\": 4,\n \"nbformat_minor\": 0,\n \"metadata\": {\n \"colab\": {\n \"name\": \"Sentencepiece python module exam"
},
{
"path": "python/setup.cfg",
"chars": 40,
"preview": "[metadata]\ndescription_file = README.md\n"
},
{
"path": "python/setup.py",
"chars": 7290,
"preview": "#!/usr/bin/env python\n\n# Copyright 2018 Google Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n"
},
{
"path": "python/src/sentencepiece/__init__.py",
"chars": 49422,
"preview": "# This file was automatically generated by SWIG (https://www.swig.org).\n# Version 4.3.0\n#\n# Do not make changes to this "
},
{
"path": "python/src/sentencepiece/_version.py",
"chars": 22,
"preview": "__version__ = '0.2.2'\n"
},
{
"path": "python/src/sentencepiece/sentencepiece.i",
"chars": 72647,
"preview": "%module sentencepiece\n%include exception.i\n\n%{\n\n#include <atomic>\n#include <iostream>\n#include <algorithm>\n#include <fun"
},
{
"path": "python/src/sentencepiece/sentencepiece_model_pb2.py",
"chars": 6257,
"preview": "# -*- coding: utf-8 -*-\n# Generated by the protocol buffer compiler. DO NOT EDIT!\n# source: sentencepiece_model.proto\n\""
},
{
"path": "python/src/sentencepiece/sentencepiece_pb2.py",
"chars": 1753,
"preview": "# -*- coding: utf-8 -*-\n# Generated by the protocol buffer compiler. DO NOT EDIT!\n# source: sentencepiece.proto\n\"\"\"Gene"
},
{
"path": "python/src/sentencepiece/sentencepiece_wrap.cxx",
"chars": 381494,
"preview": "/* ----------------------------------------------------------------------------\n * This file was automatically generated"
},
{
"path": "python/test/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "python/test/botchan.txt",
"chars": 278777,
"preview": "Project Gutenberg's Botchan (Master Darling), by Kin-nosuke Natsume\r\nThis eBook is for the use of anyone anywhere at no"
},
{
"path": "python/test/sentencepiece_test.py",
"chars": 32777,
"preview": "#!/usr/bin/python\n# -*- coding: utf-8 -*-\n\n# Copyright 2018 Google Inc.\n#\n# Licensed under the Apache License, Version 2"
},
{
"path": "sentencepiece.pc.in",
"chars": 401,
"preview": "prefix=@prefix@\nexec_prefix=@exec_prefix@\nlibdir=@libdir_for_pc_file@\nincludedir=@includedir_for_pc_file@\ndatadir=@datad"
},
{
"path": "src/CMakeLists.txt",
"chars": 12428,
"preview": "# Copyright 2018 Google Inc.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this "
},
{
"path": "src/bpe_model.cc",
"chars": 6591,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/bpe_model.h",
"chars": 1748,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/bpe_model_test.cc",
"chars": 9284,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/bpe_model_trainer.cc",
"chars": 10630,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/bpe_model_trainer.h",
"chars": 4616,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/bpe_model_trainer_test.cc",
"chars": 4465,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/builder.cc",
"chars": 22287,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/builder.h",
"chars": 5734,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/builder_test.cc",
"chars": 7149,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/builtin_pb/sentencepiece.pb.cc",
"chars": 35154,
"preview": "// Generated by the protocol buffer compiler. DO NOT EDIT!\n// source: sentencepiece.proto\n\n#include \"sentencepiece.pb.h"
},
{
"path": "src/builtin_pb/sentencepiece.pb.h",
"chars": 40648,
"preview": "// Generated by the protocol buffer compiler. DO NOT EDIT!\n// source: sentencepiece.proto\n\n#ifndef GOOGLE_PROTOBUF_INCL"
},
{
"path": "src/builtin_pb/sentencepiece_model.pb.cc",
"chars": 138754,
"preview": "// Generated by the protocol buffer compiler. DO NOT EDIT!\n// source: sentencepiece_model.proto\n\n#include \"sentencepiec"
},
{
"path": "src/builtin_pb/sentencepiece_model.pb.h",
"chars": 204921,
"preview": "// Generated by the protocol buffer compiler. DO NOT EDIT!\n// source: sentencepiece_model.proto\n\n#ifndef GOOGLE_PROTOBU"
},
{
"path": "src/char_model.cc",
"chars": 1304,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/char_model.h",
"chars": 1061,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/char_model_test.cc",
"chars": 3523,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/char_model_trainer.cc",
"chars": 1784,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/char_model_trainer.h",
"chars": 1265,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/char_model_trainer_test.cc",
"chars": 2482,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/common.h",
"chars": 1757,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/compile_charsmap_main.cc",
"chars": 6757,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/error.cc",
"chars": 3252,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/filesystem.cc",
"chars": 3587,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/filesystem.h",
"chars": 1852,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/filesystem_test.cc",
"chars": 1553,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/freelist.h",
"chars": 2605,
"preview": "// Copyright 2018 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/freelist_test.cc",
"chars": 1280,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/init.cc",
"chars": 2162,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/init.h",
"chars": 1001,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/init_test.cc",
"chars": 4041,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/model_factory.cc",
"chars": 1596,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/model_factory.h",
"chars": 972,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/model_factory_test.cc",
"chars": 1743,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/model_interface.cc",
"chars": 7123,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/model_interface.h",
"chars": 8751,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/model_interface_test.cc",
"chars": 15351,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/normalization_rule.h",
"chars": 7274179,
"preview": "#ifndef NORMALIZATION_RULE_H_\n#define NORMALIZATION_RULE_H_\n#include <cstdio>\nnamespace sentencepiece {\nnamespace {\n\nstr"
},
{
"path": "src/normalizer.cc",
"chars": 12112,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/normalizer.h",
"chars": 5775,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/normalizer_test.cc",
"chars": 17416,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/pretokenizer_for_training.cc",
"chars": 2132,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/pretokenizer_for_training.h",
"chars": 2136,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/pretokenizer_for_training_test.cc",
"chars": 2836,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/sentencepiece.proto",
"chars": 2708,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/sentencepiece_model.proto",
"chars": 14023,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/sentencepiece_processor.cc",
"chars": 39526,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/sentencepiece_processor.h",
"chars": 28523,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/sentencepiece_processor_test.cc",
"chars": 53908,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/sentencepiece_trainer.cc",
"chars": 14823,
"preview": "// Copyright 2018 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/sentencepiece_trainer.h",
"chars": 8559,
"preview": "// Copyright 2018 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/sentencepiece_trainer_test.cc",
"chars": 16718,
"preview": "// Copyright 2018 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/spec_parser.h",
"chars": 11012,
"preview": "// Copyright 2016 Google LLC.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/spm_decode_main.cc",
"chars": 4042,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/spm_encode_main.cc",
"chars": 6724,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/spm_export_vocab_main.cc",
"chars": 2083,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/spm_normalize_main.cc",
"chars": 4220,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/spm_train_main.cc",
"chars": 13282,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/test_main.cc",
"chars": 1077,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/testharness.cc",
"chars": 1875,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/testharness.h",
"chars": 8635,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/trainer_factory.cc",
"chars": 2090,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/trainer_factory.h",
"chars": 1104,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/trainer_factory_test.cc",
"chars": 1656,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/trainer_interface.cc",
"chars": 29150,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/trainer_interface.h",
"chars": 5731,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/trainer_interface_test.cc",
"chars": 20579,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/unicode_script.cc",
"chars": 1239,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/unicode_script.h",
"chars": 2817,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/unicode_script_map.h",
"chars": 106446,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/unicode_script_test.cc",
"chars": 1515,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/unigram_model.cc",
"chars": 34073,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/unigram_model.h",
"chars": 7330,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/unigram_model_test.cc",
"chars": 32227,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/unigram_model_trainer.cc",
"chars": 22055,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/unigram_model_trainer.h",
"chars": 3945,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/unigram_model_trainer_test.cc",
"chars": 6467,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/util.cc",
"chars": 7753,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/util.h",
"chars": 12167,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/util_test.cc",
"chars": 13038,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/word_model.cc",
"chars": 1124,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/word_model.h",
"chars": 1045,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/word_model_test.cc",
"chars": 2639,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/word_model_trainer.cc",
"chars": 2105,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/word_model_trainer.h",
"chars": 1372,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "src/word_model_trainer_test.cc",
"chars": 2440,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "third_party/CMakeLists.txt",
"chars": 69,
"preview": "include_directories(absl/strings darts_clone esaxx protobuf-lite)\n\n\n\n"
},
{
"path": "third_party/absl/LICENSE",
"chars": 11365,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "third_party/absl/container/btree_set.h",
"chars": 890,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "third_party/absl/container/flat_hash_map.h",
"chars": 1001,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "third_party/absl/container/flat_hash_set.h",
"chars": 966,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "third_party/absl/flags/flag.cc",
"chars": 6195,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "third_party/absl/flags/flag.h",
"chars": 1691,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "third_party/absl/flags/parse.h",
"chars": 799,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "third_party/absl/flags/usage.h",
"chars": 836,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "third_party/absl/flags/usage_config.h",
"chars": 915,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "third_party/absl/log/check.h",
"chars": 1786,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "third_party/absl/log/globals.h",
"chars": 831,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "third_party/absl/log/log.cc",
"chars": 951,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "third_party/absl/log/log.h",
"chars": 2066,
"preview": "// Copyright 2016 Google Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use t"
},
{
"path": "third_party/absl/strings/ascii.h",
"chars": 1309,
"preview": "//\n// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you ma"
},
{
"path": "third_party/absl/strings/match.h",
"chars": 1308,
"preview": "//\n// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you ma"
},
{
"path": "third_party/absl/strings/numbers.h",
"chars": 1012,
"preview": "//\n// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you ma"
},
{
"path": "third_party/absl/strings/str_cat.h",
"chars": 1447,
"preview": "//\n// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you ma"
},
{
"path": "third_party/absl/strings/str_format.h",
"chars": 1088,
"preview": "//\n// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you ma"
},
{
"path": "third_party/absl/strings/str_join.h",
"chars": 2413,
"preview": "//\n// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you ma"
},
{
"path": "third_party/absl/strings/str_replace.h",
"chars": 2127,
"preview": "//\n// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you ma"
},
{
"path": "third_party/absl/strings/str_split.h",
"chars": 2669,
"preview": "//\n// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you ma"
},
{
"path": "third_party/absl/strings/string_view.h",
"chars": 2106,
"preview": "//\n// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you ma"
},
{
"path": "third_party/absl/strings/strip.h",
"chars": 991,
"preview": "//\n// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you ma"
},
{
"path": "third_party/darts_clone/LICENSE",
"chars": 1481,
"preview": "Copyright (c) 2008-2011, Susumu Yata\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or wi"
},
{
"path": "third_party/darts_clone/darts.h",
"chars": 51812,
"preview": "#ifndef DARTS_H_\n#define DARTS_H_\n\n#include <cstdio>\n#include <exception>\n#include <new>\n\n#define DARTS_VERSION \"0.32\"\n\n"
},
{
"path": "third_party/esaxx/LICENSE",
"chars": 1112,
"preview": "This is the esaxx copyright.\n\nCopyright (c) 2010 Daisuke Okanohara All Rights Reserved.\n\nPermission is hereby granted, f"
},
{
"path": "third_party/esaxx/esa.hxx",
"chars": 4224,
"preview": "/*\n * esa.hxx\n * Copyright (c) 2010 Daisuke Okanohara All Rights Reserved.\n *\n * Permission is hereby granted, free of c"
},
{
"path": "third_party/esaxx/sais.hxx",
"chars": 12385,
"preview": "/*\n * sais.hxx for sais-lite\n * Copyright (c) 2008-2009 Yuta Mori All Rights Reserved.\n *\n * Permission is hereby grante"
},
{
"path": "third_party/protobuf-lite/LICENSE",
"chars": 1732,
"preview": "Copyright 2008 Google Inc. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmod"
},
{
"path": "third_party/protobuf-lite/arena.cc",
"chars": 15726,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/arenastring.cc",
"chars": 8985,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/bytestream.cc",
"chars": 5975,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/coded_stream.cc",
"chars": 30847,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/common.cc",
"chars": 10999,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/extension_set.cc",
"chars": 82338,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/generated_enum_util.cc",
"chars": 3573,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/generated_message_table_driven_lite.cc",
"chars": 4441,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/generated_message_util.cc",
"chars": 30000,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/google/protobuf/any.h",
"chars": 6215,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/google/protobuf/arena.h",
"chars": 29756,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/google/protobuf/arena_impl.h",
"chars": 18092,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/google/protobuf/arenastring.h",
"chars": 15918,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/google/protobuf/descriptor.h",
"chars": 96637,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/google/protobuf/extension_set.h",
"chars": 78585,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/google/protobuf/extension_set_inl.h",
"chars": 12437,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/google/protobuf/generated_enum_reflection.h",
"chars": 3993,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/google/protobuf/generated_enum_util.h",
"chars": 3266,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
},
{
"path": "third_party/protobuf-lite/google/protobuf/generated_message_table_driven.h",
"chars": 12532,
"preview": "// Protocol Buffers - Google's data interchange format\n// Copyright 2008 Google Inc. All rights reserved.\n// https://de"
}
]
// ... and 61 more files (download for full content)
About this extraction
This page contains the full source code of the google/sentencepiece GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 261 files (42.0 MB), approximately 11.0M tokens, and a symbol index with 2706 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.